From 8421f23975ff6155c027dc38d87533a3c14b9acf Mon Sep 17 00:00:00 2001 From: Blargian Date: Sun, 28 Jan 2024 22:56:47 +0100 Subject: [PATCH 001/392] #56257 - add failing test and new setting for parsing TSV files with crlf --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../02973_parse_crlf_with_tsv_files.sh | 23 +++++++++++++++++++ 4 files changed, 26 insertions(+) create mode 100755 tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e0b3ca39899..a62380ad926 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1012,6 +1012,7 @@ class IColumn; M(Bool, input_format_csv_skip_trailing_empty_lines, false, "Skip trailing empty lines in CSV format", 0) \ M(Bool, input_format_tsv_skip_trailing_empty_lines, false, "Skip trailing empty lines in TSV format", 0) \ M(Bool, input_format_custom_skip_trailing_empty_lines, false, "Skip trailing empty lines in CustomSeparated format", 0) \ + M(Bool, input_format_tsv_crlf_end_of_line, false, "If it is set true, file function will read TSV format with \\r\\n instead of \\n.", 0) \ \ M(Bool, input_format_native_allow_types_conversion, true, "Allow data types conversion in Native input format", 0) \ \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 62cbadec4f4..a4a6e1ab83a 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -175,6 +175,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.tsv.try_detect_header = settings.input_format_tsv_detect_header; format_settings.tsv.skip_trailing_empty_lines = settings.input_format_tsv_skip_trailing_empty_lines; format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns; + format_settings.tsv.crlf_end_of_line_input = settings.input_format_tsv_crlf_end_of_line; format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals; format_settings.values.allow_data_after_semicolon = settings.input_format_values_allow_data_after_semicolon; format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 30e4dd04513..7231e10a763 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -351,6 +351,7 @@ struct FormatSettings bool try_detect_header = true; bool skip_trailing_empty_lines = false; bool allow_variable_number_of_columns = false; + bool crlf_end_of_line_input = false; } tsv; struct diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh new file mode 100755 index 00000000000..6f7308e18a4 --- /dev/null +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +# Test setup +touch ${USER_FILES_PATH:?}/02973_data_without_crlf.tsv +touch ${USER_FILES_PATH:?}/02973_data_with_crlf.tsv +echo -e 'Akiba_Hebrew_Academy\t2017-08-01\t241\nAegithina_tiphia\t2018-02-01\t34\n1971-72_Utah_Stars_season\t2016-10-01\t1' > "$USER_FILES_PATH/02973_data_without_crlf.tsv" +echo -e 'Akiba_Hebrew_Academy\t2017-08-01\t241\r\nAegithina_tiphia\t2018-02-01\t34\r\n1971-72_Utah_Stars_season\t2016-10-01\t1\r' > "$USER_FILES_PATH/02973_data_with_crlf.tsv" + +$CLICKHOUSE_CLIENT --multiquery "SELECT * FROM file(02973_data_without_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" +$CLICKHOUSE_CLIENT --multiquery "SELECT * FROM file(02973_data_with_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{clientError 117}" + +# Change setting to escape \r +$CLICKHOUSE_CLIENT --multiquery "SELECT * FROM file(02973_data_with_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" + +# Test teardown +rm "$USER_FILES_PATH/02973_data_without_crlf.tsv" +rm "$USER_FILES_PATH/02973_data_with_crlf.tsv" From 7ae202376f29e56a9dc82ad911155ab451c0317b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 31 Jan 2024 21:03:06 +0100 Subject: [PATCH 002/392] missed place for documentation change --- docs/en/operations/settings/settings-formats.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index eb09af44efd..e5c555af018 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -823,7 +823,13 @@ Default value: `0`. ### output_format_tsv_crlf_end_of_line {#output_format_tsv_crlf_end_of_line} -Use DOC/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). +Use DOS/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). + +Disabled by default. + +### input_format_tsv_crlf_end_of_line {#input_format_tsv_crlf_end_of_line} + +Use DOS/Windows-style line separator (CRLF) for TSV input files instead of Unix style (LF). Disabled by default. 
From 31416bc4885a5d6302e8e59235921cc018b121b4 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 31 Jan 2024 21:03:47 +0100 Subject: [PATCH 003/392] 2 tests fail - not sure if related to changes, try again From ab384f86527641a6a9c28179fe995e957072e157 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sun, 4 Feb 2024 15:29:57 +0100 Subject: [PATCH 004/392] add support_crlf for TSV format --- .../SerializationFixedString.cpp | 8 +++- .../Serializations/SerializationNullable.cpp | 5 +- .../Serializations/SerializationString.cpp | 8 +++- src/Formats/EscapingRuleUtils.cpp | 4 +- src/IO/ReadHelpers.cpp | 46 +++++++++++++++---- src/IO/ReadHelpers.h | 6 ++- .../Formats/Impl/TSKVRowInputFormat.cpp | 2 +- .../Impl/TabSeparatedRowInputFormat.cpp | 11 +++-- 8 files changed, 68 insertions(+), 22 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationFixedString.cpp b/src/DataTypes/Serializations/SerializationFixedString.cpp index fa50af52f2f..cf731409fd0 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.cpp +++ b/src/DataTypes/Serializations/SerializationFixedString.cpp @@ -151,9 +151,13 @@ static inline void read(const SerializationFixedString & self, IColumn & column, } -void SerializationFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - read(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); }); + read(*this, column, [&istr, &settings](ColumnFixedString::Chars & data) + { + settings.tsv.crlf_end_of_line_input ? readEscapedStringInto(data, istr) + : readEscapedStringInto(data, istr); + }); } diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 4b0ad0b54ba..c0fbdfbb022 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -290,6 +290,7 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col const SerializationPtr & nested_serialization) { const String & null_representation = settings.tsv.null_representation; + const bool supports_crlf = settings.tsv.crlf_end_of_line_input; /// Some data types can deserialize absence of data (e.g. empty string), so eof is ok. if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0])) @@ -309,10 +310,10 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col /// Check if we have enough data in buffer to check if it's a null. 
if (istr.available() > null_representation.size()) { - auto check_for_null = [&istr, &null_representation]() + auto check_for_null = [&istr, &null_representation, &supports_crlf]() { auto * pos = istr.position(); - if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n')) + if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n' || (supports_crlf && *istr.position() == '\r'))) return true; istr.position() = pos; return false; diff --git a/src/DataTypes/Serializations/SerializationString.cpp b/src/DataTypes/Serializations/SerializationString.cpp index b2b083fd466..4ff0ba9a400 100644 --- a/src/DataTypes/Serializations/SerializationString.cpp +++ b/src/DataTypes/Serializations/SerializationString.cpp @@ -301,9 +301,13 @@ void SerializationString::deserializeWholeText(IColumn & column, ReadBuffer & is } -void SerializationString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - read(column, [&](ColumnString::Chars & data) { readEscapedStringInto(data, istr); }); + read(column, [&](ColumnString::Chars & data) + { + settings.tsv.crlf_end_of_line_input ? readEscapedStringInto,true>(data, istr) + : readEscapedStringInto,false>(data, istr); + }); } diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index a7e9fb8e99f..481696edc49 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -76,7 +76,7 @@ void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule esca /// Empty field, just skip spaces break; case FormatSettings::EscapingRule::Escaped: - readEscapedStringInto(out, buf); + readEscapedStringInto(out, buf); break; case FormatSettings::EscapingRule::Quoted: readQuotedFieldInto(out, buf); @@ -236,7 +236,7 @@ String readByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escapin if constexpr (read_string) readEscapedString(result, buf); else - readTSVField(result, buf); + readTSVField(result, buf); break; default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read value with {} escaping rule", escapingRuleToString(escaping_rule)); diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 05d35a57b12..90168325d99 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -496,13 +496,19 @@ static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf) } -template +template void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) { while (!buf.eof()) { - char * next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end()); - + char * next_pos; + if constexpr (support_crlf) + { + next_pos = find_first_symbols<'\t', '\n', '\\','\r'>(buf.position(), buf.buffer().end()); + } else { + next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end()); + } + appendToStringOrVector(s, buf, next_pos); buf.position() = next_pos; @@ -529,25 +535,41 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) } } } + + if (*buf.position() == '\r') + { + ++buf.position(); // advance to \n after \r + } } } -template +template void readEscapedStringInto(Vector & s, ReadBuffer & buf) { - readEscapedStringIntoImpl(s, buf); + readEscapedStringIntoImpl(s, buf); } void readEscapedString(String & s, ReadBuffer & buf) { s.clear(); - readEscapedStringInto(s, buf); + readEscapedStringInto(s, buf); 
} -template void readEscapedStringInto>(PaddedPODArray & s, ReadBuffer & buf); -template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); +template +void readEscapedStringCRLF(String & s, ReadBuffer & buf) +{ + s.clear(); + readEscapedStringInto(s, buf); +} +template void readEscapedStringInto,false>(PaddedPODArray & s, ReadBuffer & buf); +template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); +template void readEscapedStringInto,true>(PaddedPODArray & s, ReadBuffer & buf); +template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); + +template void readEscapedStringCRLF(String & s, ReadBuffer & buf); +template void readEscapedStringCRLF(String & s, ReadBuffer & buf); /** If enable_sql_style_quoting == true, * strings like 'abc''def' will be parsed as abc'def. @@ -1761,10 +1783,16 @@ void readJSONField(String & s, ReadBuffer & buf) readParsedValueInto(s, buf, parse_func); } +template void readTSVField(String & s, ReadBuffer & buf) { s.clear(); - readEscapedStringIntoImpl(s, buf); + readEscapedStringIntoImpl(s, buf); } +template void readTSVField(String & s, ReadBuffer & buf); +template void readTSVField(String & s, ReadBuffer & buf); + } + + diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 85584d63ee8..5ee56201035 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -548,6 +548,9 @@ void readString(String & s, ReadBuffer & buf); void readEscapedString(String & s, ReadBuffer & buf); +template +void readEscapedStringCRLF(String & s, ReadBuffer & buf); + void readQuotedString(String & s, ReadBuffer & buf); void readQuotedStringWithSQLStyle(String & s, ReadBuffer & buf); @@ -601,7 +604,7 @@ void readStringInto(Vector & s, ReadBuffer & buf); template void readNullTerminated(Vector & s, ReadBuffer & buf); -template +template void readEscapedStringInto(Vector & s, ReadBuffer & buf); template @@ -1757,6 +1760,7 @@ void readQuotedField(String & s, ReadBuffer & buf); void readJSONField(String & s, ReadBuffer & buf); +template void readTSVField(String & s, ReadBuffer & buf); /** Parse the escape sequence, which can be simple (one character after backslash) or more complex (multiple characters). diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 432e944a246..d59b5cdd2d0 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -134,7 +134,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex /// If the key is not found, skip the value. NullOutput sink; - readEscapedStringInto(sink, *in); + readEscapedStringInto(sink, *in); } else { diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 6f6dae334e5..afd91e913d2 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "Formats/FormatSettings.h" namespace DB { @@ -105,14 +106,17 @@ template String TabSeparatedFormatReader::readFieldIntoString() { String field; + bool support_crlf = format_settings.tsv.crlf_end_of_line_input; if (is_raw) readString(field, *buf); else { if constexpr (read_string) - readEscapedString(field, *buf); + support_crlf ? readEscapedStringCRLF(field, *buf) + : readEscapedStringCRLF(field, *buf); else - readTSVField(field, *buf); + support_crlf ? 
readTSVField(field, *buf) + : readTSVField(field, *buf); } return field; } @@ -123,7 +127,8 @@ void TabSeparatedFormatReader::skipField() if (is_raw) readStringInto(out, *buf); else - readEscapedStringInto(out, *buf); + format_settings.tsv.crlf_end_of_line_input ? readEscapedStringInto(out, *buf) + : readEscapedStringInto(out, *buf); } void TabSeparatedFormatReader::skipHeaderRow() From a12d8d749dc660da64c34188cff4dbc2d33946a8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 6 Feb 2024 17:17:24 +0100 Subject: [PATCH 005/392] modify skipRowEndDelimiter for \r --- .../Serializations/SerializationNullable.cpp | 5 ++--- .../Formats/Impl/TabSeparatedRowInputFormat.cpp | 11 ++++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index c0fbdfbb022..4b0ad0b54ba 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -290,7 +290,6 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col const SerializationPtr & nested_serialization) { const String & null_representation = settings.tsv.null_representation; - const bool supports_crlf = settings.tsv.crlf_end_of_line_input; /// Some data types can deserialize absence of data (e.g. empty string), so eof is ok. if (istr.eof() || (!null_representation.empty() && *istr.position() != null_representation[0])) @@ -310,10 +309,10 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col /// Check if we have enough data in buffer to check if it's a null. if (istr.available() > null_representation.size()) { - auto check_for_null = [&istr, &null_representation, &supports_crlf]() + auto check_for_null = [&istr, &null_representation]() { auto * pos = istr.position(); - if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n' || (supports_crlf && *istr.position() == '\r'))) + if (checkString(null_representation, istr) && (*istr.position() == '\t' || *istr.position() == '\n')) return true; istr.position() = pos; return false; diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index afd91e913d2..5a94a505984 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -23,9 +23,13 @@ namespace ErrorCodes /** Check for a common error case - usage of Windows line feed. */ +template static void checkForCarriageReturn(ReadBuffer & in) { - if (!in.eof() && (in.position()[0] == '\r' || (in.position() != in.buffer().begin() && in.position()[-1] == '\r'))) + bool crlf_escaped = false; + if constexpr (supports_crlf) + crlf_escaped = true; + if (!in.eof() && (in.position()[0] == '\r' || (crlf_escaped ? false : (in.position() != in.buffer().begin() && in.position()[-1] == '\r')))) throw Exception(ErrorCodes::INCORRECT_DATA, "\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format." " You must transform your file to Unix format." 
@@ -90,12 +94,13 @@ void TabSeparatedFormatReader::skipFieldDelimiter() void TabSeparatedFormatReader::skipRowEndDelimiter() { + bool supports_crfl = format_settings.tsv.crlf_end_of_line_input; if (buf->eof()) return; if (unlikely(first_row)) - { - checkForCarriageReturn(*buf); + { + supports_crfl ? checkForCarriageReturn(*buf) : checkForCarriageReturn(*buf); first_row = false; } From a2dfc4856712ad8003eef902d33bafb3f47cc6aa Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 8 Feb 2024 07:41:50 +0100 Subject: [PATCH 006/392] change typo crfl to crlf in skipRowEndDelimiter function --- src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 3f18aad3bd1..dbd939effe1 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -95,16 +95,16 @@ void TabSeparatedFormatReader::skipFieldDelimiter() void TabSeparatedFormatReader::skipRowEndDelimiter() { - bool supports_crfl = format_settings.tsv.crlf_end_of_line_input; + bool supports_crlf = format_settings.tsv.crlf_end_of_line_input; if (buf->eof()) return; - if (supports_crfl && first_row==false) + if (supports_crlf && first_row==false) { ++buf->position(); } if (unlikely(first_row)) { - supports_crfl ? checkForCarriageReturn(*buf) : checkForCarriageReturn(*buf); + supports_crlf ? checkForCarriageReturn(*buf) : checkForCarriageReturn(*buf); first_row = false; } assertChar('\n', *buf); From 04abd62288a55a0d6b3a315e08a6410a39e70199 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 8 Feb 2024 07:43:41 +0100 Subject: [PATCH 007/392] rename reference file to fix typo of crfl to crlf --- ..._files.reference => 02973_parse_crlf_with_tsv_files.reference} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{02973_parse_crfl_with_tsv_files.reference => 02973_parse_crlf_with_tsv_files.reference} (100%) diff --git a/tests/queries/0_stateless/02973_parse_crfl_with_tsv_files.reference b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference similarity index 100% rename from tests/queries/0_stateless/02973_parse_crfl_with_tsv_files.reference rename to tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference From d53632d61ea85040572c7f4f449e48b54737090d Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 8 Feb 2024 07:50:13 +0100 Subject: [PATCH 008/392] update SettingsChangesHistory --- src/Core/SettingsChangesHistory.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index db3a76e29cd..8b918c1c064 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -90,7 +90,8 @@ static std::map sett {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"}, {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"}, - {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential 
growth rate at which the adaptive asynchronous insert timeout decreases"}}}, + {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, + {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}}}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, From debc804b777ec8c0355b29d9f325defd461e5e63 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 8 Feb 2024 08:10:53 +0100 Subject: [PATCH 009/392] documentation changes --- docs/en/interfaces/formats.md | 1 + docs/ru/interfaces/formats.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index a11c3e5ef19..0a5a9c6a076 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -195,6 +195,7 @@ SELECT * FROM nestedt FORMAT TSV - [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. - [input_format_tsv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. - [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. +- [input_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV input format will be `\r\n` instead of `\n`. Default value - `false`. - [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. - [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`. - [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index b4794b02743..c4892c74515 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -119,6 +119,7 @@ Hello\nworld Hello\ world ``` +`\n\r` (CRLF) поддерживается с помощью настройки `input_format_tsv_crlf_end_of_line`. Второй вариант поддерживается, так как его использует MySQL при записи tab-separated дампа. 
From 3cca8410385c216ced1c9366a8e8cda8503f3407 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 9 Feb 2024 18:55:21 +0100 Subject: [PATCH 010/392] Unite s3/hdfs/azure storage implementations into a single class on top of IObjectStorage --- src/Backups/BackupIO_AzureBlobStorage.cpp | 26 +- src/Backups/BackupIO_AzureBlobStorage.h | 46 +- .../registerBackupEngineAzureBlobStorage.cpp | 18 +- src/CMakeLists.txt | 1 + .../AzureBlobStorage/AzureObjectStorage.cpp | 8 +- ...jectStorageRemoteMetadataRestoreHelper.cpp | 14 +- src/Disks/ObjectStorages/IObjectStorage.h | 5 +- src/Disks/ObjectStorages/IObjectStorage_fwd.h | 3 + .../MetadataStorageFromPlainObjectStorage.cpp | 2 +- .../ObjectStorages/ObjectStorageIterator.cpp | 2 +- .../ObjectStorages/ObjectStorageIterator.h | 22 +- .../ObjectStorageIteratorAsync.cpp | 4 +- .../ObjectStorageIteratorAsync.h | 4 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 26 +- .../copyAzureBlobStorageFile.h | 3 +- src/Interpreters/InterpreterSystemQuery.cpp | 6 +- src/Server/TCPHandler.cpp | 2 +- .../DataLakes/DeltaLakeMetadataParser.cpp | 87 +- .../DataLakes/DeltaLakeMetadataParser.h | 10 +- src/Storages/DataLakes/HudiMetadataParser.cpp | 181 +- src/Storages/DataLakes/HudiMetadataParser.h | 15 +- src/Storages/DataLakes/IStorageDataLake.h | 144 +- .../DataLakes/Iceberg/IcebergMetadata.cpp | 65 +- .../DataLakes/Iceberg/IcebergMetadata.h | 27 +- .../DataLakes/Iceberg/StorageIceberg.cpp | 79 - .../DataLakes/Iceberg/StorageIceberg.h | 117 +- src/Storages/DataLakes/S3MetadataReader.cpp | 86 - src/Storages/DataLakes/S3MetadataReader.h | 25 - src/Storages/DataLakes/StorageDeltaLake.h | 7 +- src/Storages/DataLakes/StorageHudi.h | 7 +- src/Storages/DataLakes/registerDataLakes.cpp | 38 +- src/Storages/HDFS/StorageHDFS.cpp | 1117 ---------- src/Storages/HDFS/StorageHDFS.h | 179 -- src/Storages/HDFS/StorageHDFSCluster.cpp | 98 - src/Storages/HDFS/StorageHDFSCluster.h | 56 - src/Storages/IStorage.h | 9 +- .../ObjectStorage/AzureConfiguration.cpp | 451 ++++ .../ObjectStorage/AzureConfiguration.h | 54 + src/Storages/ObjectStorage/Configuration.h | 55 + .../ObjectStorage/HDFSConfiguration.h | 81 + .../ObjectStorage/ReadBufferIterator.h | 197 ++ .../ObjectStorage/ReadFromObjectStorage.h | 105 + .../ObjectStorage/S3Configuration.cpp | 491 +++++ src/Storages/ObjectStorage/S3Configuration.h | 46 + src/Storages/ObjectStorage/Settings.h | 86 + .../ObjectStorage/StorageObjectStorage.cpp | 303 +++ .../ObjectStorage/StorageObjectStorage.h | 116 + .../StorageObjectStorageCluster.cpp | 107 + .../StorageObjectStorageCluster.h | 72 + .../ObjectStorage/StorageObjectStorageSink.h | 155 ++ .../StorageObjectStorageSource.cpp | 464 ++++ .../StorageObjectStorageSource.h | 217 ++ .../registerStorageObjectStorage.cpp | 166 ++ src/Storages/ObjectStorageConfiguration.h | 0 src/Storages/S3Queue/S3QueueSource.cpp | 85 +- src/Storages/S3Queue/S3QueueSource.h | 42 +- src/Storages/S3Queue/S3QueueTableMetadata.cpp | 3 +- src/Storages/S3Queue/S3QueueTableMetadata.h | 7 +- src/Storages/S3Queue/StorageS3Queue.cpp | 101 +- src/Storages/S3Queue/StorageS3Queue.h | 14 +- src/Storages/StorageAzureBlob.cpp | 1478 ------------- src/Storages/StorageAzureBlob.h | 339 --- src/Storages/StorageAzureBlobCluster.cpp | 89 - src/Storages/StorageAzureBlobCluster.h | 56 - src/Storages/StorageS3.cpp | 1905 ----------------- src/Storages/StorageS3.h | 399 ---- src/Storages/StorageS3Cluster.cpp | 103 - src/Storages/StorageS3Cluster.h | 58 - .../StorageSystemSchemaInferenceCache.cpp | 6 +- src/Storages/registerStorages.cpp | 17 +- 
src/TableFunctions/ITableFunctionCluster.h | 6 +- src/TableFunctions/ITableFunctionDataLake.h | 22 +- .../TableFunctionAzureBlobStorage.cpp | 323 --- .../TableFunctionAzureBlobStorage.h | 80 - .../TableFunctionAzureBlobStorageCluster.cpp | 85 - .../TableFunctionAzureBlobStorageCluster.h | 55 - src/TableFunctions/TableFunctionDeltaLake.cpp | 24 +- src/TableFunctions/TableFunctionHDFS.cpp | 54 - src/TableFunctions/TableFunctionHDFS.h | 50 - .../TableFunctionHDFSCluster.cpp | 61 - src/TableFunctions/TableFunctionHDFSCluster.h | 54 - src/TableFunctions/TableFunctionHudi.cpp | 24 +- src/TableFunctions/TableFunctionIceberg.cpp | 7 +- .../TableFunctionObjectStorage.cpp | 224 ++ .../TableFunctionObjectStorage.h | 150 ++ .../TableFunctionObjectStorageCluster.cpp | 113 + .../TableFunctionObjectStorageCluster.h | 91 + src/TableFunctions/TableFunctionS3.cpp | 464 ---- src/TableFunctions/TableFunctionS3.h | 86 - src/TableFunctions/TableFunctionS3Cluster.cpp | 74 - src/TableFunctions/TableFunctionS3Cluster.h | 64 - src/TableFunctions/registerTableFunctions.cpp | 23 +- src/TableFunctions/registerTableFunctions.h | 9 +- .../test_storage_azure_blob_storage/test.py | 8 +- 94 files changed, 4403 insertions(+), 8155 deletions(-) delete mode 100644 src/Storages/DataLakes/S3MetadataReader.cpp delete mode 100644 src/Storages/DataLakes/S3MetadataReader.h delete mode 100644 src/Storages/HDFS/StorageHDFS.cpp delete mode 100644 src/Storages/HDFS/StorageHDFS.h delete mode 100644 src/Storages/HDFS/StorageHDFSCluster.cpp delete mode 100644 src/Storages/HDFS/StorageHDFSCluster.h create mode 100644 src/Storages/ObjectStorage/AzureConfiguration.cpp create mode 100644 src/Storages/ObjectStorage/AzureConfiguration.h create mode 100644 src/Storages/ObjectStorage/Configuration.h create mode 100644 src/Storages/ObjectStorage/HDFSConfiguration.h create mode 100644 src/Storages/ObjectStorage/ReadBufferIterator.h create mode 100644 src/Storages/ObjectStorage/ReadFromObjectStorage.h create mode 100644 src/Storages/ObjectStorage/S3Configuration.cpp create mode 100644 src/Storages/ObjectStorage/S3Configuration.h create mode 100644 src/Storages/ObjectStorage/Settings.h create mode 100644 src/Storages/ObjectStorage/StorageObjectStorage.cpp create mode 100644 src/Storages/ObjectStorage/StorageObjectStorage.h create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageCluster.h create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageSink.h create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageSource.cpp create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageSource.h create mode 100644 src/Storages/ObjectStorage/registerStorageObjectStorage.cpp create mode 100644 src/Storages/ObjectStorageConfiguration.h delete mode 100644 src/Storages/StorageAzureBlob.cpp delete mode 100644 src/Storages/StorageAzureBlob.h delete mode 100644 src/Storages/StorageAzureBlobCluster.cpp delete mode 100644 src/Storages/StorageAzureBlobCluster.h delete mode 100644 src/Storages/StorageS3.cpp delete mode 100644 src/Storages/StorageS3.h delete mode 100644 src/Storages/StorageS3Cluster.cpp delete mode 100644 src/Storages/StorageS3Cluster.h delete mode 100644 src/TableFunctions/TableFunctionAzureBlobStorage.cpp delete mode 100644 src/TableFunctions/TableFunctionAzureBlobStorage.h delete mode 100644 src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp delete mode 100644 src/TableFunctions/TableFunctionAzureBlobStorageCluster.h delete mode 100644 
src/TableFunctions/TableFunctionHDFS.cpp delete mode 100644 src/TableFunctions/TableFunctionHDFS.h delete mode 100644 src/TableFunctions/TableFunctionHDFSCluster.cpp delete mode 100644 src/TableFunctions/TableFunctionHDFSCluster.h create mode 100644 src/TableFunctions/TableFunctionObjectStorage.cpp create mode 100644 src/TableFunctions/TableFunctionObjectStorage.h create mode 100644 src/TableFunctions/TableFunctionObjectStorageCluster.cpp create mode 100644 src/TableFunctions/TableFunctionObjectStorageCluster.h delete mode 100644 src/TableFunctions/TableFunctionS3.cpp delete mode 100644 src/TableFunctions/TableFunctionS3.h delete mode 100644 src/TableFunctions/TableFunctionS3Cluster.cpp delete mode 100644 src/TableFunctions/TableFunctionS3Cluster.h diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 52ce20d5108..dc636f90be7 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -29,7 +28,7 @@ namespace ErrorCodes } BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const StorageAzureBlobConfiguration & configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) @@ -37,10 +36,10 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + auto client_ptr = configuration.createClient(/* is_read_only */ false); object_storage = std::make_unique("BackupReaderAzureBlobStorage", std::move(client_ptr), - StorageAzureBlob::createSettings(context_), + configuration.createSettings(context_), configuration_.container); client = object_storage->getAzureBlobStorageClient(); settings = object_storage->getSettings(); @@ -137,7 +136,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( - StorageAzureBlob::Configuration configuration_, + const StorageAzureBlobConfiguration & configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) @@ -145,17 +144,22 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { - auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); + auto client_ptr = configuration.createClient(/* is_read_only */ false); object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration_.container); + configuration.createSettings(context_), + configuration.container); client = object_storage->getAzureBlobStorageClient(); settings = object_storage->getSettings(); } -void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, - bool copy_encrypted, UInt64 start_pos, UInt64 length) +void BackupWriterAzureBlobStorage::copyFileFromDisk( + const String & path_in_backup, + DiskPtr src_disk, + const 
String & src_path, + bool copy_encrypted, + UInt64 start_pos, + UInt64 length) { /// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible. auto source_data_source_description = src_disk->getDataSourceDescription(); @@ -241,7 +245,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) object_storage->listObjects(key,children,/*max_keys*/0); if (children.empty()) throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist"); - return children[0].metadata.size_bytes; + return children[0]->metadata.size_bytes; } std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 95325044a62..99002c53769 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -5,8 +5,8 @@ #if USE_AZURE_BLOB_STORAGE #include #include -#include #include +#include namespace DB @@ -16,20 +16,30 @@ namespace DB class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: - BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupReaderAzureBlobStorage( + const StorageAzureBlobConfiguration & configuration_, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_); + ~BackupReaderAzureBlobStorage() override; bool fileExists(const String & file_name) override; UInt64 getFileSize(const String & file_name) override; std::unique_ptr readFile(const String & file_name) override; - void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup, - DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override; + void copyFileToDisk( + const String & path_in_backup, + size_t file_size, + bool encrypted_in_backup, + DiskPtr destination_disk, + const String & destination_path, + WriteMode write_mode) override; private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlob::Configuration configuration; + StorageAzureBlobConfiguration configuration; std::unique_ptr object_storage; std::shared_ptr settings; }; @@ -37,16 +47,31 @@ private: class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: - BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupWriterAzureBlobStorage( + const StorageAzureBlobConfiguration & configuration_, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_); + ~BackupWriterAzureBlobStorage() override; bool fileExists(const String & file_name) override; UInt64 getFileSize(const String & file_name) override; std::unique_ptr writeFile(const String & file_name) override; - void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override; - void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, - bool copy_encrypted, UInt64 start_pos, UInt64 length) override; + void copyDataToFile( + const String & path_in_backup, + const CreateReadBufferFunction & create_read_buffer, + UInt64 start_pos, + UInt64 length) 
override; + + void copyFileFromDisk( + const String & path_in_backup, + DiskPtr src_disk, + const String & src_path, + bool copy_encrypted, + UInt64 start_pos, + UInt64 length) override; void copyFile(const String & destination, const String & source, size_t size) override; @@ -56,9 +81,10 @@ public: private: std::unique_ptr readFile(const String & file_name, size_t expected_file_size) override; void removeFilesBatch(const Strings & file_names); + const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlob::Configuration configuration; + StorageAzureBlobConfiguration configuration; std::unique_ptr object_storage; std::shared_ptr settings; }; diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 48f66569304..9408c7ccdcf 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -5,11 +5,11 @@ #if USE_AZURE_BLOB_STORAGE #include -#include #include #include #include #include +#include #include #endif @@ -49,7 +49,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; - StorageAzureBlob::Configuration configuration; + StorageAzureBlobConfiguration configuration; if (!id_arg.empty()) { @@ -59,6 +59,9 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (!config.has(config_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); + if (!config.has(config_prefix)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no `{}` in config", config_prefix); + if (config.has(config_prefix + ".connection_string")) { configuration.connection_url = config.getString(config_prefix + ".connection_string"); @@ -75,10 +78,11 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } if (args.size() > 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]"); if (args.size() == 1) - configuration.blob_path = args[0].safeGet(); + configuration.setPath(args[0].safeGet()); } else @@ -110,12 +114,14 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } BackupImpl::ArchiveParams archive_params; - if (hasRegisteredArchiveFileExtension(configuration.blob_path)) + if (hasRegisteredArchiveFileExtension(configuration.getPath())) { if (params.is_internal_backup) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); - archive_params.archive_name = removeFileNameFromURL(configuration.blob_path); + auto path = configuration.getPath(); + configuration.setPath(removeFileNameFromURL(path)); + archive_params.archive_name = configuration.getPath(); archive_params.compression_method = params.compression_method; archive_params.compression_level = params.compression_level; archive_params.password = params.password; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 08913ed1b5a..50130e6abd0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -119,6 +119,7 @@ endif() add_headers_and_sources(dbms Storages/DataLakes) add_headers_and_sources(dbms Storages/DataLakes/Iceberg) +add_headers_and_sources(dbms Storages/ObjectStorage) 
add_headers_and_sources(dbms Common/NamedCollections) if (TARGET ch_contrib::amqp_cpp) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 74389aedb64..2ca44137442 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -65,14 +65,14 @@ private: for (const auto & blob : blobs_list) { - batch.emplace_back( + batch.emplace_back(std::make_shared( blob.Name, ObjectMetadata{ static_cast(blob.BlobSize), Poco::Timestamp::fromEpochTime( std::chrono::duration_cast( static_cast(blob.Details.LastModified).time_since_epoch()).count()), - {}}); + {}})); } if (!blob_list_response.NextPageToken.HasValue() || blob_list_response.NextPageToken.Value().empty()) @@ -156,14 +156,14 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith for (const auto & blob : blobs_list) { - children.emplace_back( + children.emplace_back(std::make_shared( blob.Name, ObjectMetadata{ static_cast(blob.BlobSize), Poco::Timestamp::fromEpochTime( std::chrono::duration_cast( static_cast(blob.Details.LastModified).time_since_epoch()).count()), - {}}); + {}})); } if (max_keys) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index 0314e0a7e92..cc9ee3db505 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -363,18 +363,18 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * for (const auto & object : objects) { - LOG_INFO(disk->log, "Calling restore for key for disk {}", object.relative_path); + LOG_INFO(disk->log, "Calling restore for key for disk {}", object->relative_path); /// Skip file operations objects. They will be processed separately. - if (object.relative_path.find("/operations/") != String::npos) + if (object->relative_path.find("/operations/") != String::npos) continue; - const auto [revision, _] = extractRevisionAndOperationFromKey(object.relative_path); + const auto [revision, _] = extractRevisionAndOperationFromKey(object->relative_path); /// Filter early if it's possible to get revision from key. 
if (revision > restore_information.revision) continue; - keys_names.push_back(object.relative_path); + keys_names.push_back(object->relative_path); } if (!keys_names.empty()) @@ -474,10 +474,10 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject for (const auto & object : objects) { - const auto [revision, operation] = extractRevisionAndOperationFromKey(object.relative_path); + const auto [revision, operation] = extractRevisionAndOperationFromKey(object->relative_path); if (revision == UNKNOWN_REVISION) { - LOG_WARNING(disk->log, "Skip key {} with unknown revision", object.relative_path); + LOG_WARNING(disk->log, "Skip key {} with unknown revision", object->relative_path); continue; } @@ -490,7 +490,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject if (send_metadata) revision_counter = revision - 1; - auto object_attributes = *(source_object_storage->getObjectMetadata(object.relative_path).attributes); + auto object_attributes = *(source_object_storage->getObjectMetadata(object->relative_path).attributes); if (operation == rename) { auto from_path = object_attributes["from_path"]; diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 049935ad60c..7d354e6383d 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -62,6 +62,8 @@ struct RelativePathWithMetadata : relative_path(std::move(relative_path_)) , metadata(std::move(metadata_)) {} + + virtual ~RelativePathWithMetadata() = default; }; struct ObjectKeyWithMetadata @@ -77,7 +79,8 @@ struct ObjectKeyWithMetadata {} }; -using RelativePathsWithMetadata = std::vector; +using RelativePathWithMetadataPtr = std::shared_ptr; +using RelativePathsWithMetadata = std::vector; using ObjectKeysWithMetadata = std::vector; class IObjectStorageIterator; diff --git a/src/Disks/ObjectStorages/IObjectStorage_fwd.h b/src/Disks/ObjectStorages/IObjectStorage_fwd.h index f6ebc883682..67efa4aae2b 100644 --- a/src/Disks/ObjectStorages/IObjectStorage_fwd.h +++ b/src/Disks/ObjectStorages/IObjectStorage_fwd.h @@ -10,4 +10,7 @@ using ObjectStoragePtr = std::shared_ptr; class IMetadataStorage; using MetadataStoragePtr = std::shared_ptr; +class IObjectStorageIterator; +using ObjectStorageIteratorPtr = std::shared_ptr; + } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index b03809f5b39..f07cf23106f 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -77,7 +77,7 @@ std::vector MetadataStorageFromPlainObjectStorage::listDirectory(co std::vector result; for (const auto & path_size : files) { - result.push_back(path_size.relative_path); + result.push_back(path_size->relative_path); } std::unordered_set duplicates_filter; diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.cpp b/src/Disks/ObjectStorages/ObjectStorageIterator.cpp index 72ec6e0e500..3d939ce9230 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIterator.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.cpp @@ -9,7 +9,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -RelativePathWithMetadata ObjectStorageIteratorFromList::current() +RelativePathWithMetadataPtr ObjectStorageIteratorFromList::current() { if (!isValid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator"); diff --git 
a/src/Disks/ObjectStorages/ObjectStorageIterator.h b/src/Disks/ObjectStorages/ObjectStorageIterator.h index 841b0ea6664..e934fc2056d 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIterator.h +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.h @@ -12,9 +12,9 @@ public: virtual void next() = 0; virtual void nextBatch() = 0; virtual bool isValid() = 0; - virtual RelativePathWithMetadata current() = 0; + virtual RelativePathWithMetadataPtr current() = 0; virtual RelativePathsWithMetadata currentBatch() = 0; - virtual std::optional getCurrrentBatchAndScheduleNext() = 0; + virtual std::optional getCurrentBatchAndScheduleNext() = 0; virtual size_t getAccumulatedSize() const = 0; virtual ~IObjectStorageIterator() = default; @@ -47,22 +47,14 @@ public: return batch_iterator != batch.end(); } - RelativePathWithMetadata current() override; + RelativePathWithMetadataPtr current() override; - RelativePathsWithMetadata currentBatch() override - { - return batch; - } + RelativePathsWithMetadata currentBatch() override { return batch; } - virtual std::optional getCurrrentBatchAndScheduleNext() override - { - return std::nullopt; - } + std::optional getCurrentBatchAndScheduleNext() override { return std::nullopt; } + + size_t getAccumulatedSize() const override { return batch.size(); } - size_t getAccumulatedSize() const override - { - return batch.size(); - } private: RelativePathsWithMetadata batch; RelativePathsWithMetadata::iterator batch_iterator; diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index 990e66fc4e5..b7729623a64 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -82,7 +82,7 @@ bool IObjectStorageIteratorAsync::isValid() return current_batch_iterator != current_batch.end(); } -RelativePathWithMetadata IObjectStorageIteratorAsync::current() +RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current() { if (!isValid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator"); @@ -101,7 +101,7 @@ RelativePathsWithMetadata IObjectStorageIteratorAsync::currentBatch() return current_batch; } -std::optional IObjectStorageIteratorAsync::getCurrrentBatchAndScheduleNext() +std::optional IObjectStorageIteratorAsync::getCurrentBatchAndScheduleNext() { std::lock_guard lock(mutex); if (!is_initialized) diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index a6abe03bac9..8d155f7ec8d 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -26,10 +26,10 @@ public: void next() override; void nextBatch() override; bool isValid() override; - RelativePathWithMetadata current() override; + RelativePathWithMetadataPtr current() override; RelativePathsWithMetadata currentBatch() override; size_t getAccumulatedSize() const override; - std::optional getCurrrentBatchAndScheduleNext() override; + std::optional getCurrentBatchAndScheduleNext() override; ~IObjectStorageIteratorAsync() override { diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 4cc49288af6..cc138c43c71 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -120,25 +120,22 @@ private: { ProfileEvents::increment(ProfileEvents::S3ListObjects); - bool result = false; auto outcome = 
client->ListObjectsV2(request); + /// Outcome failure will be handled on the caller side. if (outcome.IsSuccess()) { + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); + auto objects = outcome.GetResult().GetContents(); - - result = !objects.empty(); - for (const auto & object : objects) - batch.emplace_back( - object.GetKey(), - ObjectMetadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}} - ); + { + ObjectMetadata metadata{static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}}; + batch.emplace_back(std::make_shared(object.GetKey(), std::move(metadata))); + } - if (result) - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - return result; + /// It returns false when all objects were returned + return outcome.GetResult().GetIsTruncated(); } throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", @@ -249,7 +246,6 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN if (write_settings.s3_allow_parallel_part_upload) scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); - auto blob_storage_log = BlobStorageLogWriter::create(disk_name); if (blob_storage_log) blob_storage_log->local_path = object.local_path; @@ -300,12 +296,12 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet break; for (const auto & object : objects) - children.emplace_back( + children.emplace_back(std::make_shared( object.GetKey(), ObjectMetadata{ static_cast(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), - {}}); + {}})); if (max_keys) { diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 83814f42693..cc23f604278 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -4,9 +4,8 @@ #if USE_AZURE_BLOB_STORAGE -#include -#include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9a80553f149..d697d90c8a6 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -52,11 +52,9 @@ #include #include #include -#include #include -#include +#include #include -#include #include #include #include @@ -482,7 +480,7 @@ BlockIO InterpreterSystemQuery::execute() StorageURL::getSchemaCache(getContext()).clear(); #if USE_AZURE_BLOB_STORAGE if (caches_to_drop.contains("AZURE")) - StorageAzureBlob::getSchemaCache(getContext()).clear(); + StorageAzureBlobStorage::getSchemaCache(getContext()).clear(); #endif break; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index e1086ac5833..58672a72563 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 3584f137225..55ff8fefdd5 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -4,8 +4,6 @@ #include #if USE_AWS_S3 && USE_PARQUET -#include -#include #include #include #include @@ -13,10 +11,10 @@ #include #include #include +#include #include #include #include 
-#include namespace fs = std::filesystem; @@ -29,8 +27,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -template -struct DeltaLakeMetadataParser::Impl +struct DeltaLakeMetadataParser::Impl { /** * Useful links: @@ -65,10 +62,13 @@ struct DeltaLakeMetadataParser::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. */ - std::set processMetadataFiles(const Configuration & configuration, ContextPtr context) + std::set processMetadataFiles( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfiguration & configuration, + ContextPtr context) { std::set result_files; - const auto checkpoint_version = getCheckpointIfExists(result_files, configuration, context); + const auto checkpoint_version = getCheckpointIfExists(result_files, object_storage, configuration, context); if (checkpoint_version) { @@ -78,10 +78,10 @@ struct DeltaLakeMetadataParser::Impl const auto filename = withPadding(++current_version) + metadata_file_suffix; const auto file_path = fs::path(configuration.getPath()) / deltalake_metadata_directory / filename; - if (!MetadataReadHelper::exists(file_path, configuration)) + if (!object_storage->exists(StoredObject(file_path))) break; - processMetadataFile(file_path, result_files, configuration, context); + processMetadataFile(file_path, result_files, object_storage, configuration, context); } LOG_TRACE( @@ -90,16 +90,33 @@ struct DeltaLakeMetadataParser::Impl } else { - const auto keys = MetadataReadHelper::listFiles( - configuration, deltalake_metadata_directory, metadata_file_suffix); - + const auto keys = listFiles(object_storage, configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files, configuration, context); + processMetadataFile(key, result_files, object_storage, configuration, context); } return result_files; } + std::vector listFiles( + const ObjectStoragePtr & object_storage, + const StorageObjectStorageConfiguration & configuration, + const String & prefix, const String & suffix) + { + auto key = std::filesystem::path(configuration.getPath()) / prefix; + RelativePathsWithMetadata files_with_metadata; + object_storage->listObjects(key, files_with_metadata, 0); + Strings res; + for (const auto & file_with_metadata : files_with_metadata) + { + const auto & filename = file_with_metadata->relative_path; + if (filename.ends_with(suffix)) + res.push_back(filename); + } + LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); + return res; + } + /** * Example of content of a single .json metadata file: * " @@ -132,10 +149,12 @@ struct DeltaLakeMetadataParser::Impl void processMetadataFile( const String & key, std::set & result, - const Configuration & configuration, + ObjectStoragePtr object_storage, + const StorageObjectStorageConfiguration & configuration, ContextPtr context) { - auto buf = MetadataReadHelper::createReadBuffer(key, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(key), read_settings); char c; while (!buf->eof()) @@ -180,14 +199,18 @@ struct DeltaLakeMetadataParser::Impl * * We need to get "version", which is the version of the checkpoint we need to read. 
*/ - size_t readLastCheckpointIfExists(const Configuration & configuration, ContextPtr context) + size_t readLastCheckpointIfExists( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfiguration & configuration, + ContextPtr context) const { const auto last_checkpoint_file = fs::path(configuration.getPath()) / deltalake_metadata_directory / "_last_checkpoint"; - if (!MetadataReadHelper::exists(last_checkpoint_file, configuration)) + if (!object_storage->exists(StoredObject(last_checkpoint_file))) return 0; String json_str; - auto buf = MetadataReadHelper::createReadBuffer(last_checkpoint_file, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(last_checkpoint_file), read_settings); readJSONObjectPossiblyInvalid(json_str, *buf); const JSON json(json_str); @@ -237,9 +260,13 @@ struct DeltaLakeMetadataParser::Impl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists(std::set & result, const Configuration & configuration, ContextPtr context) + size_t getCheckpointIfExists( + std::set & result, + ObjectStoragePtr object_storage, + const StorageObjectStorageConfiguration & configuration, + ContextPtr context) { - const auto version = readLastCheckpointIfExists(configuration, context); + const auto version = readLastCheckpointIfExists(object_storage, configuration, context); if (!version) return 0; @@ -248,7 +275,8 @@ struct DeltaLakeMetadataParser::Impl LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string()); - auto buf = MetadataReadHelper::createReadBuffer(checkpoint_path, context, configuration); + auto read_settings = context->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(checkpoint_path), read_settings); auto format_settings = getFormatSettings(context); /// Force nullable, because this parquet file for some reason does not have nullable @@ -317,22 +345,17 @@ struct DeltaLakeMetadataParser::Impl LoggerPtr log = getLogger("DeltaLakeMetadataParser"); }; +DeltaLakeMetadataParser::DeltaLakeMetadataParser() : impl(std::make_unique()) {} -template -DeltaLakeMetadataParser::DeltaLakeMetadataParser() : impl(std::make_unique()) +Strings DeltaLakeMetadataParser::getFiles( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + ContextPtr context) { -} - -template -Strings DeltaLakeMetadataParser::getFiles(const Configuration & configuration, ContextPtr context) -{ - auto result = impl->processMetadataFiles(configuration, context); + auto result = impl->processMetadataFiles(object_storage, *configuration, context); return Strings(result.begin(), result.end()); } -template DeltaLakeMetadataParser::DeltaLakeMetadataParser(); -template Strings DeltaLakeMetadataParser::getFiles( - const StorageS3::Configuration & configuration, ContextPtr); } #endif diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h index df7276b90b4..f94024597d6 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.h @@ -2,17 +2,21 @@ #include #include +#include +#include namespace DB { -template struct DeltaLakeMetadataParser { public: - DeltaLakeMetadataParser(); + DeltaLakeMetadataParser(); - Strings getFiles(const Configuration & configuration, ContextPtr context); + Strings getFiles( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + 
ContextPtr context); private: struct Impl; diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/DataLakes/HudiMetadataParser.cpp index 699dfe8fda0..8571c035b32 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ b/src/Storages/DataLakes/HudiMetadataParser.cpp @@ -1,16 +1,11 @@ #include +#include #include -#include #include #include #include "config.h" -#include #include -#if USE_AWS_S3 -#include -#include - namespace DB { @@ -19,98 +14,98 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -template -struct HudiMetadataParser::Impl -{ - /** - * Useful links: - * - https://hudi.apache.org/tech-specs/ - * - https://hudi.apache.org/docs/file_layouts/ - */ +/** + * Useful links: + * - https://hudi.apache.org/tech-specs/ + * - https://hudi.apache.org/docs/file_layouts/ + */ - /** - * Hudi tables store metadata files and data files. - * Metadata files are stored in .hoodie/metadata directory. Though unlike DeltaLake and Iceberg, - * metadata is not required in order to understand which files we need to read, moreover, - * for Hudi metadata does not always exist. - * - * There can be two types of data files - * 1. base files (columnar file formats like Apache Parquet/Orc) - * 2. log files - * Currently we support reading only `base files`. - * Data file name format: - * [File Id]_[File Write Token]_[Transaction timestamp].[File Extension] - * - * To find needed parts we need to find out latest part file for every file group for every partition. - * Explanation why: - * Hudi reads in and overwrites the entire table/partition with each update. - * Hudi controls the number of file groups under a single partition according to the - * hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group. - * Each file group is identified by File Id. - */ - Strings processMetadataFiles(const Configuration & configuration) +/** + * Hudi tables store metadata files and data files. + * Metadata files are stored in .hoodie/metadata directory. Though unlike DeltaLake and Iceberg, + * metadata is not required in order to understand which files we need to read, moreover, + * for Hudi metadata does not always exist. + * + * There can be two types of data files + * 1. base files (columnar file formats like Apache Parquet/Orc) + * 2. log files + * Currently we support reading only `base files`. + * Data file name format: + * [File Id]_[File Write Token]_[Transaction timestamp].[File Extension] + * + * To find needed parts we need to find out latest part file for every file group for every partition. + * Explanation why: + * Hudi reads in and overwrites the entire table/partition with each update. + * Hudi controls the number of file groups under a single partition according to the + * hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group. + * Each file group is identified by File Id. 
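+ * For example (file names are illustrative): a partition containing
+ *     fileId1_0-1-0_20240121124500.parquet
+ *     fileId1_0-2-0_20240122183000.parquet
+ * has a single file group (fileId1); only the file with the larger transaction
+ * timestamp (20240122183000) is kept as a data file by getFiles() below.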
+ */ +std::vector listFiles( + const ObjectStoragePtr & object_storage, + const StorageObjectStorageConfiguration & configuration, + const String & prefix, const String & suffix) +{ + auto key = std::filesystem::path(configuration.getPath()) / prefix; + RelativePathsWithMetadata files_with_metadata; + object_storage->listObjects(key, files_with_metadata, 0); + Strings res; + for (const auto & file_with_metadata : files_with_metadata) { - auto log = getLogger("HudiMetadataParser"); - - const auto keys = MetadataReadHelper::listFiles(configuration, "", Poco::toLower(configuration.format)); - - using Partition = std::string; - using FileID = std::string; - struct FileInfo - { - String key; - UInt64 timestamp = 0; - }; - std::unordered_map> data_files; - - for (const auto & key : keys) - { - auto key_file = std::filesystem::path(key); - Strings file_parts; - const String stem = key_file.stem(); - splitInto<'_'>(file_parts, stem); - if (file_parts.size() != 3) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key); - - const auto partition = key_file.parent_path().stem(); - const auto & file_id = file_parts[0]; - const auto timestamp = parse(file_parts[2]); - - auto & file_info = data_files[partition][file_id]; - if (file_info.timestamp == 0 || file_info.timestamp < timestamp) - { - file_info.key = std::move(key); - file_info.timestamp = timestamp; - } - } - - Strings result; - for (auto & [partition, partition_data] : data_files) - { - LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size()); - for (auto & [file_id, file_data] : partition_data) - result.push_back(std::move(file_data.key)); - } - return result; + const auto & filename = file_with_metadata->relative_path; + if (filename.ends_with(suffix)) + res.push_back(filename); } -}; + LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); + return res; +} - -template -HudiMetadataParser::HudiMetadataParser() : impl(std::make_unique()) +Strings HudiMetadataParser::getFiles( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + ContextPtr) { + auto log = getLogger("HudiMetadataParser"); + + const auto keys = listFiles(object_storage, *configuration, "", Poco::toLower(configuration->format)); + + using Partition = std::string; + using FileID = std::string; + struct FileInfo + { + String key; + UInt64 timestamp = 0; + }; + std::unordered_map> data_files; + + for (const auto & key : keys) + { + auto key_file = std::filesystem::path(key); + Strings file_parts; + const String stem = key_file.stem(); + splitInto<'_'>(file_parts, stem); + if (file_parts.size() != 3) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key); + + const auto partition = key_file.parent_path().stem(); + const auto & file_id = file_parts[0]; + const auto timestamp = parse(file_parts[2]); + + auto & file_info = data_files[partition][file_id]; + if (file_info.timestamp == 0 || file_info.timestamp < timestamp) + { + file_info.key = key; + file_info.timestamp = timestamp; + } + } + + Strings result; + for (auto & [partition, partition_data] : data_files) + { + LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size()); + for (auto & [file_id, file_data] : partition_data) + result.push_back(std::move(file_data.key)); + } + return result; } -template -Strings HudiMetadataParser::getFiles(const Configuration & configuration, ContextPtr) -{ - return impl->processMetadataFiles(configuration); } - 
-template HudiMetadataParser::HudiMetadataParser(); -template Strings HudiMetadataParser::getFiles( - const StorageS3::Configuration & configuration, ContextPtr); - -} - -#endif diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h index 6727ba2f718..2fc004595ca 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.h +++ b/src/Storages/DataLakes/HudiMetadataParser.h @@ -1,22 +1,17 @@ #pragma once #include -#include +#include +#include namespace DB { -template struct HudiMetadataParser { -public: - HudiMetadataParser(); - - Strings getFiles(const Configuration & configuration, ContextPtr context); - -private: - struct Impl; - std::shared_ptr impl; + Strings getFiles( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, ContextPtr context); }; } diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h index db3f835494f..934bf227c42 100644 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ b/src/Storages/DataLakes/IStorageDataLake.h @@ -8,127 +8,91 @@ #include #include #include -#include +#include +#include namespace DB { -template -class IStorageDataLake : public Storage +template +class IStorageDataLake : public StorageObjectStorage { public: static constexpr auto name = Name::name; - using Configuration = typename Storage::Configuration; - template - explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, bool attach, Args && ...args) - : Storage(getConfigurationForDataRead(configuration_, context_, {}, attach), context_, std::forward(args)...) - , base_configuration(configuration_) - , log(getLogger(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) + using Storage = StorageObjectStorage; + using ConfigurationPtr = Storage::ConfigurationPtr; - template - static StoragePtr create(const Configuration & configuration_, ContextPtr context_, bool attach, Args && ...args) + static StoragePtr create( + ConfigurationPtr base_configuration, + ContextPtr context, + const String & engine_name_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment_, + std::optional format_settings_, + bool /* attach */) { - return std::make_shared>(configuration_, context_, attach, std::forward(args)...); + auto object_storage = base_configuration->createOrUpdateObjectStorage(context); + + auto configuration = base_configuration->clone(); + configuration->getPaths() = MetadataParser().getFiles(object_storage, configuration, context); + + return std::make_shared>( + base_configuration, configuration, object_storage, engine_name_, context, + table_id_, columns_, constraints_, comment_, format_settings_); } String getName() const override { return name; } static ColumnsDescription getTableStructureFromData( - Configuration & base_configuration, - const std::optional & format_settings, + ObjectStoragePtr object_storage_, + ConfigurationPtr base_configuration, + const std::optional &, ContextPtr local_context) { - auto configuration = getConfigurationForDataRead(base_configuration, local_context); - return Storage::getTableStructureFromData(configuration, format_settings, local_context); + auto metadata = parseIcebergMetadata(object_storage_, base_configuration, local_context); + return ColumnsDescription(metadata->getTableSchema()); } - static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) + std::pair 
updateConfigurationAndGetCopy(ContextPtr local_context) override { - return Storage::getConfiguration(engine_args, local_context, /* get_format_from_file */false); + std::lock_guard lock(Storage::configuration_update_mutex); + + auto new_object_storage = base_configuration->createOrUpdateObjectStorage(local_context); + bool updated = new_object_storage != nullptr; + if (updated) + Storage::object_storage = new_object_storage; + + auto new_keys = MetadataParser().getFiles(Storage::object_storage, base_configuration, local_context); + + if (updated || new_keys != Storage::configuration->getPaths()) + { + auto updated_configuration = base_configuration->clone(); + /// If metadata wasn't changed, we won't list data files again. + updated_configuration->getPaths() = new_keys; + Storage::configuration = updated_configuration; + } + return {Storage::configuration, Storage::object_storage}; } - Configuration updateConfigurationAndGetCopy(ContextPtr local_context) override + template + explicit IStorageDataLake( + ConfigurationPtr base_configuration_, + Args &&... args) + : Storage(std::forward(args)...) + , base_configuration(base_configuration_) { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - return Storage::getConfiguration(); - } - - void updateConfiguration(ContextPtr local_context) override - { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); } private: - static Configuration getConfigurationForDataRead( - const Configuration & base_configuration, ContextPtr local_context, const Strings & keys = {}, bool attach = false) - { - auto configuration{base_configuration}; - configuration.update(local_context); - configuration.static_configuration = true; - - try - { - if (keys.empty()) - configuration.keys = getDataFiles(configuration, local_context); - else - configuration.keys = keys; - - LOG_TRACE( - getLogger("DataLake"), - "New configuration path: {}, keys: {}", - configuration.getPath(), fmt::join(configuration.keys, ", ")); - - configuration.connect(local_context); - return configuration; - } - catch (...) - { - if (!attach) - throw; - tryLogCurrentException(__PRETTY_FUNCTION__); - return configuration; - } - } - - static Strings getDataFiles(const Configuration & configuration, ContextPtr local_context) - { - return MetadataParser().getFiles(configuration, local_context); - } - - void updateConfigurationImpl(ContextPtr local_context) - { - const bool updated = base_configuration.update(local_context); - auto new_keys = getDataFiles(base_configuration, local_context); - - if (!updated && new_keys == Storage::getConfiguration().keys) - return; - - Storage::useConfiguration(getConfigurationForDataRead(base_configuration, local_context, new_keys)); - } - - Configuration base_configuration; - std::mutex configuration_update_mutex; + ConfigurationPtr base_configuration; LoggerPtr log; }; -template -static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args) -{ - auto configuration = DataLake::getConfiguration(args.engine_args, args.getLocalContext()); - - /// Data lakes use parquet format, no need for schema inference. 
- if (configuration.format == "auto") - configuration.format = "Parquet"; - - return DataLake::create(configuration, args.getContext(), args.attach, args.table_id, args.columns, args.constraints, - args.comment, getFormatSettings(args.getContext())); -} - } #endif diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp index df1536f53fc..08cebb3f396 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp @@ -21,11 +21,11 @@ #include #include #include +#include #include #include #include -#include -#include +#include #include #include @@ -44,7 +44,8 @@ namespace ErrorCodes } IcebergMetadata::IcebergMetadata( - const StorageS3::Configuration & configuration_, + ObjectStoragePtr object_storage_, + StorageObjectStorageConfigurationPtr configuration_, DB::ContextPtr context_, Int32 metadata_version_, Int32 format_version_, @@ -52,6 +53,7 @@ IcebergMetadata::IcebergMetadata( Int32 current_schema_id_, DB::NamesAndTypesList schema_) : WithContext(context_) + , object_storage(object_storage_) , configuration(configuration_) , metadata_version(metadata_version_) , format_version(format_version_) @@ -331,21 +333,42 @@ MutableColumns parseAvro( return columns; } +std::vector listFiles( + const ObjectStoragePtr & object_storage, + const StorageObjectStorageConfiguration & configuration, + const String & prefix, const String & suffix) +{ + auto key = std::filesystem::path(configuration.getPath()) / prefix; + RelativePathsWithMetadata files_with_metadata; + object_storage->listObjects(key, files_with_metadata, 0); + Strings res; + for (const auto & file_with_metadata : files_with_metadata) + { + const auto & filename = file_with_metadata->relative_path; + if (filename.ends_with(suffix)) + res.push_back(filename); + } + LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); + return res; +} + /** * Each version of table metadata is stored in a `metadata` directory and * has one of 2 formats: * 1) v.metadata.json, where V - metadata version. 
* 2) -.metadata.json, where V - metadata version */ -std::pair getMetadataFileAndVersion(const StorageS3::Configuration & configuration) +std::pair getMetadataFileAndVersion( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfiguration & configuration) { - const auto metadata_files = S3DataLakeMetadataReadHelper::listFiles(configuration, "metadata", ".metadata.json"); + const auto metadata_files = listFiles(object_storage, configuration, "metadata", ".metadata.json"); if (metadata_files.empty()) { throw Exception( ErrorCodes::FILE_DOESNT_EXIST, "The metadata file for Iceberg table with path {} doesn't exist", - configuration.url.key); + configuration.getPath()); } std::vector> metadata_files_with_versions; @@ -372,11 +395,15 @@ std::pair getMetadataFileAndVersion(const StorageS3::Configuratio } -std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context_) +std::unique_ptr parseIcebergMetadata( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + ContextPtr context_) { - const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(configuration); + const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(object_storage, *configuration); LOG_DEBUG(getLogger("IcebergMetadata"), "Parse metadata {}", metadata_file_path); - auto buf = S3DataLakeMetadataReadHelper::createReadBuffer(metadata_file_path, context_, configuration); + auto read_settings = context_->getReadSettings(); + auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings); String json_str; readJSONObjectPossiblyInvalid(json_str, *buf); @@ -397,12 +424,12 @@ std::unique_ptr parseIcebergMetadata(const StorageS3::Configura if (snapshot->getValue("snapshot-id") == current_snapshot_id) { const auto path = snapshot->getValue("manifest-list"); - manifest_list_file = std::filesystem::path(configuration.url.key) / "metadata" / std::filesystem::path(path).filename(); + manifest_list_file = std::filesystem::path(configuration->getPath()) / "metadata" / std::filesystem::path(path).filename(); break; } } - return std::make_unique(configuration, context_, metadata_version, format_version, manifest_list_file, schema_id, schema); + return std::make_unique(object_storage, configuration, context_, metadata_version, format_version, manifest_list_file, schema_id, schema); } /** @@ -441,12 +468,14 @@ Strings IcebergMetadata::getDataFiles() LOG_TEST(log, "Collect manifest files from manifest list {}", manifest_list_file); - auto manifest_list_buf = S3DataLakeMetadataReadHelper::createReadBuffer(manifest_list_file, getContext(), configuration); + auto context = getContext(); + auto read_settings = context->getReadSettings(); + auto manifest_list_buf = object_storage->readObject(StoredObject(manifest_list_file), read_settings); auto manifest_list_file_reader = std::make_unique(std::make_unique(*manifest_list_buf)); auto data_type = AvroSchemaReader::avroNodeToDataType(manifest_list_file_reader->dataSchema().root()->leafAt(0)); Block header{{data_type->createColumn(), data_type, "manifest_path"}}; - auto columns = parseAvro(*manifest_list_file_reader, header, getFormatSettings(getContext())); + auto columns = parseAvro(*manifest_list_file_reader, header, getFormatSettings(context)); auto & col = columns.at(0); if (col->getDataType() != TypeIndex::String) @@ -462,7 +491,7 @@ Strings IcebergMetadata::getDataFiles() { const auto file_path = col_str->getDataAt(i).toView(); const auto filename = 
std::filesystem::path(file_path).filename(); - manifest_files.emplace_back(std::filesystem::path(configuration.url.key) / "metadata" / filename); + manifest_files.emplace_back(std::filesystem::path(configuration->getPath()) / "metadata" / filename); } NameSet files; @@ -471,7 +500,7 @@ Strings IcebergMetadata::getDataFiles() { LOG_TEST(log, "Process manifest file {}", manifest_file); - auto buffer = S3DataLakeMetadataReadHelper::createReadBuffer(manifest_file, getContext(), configuration); + auto buffer = object_storage->readObject(StoredObject(manifest_file), read_settings); auto manifest_file_reader = std::make_unique(std::make_unique(*buffer)); /// Manifest file should always have table schema in avro file metadata. By now we don't support tables with evolved schema, @@ -482,7 +511,7 @@ Strings IcebergMetadata::getDataFiles() Poco::JSON::Parser parser; Poco::Dynamic::Var json = parser.parse(schema_json_string); Poco::JSON::Object::Ptr schema_object = json.extract(); - if (!getContext()->getSettingsRef().iceberg_engine_ignore_schema_evolution && schema_object->getValue("schema-id") != current_schema_id) + if (!context->getSettingsRef().iceberg_engine_ignore_schema_evolution && schema_object->getValue("schema-id") != current_schema_id) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, "Cannot read Iceberg table: the table schema has been changed at least 1 time, reading tables with evolved schema is not " @@ -595,9 +624,9 @@ Strings IcebergMetadata::getDataFiles() const auto status = status_int_column->getInt(i); const auto data_path = std::string(file_path_string_column->getDataAt(i).toView()); - const auto pos = data_path.find(configuration.url.key); + const auto pos = data_path.find(configuration->getPath()); if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration.url.key, data_path); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected to find {} in data path: {}", configuration->getPath(), data_path); const auto file_path = data_path.substr(pos); diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h index 3e6a2ec3415..92946e4192b 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h @@ -2,9 +2,10 @@ #if USE_AWS_S3 && USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. -#include #include #include +#include +#include namespace DB { @@ -59,13 +60,15 @@ namespace DB class IcebergMetadata : WithContext { public: - IcebergMetadata(const StorageS3::Configuration & configuration_, - ContextPtr context_, - Int32 metadata_version_, - Int32 format_version_, - String manifest_list_file_, - Int32 current_schema_id_, - NamesAndTypesList schema_); + IcebergMetadata( + ObjectStoragePtr object_storage_, + StorageObjectStorageConfigurationPtr configuration_, + ContextPtr context_, + Int32 metadata_version_, + Int32 format_version_, + String manifest_list_file_, + Int32 current_schema_id_, + NamesAndTypesList schema_); /// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files. 
/// All subsequent calls will return saved list of files (because it cannot be changed without changing metadata file) @@ -77,7 +80,8 @@ public: size_t getVersion() const { return metadata_version; } private: - const StorageS3::Configuration configuration; + ObjectStoragePtr object_storage; + StorageObjectStorageConfigurationPtr configuration; Int32 metadata_version; Int32 format_version; String manifest_list_file; @@ -88,7 +92,10 @@ private: }; -std::unique_ptr parseIcebergMetadata(const StorageS3::Configuration & configuration, ContextPtr context); +std::unique_ptr parseIcebergMetadata( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + ContextPtr context); } diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp index 8a1a2cdbd8f..ad1a27c312b 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp @@ -5,85 +5,6 @@ namespace DB { -StoragePtr StorageIceberg::create( - const DB::StorageIceberg::Configuration & base_configuration, - DB::ContextPtr context_, - bool attach, - const DB::StorageID & table_id_, - const DB::ColumnsDescription & columns_, - const DB::ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_) -{ - auto configuration{base_configuration}; - configuration.update(context_); - std::unique_ptr metadata; - NamesAndTypesList schema_from_metadata; - try - { - metadata = parseIcebergMetadata(configuration, context_); - schema_from_metadata = metadata->getTableSchema(); - configuration.keys = metadata->getDataFiles(); - } - catch (...) - { - if (!attach) - throw; - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - return std::make_shared( - std::move(metadata), - configuration, - context_, - table_id_, - columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, - constraints_, - comment, - format_settings_); -} - -StorageIceberg::StorageIceberg( - std::unique_ptr metadata_, - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_) - : StorageS3(configuration_, context_, table_id_, columns_, constraints_, comment, format_settings_) - , current_metadata(std::move(metadata_)) - , base_configuration(configuration_) -{ -} - -ColumnsDescription StorageIceberg::getTableStructureFromData( - Configuration & base_configuration, - const std::optional &, - ContextPtr local_context) -{ - auto configuration{base_configuration}; - configuration.update(local_context); - auto metadata = parseIcebergMetadata(configuration, local_context); - return ColumnsDescription(metadata->getTableSchema()); -} - -void StorageIceberg::updateConfigurationImpl(ContextPtr local_context) -{ - const bool updated = base_configuration.update(local_context); - auto new_metadata = parseIcebergMetadata(base_configuration, local_context); - - if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion()) - current_metadata = std::move(new_metadata); - else if (!updated) - return; - - auto updated_configuration{base_configuration}; - /// If metadata wasn't changed, we won't list data files again. 
- updated_configuration.keys = current_metadata->getDataFiles(); - StorageS3::useConfiguration(updated_configuration); -} } diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/DataLakes/Iceberg/StorageIceberg.h index 4e63da5508a..bca6e3c868f 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ b/src/Storages/DataLakes/Iceberg/StorageIceberg.h @@ -4,13 +4,13 @@ #if USE_AWS_S3 && USE_AVRO -# include -# include -# include -# include -# include -# include -# include +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -21,65 +21,100 @@ namespace DB /// many Iceberg features like schema evolution, partitioning, positional and equality deletes. /// TODO: Implement Iceberg as a separate storage using IObjectStorage /// (to support all object storages, not only S3) and add support for missing Iceberg features. -class StorageIceberg : public StorageS3 +template +class StorageIceberg : public StorageObjectStorage { public: static constexpr auto name = "Iceberg"; + using Storage = StorageObjectStorage; + using ConfigurationPtr = Storage::ConfigurationPtr; - using Configuration = StorageS3::Configuration; - - static StoragePtr create(const Configuration & base_configuration, - ContextPtr context_, - bool attach, + static StoragePtr create( + ConfigurationPtr base_configuration, + ContextPtr context, + const String & engine_name_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_); + const String & comment_, + std::optional format_settings_, + bool attach) + { + auto object_storage = base_configuration->createOrUpdateObjectStorage(context); + std::unique_ptr metadata; + NamesAndTypesList schema_from_metadata; + try + { + metadata = parseIcebergMetadata(object_storage, base_configuration, context); + schema_from_metadata = metadata->getTableSchema(); + } + catch (...) + { + if (!attach) + throw; + tryLogCurrentException(__PRETTY_FUNCTION__); + } - StorageIceberg( - std::unique_ptr metadata_, - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_); + auto configuration = base_configuration->clone(); + configuration->getPaths() = metadata->getDataFiles(); + + return std::make_shared>( + base_configuration, std::move(metadata), configuration, object_storage, engine_name_, context, + table_id_, + columns_.empty() ? 
ColumnsDescription(schema_from_metadata) : columns_, + constraints_, comment_, format_settings_); + } String getName() const override { return name; } static ColumnsDescription getTableStructureFromData( - Configuration & base_configuration, + ObjectStoragePtr object_storage_, + ConfigurationPtr base_configuration, const std::optional &, - ContextPtr local_context); - - static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context) + ContextPtr local_context) { - return StorageS3::getConfiguration(engine_args, local_context, /* get_format_from_file */false); + auto metadata = parseIcebergMetadata(object_storage_, base_configuration, local_context); + return ColumnsDescription(metadata->getTableSchema()); } - Configuration updateConfigurationAndGetCopy(ContextPtr local_context) override + std::pair updateConfigurationAndGetCopy(ContextPtr local_context) override { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); - return StorageS3::getConfiguration(); + std::lock_guard lock(Storage::configuration_update_mutex); + + auto new_object_storage = base_configuration->createOrUpdateObjectStorage(local_context); + bool updated = new_object_storage != nullptr; + if (updated) + Storage::object_storage = new_object_storage; + + auto new_metadata = parseIcebergMetadata(Storage::object_storage, base_configuration, local_context); + + if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion()) + current_metadata = std::move(new_metadata); + else if (updated) + { + auto updated_configuration = base_configuration->clone(); + /// If metadata wasn't changed, we won't list data files again. + updated_configuration->getPaths() = current_metadata->getDataFiles(); + Storage::configuration = updated_configuration; + } + return {Storage::configuration, Storage::object_storage}; } - void updateConfiguration(ContextPtr local_context) override + template + StorageIceberg( + ConfigurationPtr base_configuration_, + std::unique_ptr metadata_, + Args &&... args) + : Storage(std::forward(args)...) 
+ , base_configuration(base_configuration_) + , current_metadata(std::move(metadata_)) { - std::lock_guard lock(configuration_update_mutex); - updateConfigurationImpl(local_context); } private: - void updateConfigurationImpl(ContextPtr local_context); - + ConfigurationPtr base_configuration; std::unique_ptr current_metadata; - Configuration base_configuration; - std::mutex configuration_update_mutex; }; - } #endif diff --git a/src/Storages/DataLakes/S3MetadataReader.cpp b/src/Storages/DataLakes/S3MetadataReader.cpp deleted file mode 100644 index d66e21550a3..00000000000 --- a/src/Storages/DataLakes/S3MetadataReader.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int S3_ERROR; -} - -std::shared_ptr -S3DataLakeMetadataReadHelper::createReadBuffer(const String & key, ContextPtr context, const StorageS3::Configuration & base_configuration) -{ - S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries; - return std::make_shared( - base_configuration.client, - base_configuration.url.bucket, - key, - base_configuration.url.version_id, - request_settings, - context->getReadSettings()); -} - -bool S3DataLakeMetadataReadHelper::exists(const String & key, const StorageS3::Configuration & configuration) -{ - return S3::objectExists(*configuration.client, configuration.url.bucket, key); -} - -std::vector S3DataLakeMetadataReadHelper::listFiles( - const StorageS3::Configuration & base_configuration, const String & prefix, const String & suffix) -{ - const auto & table_path = base_configuration.url.key; - const auto & bucket = base_configuration.url.bucket; - const auto & client = base_configuration.client; - - std::vector res; - S3::ListObjectsV2Request request; - Aws::S3::Model::ListObjectsV2Outcome outcome; - - request.SetBucket(bucket); - request.SetPrefix(std::filesystem::path(table_path) / prefix); - - bool is_finished{false}; - while (!is_finished) - { - outcome = client->ListObjectsV2(request); - if (!outcome.IsSuccess()) - throw S3Exception( - outcome.GetError().GetErrorType(), - "Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}", - quoteString(bucket), - quoteString(base_configuration.url.key), - backQuote(outcome.GetError().GetExceptionName()), - quoteString(outcome.GetError().GetMessage())); - - const auto & result_batch = outcome.GetResult().GetContents(); - for (const auto & obj : result_batch) - { - const auto & filename = obj.GetKey(); - if (filename.ends_with(suffix)) - res.push_back(filename); - } - - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - is_finished = !outcome.GetResult().GetIsTruncated(); - } - - LOG_TRACE(getLogger("S3DataLakeMetadataReadHelper"), "Listed {} files", res.size()); - - return res; -} - -} -#endif diff --git a/src/Storages/DataLakes/S3MetadataReader.h b/src/Storages/DataLakes/S3MetadataReader.h deleted file mode 100644 index cae7dd1fa3d..00000000000 --- a/src/Storages/DataLakes/S3MetadataReader.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include - -#if USE_AWS_S3 - -#include - -class ReadBuffer; - -namespace DB -{ - -struct S3DataLakeMetadataReadHelper -{ - static std::shared_ptr createReadBuffer( - const String & key, ContextPtr context, const StorageS3::Configuration & base_configuration); - - static bool exists(const String & key, const 
StorageS3::Configuration & configuration); - - static std::vector listFiles(const StorageS3::Configuration & configuration, const std::string & prefix = "", const std::string & suffix = ""); -}; -} - -#endif diff --git a/src/Storages/DataLakes/StorageDeltaLake.h b/src/Storages/DataLakes/StorageDeltaLake.h index 8b4ba28d6f7..07c2205d2df 100644 --- a/src/Storages/DataLakes/StorageDeltaLake.h +++ b/src/Storages/DataLakes/StorageDeltaLake.h @@ -5,11 +5,6 @@ #include #include "config.h" -#if USE_AWS_S3 -#include -#include -#endif - namespace DB { @@ -19,7 +14,7 @@ struct StorageDeltaLakeName }; #if USE_AWS_S3 && USE_PARQUET -using StorageDeltaLakeS3 = IStorageDataLake>; +using StorageDeltaLakeS3 = IStorageDataLake; #endif } diff --git a/src/Storages/DataLakes/StorageHudi.h b/src/Storages/DataLakes/StorageHudi.h index 84666f51405..3fd52c82d32 100644 --- a/src/Storages/DataLakes/StorageHudi.h +++ b/src/Storages/DataLakes/StorageHudi.h @@ -5,11 +5,6 @@ #include #include "config.h" -#if USE_AWS_S3 -#include -#include -#endif - namespace DB { @@ -19,7 +14,7 @@ struct StorageHudiName }; #if USE_AWS_S3 -using StorageHudiS3 = IStorageDataLake>; +using StorageHudiS3 = IStorageDataLake; #endif } diff --git a/src/Storages/DataLakes/registerDataLakes.cpp b/src/Storages/DataLakes/registerDataLakes.cpp index 118600f7212..2647fbce39d 100644 --- a/src/Storages/DataLakes/registerDataLakes.cpp +++ b/src/Storages/DataLakes/registerDataLakes.cpp @@ -6,43 +6,43 @@ #include #include #include +#include namespace DB { -#define REGISTER_DATA_LAKE_STORAGE(STORAGE, NAME) \ - factory.registerStorage( \ - NAME, \ - [](const StorageFactory::Arguments & args) \ - { \ - return createDataLakeStorage(args);\ - }, \ - { \ - .supports_settings = false, \ - .supports_schema_inference = true, \ - .source_access_type = AccessType::S3, \ - }); - #if USE_PARQUET -void registerStorageDeltaLake(StorageFactory & factory) +void registerStorageDeltaLake(StorageFactory & ) { - REGISTER_DATA_LAKE_STORAGE(StorageDeltaLakeS3, StorageDeltaLakeName::name) + // factory.registerStorage( + // StorageDeltaLakeName::name, + // [&](const StorageFactory::Arguments & args) + // { + // auto configuration = std::make_shared(); + // return IStorageDataLake::create( + // configuration, args.getContext(), "deltaLake", args.table_id, args.columns, + // args.constraints, args.comment, std::nullopt, args.attach); + // }, + // { + // .supports_settings = false, + // .supports_schema_inference = true, + // .source_access_type = AccessType::S3, + // }); } #endif #if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. 
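The Iceberg registration just below is stubbed out by this patch, like the DeltaLake one earlier in the file. For illustration, re-enabling it would presumably mirror the commented-out DeltaLake lambda above; this is only a sketch, and StorageS3Configuration is an assumed placeholder since the patch does not name the configuration type:

    /// Sketch only; mirrors the commented-out DeltaLake registration above.
    /// StorageS3Configuration is an assumed placeholder for the configuration type.
    factory.registerStorage(
        "Iceberg",
        [&](const StorageFactory::Arguments & args)
        {
            auto configuration = std::make_shared<StorageS3Configuration>();
            return StorageIceberg<StorageS3Configuration>::create(
                configuration, args.getContext(), "Iceberg", args.table_id, args.columns,
                args.constraints, args.comment, std::nullopt, args.attach);
        },
        {
            .supports_settings = false,
            .supports_schema_inference = true,
            .source_access_type = AccessType::S3,
        });
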
-void registerStorageIceberg(StorageFactory & factory) +void registerStorageIceberg(StorageFactory &) { - REGISTER_DATA_LAKE_STORAGE(StorageIceberg, StorageIceberg::name) + // REGISTER_DATA_LAKE_STORAGE(StorageIceberg, StorageIceberg::name) } #endif -void registerStorageHudi(StorageFactory & factory) +void registerStorageHudi(StorageFactory &) { - REGISTER_DATA_LAKE_STORAGE(StorageHudiS3, StorageHudiName::name) } } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp deleted file mode 100644 index ab21c4946e4..00000000000 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ /dev/null @@ -1,1117 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ACCESS_DENIED; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; - extern const int CANNOT_COMPILE_REGEXP; -} -namespace -{ - struct HDFSFileInfoDeleter - { - /// Can have only one entry (see hdfsGetPathInfo()) - void operator()(hdfsFileInfo * info) { hdfsFreeFileInfo(info, 1); } - }; - using HDFSFileInfoPtr = std::unique_ptr; - - /* Recursive directory listing with matched paths as a result. - * Have the same method in StorageFile. - */ - std::vector LSWithRegexpMatching( - const String & path_for_ls, - const HDFSFSPtr & fs, - const String & for_match) - { - std::vector result; - - const size_t first_glob_pos = for_match.find_first_of("*?{"); - - if (first_glob_pos == std::string::npos) - { - const String path = fs::path(path_for_ls + for_match.substr(1)).lexically_normal(); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path.c_str())); - if (hdfs_info) // NOLINT - { - result.push_back(StorageHDFS::PathWithInfo{ - String(path), - StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}}); - } - return result; - } - - const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); - const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - - const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); - - const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); - - re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); - if (!matcher.ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); - - HDFSFileInfo ls; - ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); - if (ls.file_info == nullptr && errno != ENOENT) // NOLINT - { - // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. 
- throw Exception( - ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError())); - } - - if (!ls.file_info && ls.length > 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); - for (int i = 0; i < ls.length; ++i) - { - const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = full_path.rfind('/'); - const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; - const bool is_directory = ls.file_info[i].mKind == 'D'; - /// Condition with type of current file_info means what kind of path is it in current iteration of ls - if (!is_directory && !looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - result.push_back(StorageHDFS::PathWithInfo{ - String(full_path), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); - } - else if (is_directory && looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - { - std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, - suffix_with_globs.substr(next_slash_after_glob_pos)); - /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. - std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); - } - } - } - - return result; - } - - std::pair getPathFromUriAndUriWithoutPath(const String & uri) - { - auto pos = uri.find("//"); - if (pos != std::string::npos && pos + 2 < uri.length()) - { - pos = uri.find('/', pos + 2); - if (pos != std::string::npos) - return {uri.substr(pos), uri.substr(0, pos)}; - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); - } - - std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context) - { - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - Strings paths = expandSelectionGlob(path_from_uri); - - std::vector res; - - for (const auto & path : paths) - { - auto part_of_res = LSWithRegexpMatching("/", fs, path); - res.insert(res.end(), part_of_res.begin(), part_of_res.end()); - } - return res; - } -} - -StorageHDFS::StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - ContextPtr context_, - const String & compression_method_, - const bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , WithContext(context_) - , uris({uri_}) - , format_name(format_name_) - , compression_method(compression_method_) - , distributed_processing(distributed_processing_) - , partition_by(partition_by_) -{ - FormatFactory::instance().checkFormatName(format_name); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - checkHDFSURL(uri_); - - String path = uri_.substr(uri_.find('/', uri_.find("//") + 2)); - is_path_with_globs = path.find_first_of("*?{") != std::string::npos; - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - auto columns = getTableStructureFromData(format_name, uri_, compression_method, context_); - storage_metadata.setColumns(columns); - } - else - { - /// We don't allow special columns in HDFS storage. 
- if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::vector & paths_with_info_, - const String & uri_without_path_, - const String & format_, - const String & compression_method_, - const ContextPtr & context_) - : WithContext(context_) - , paths_with_info(paths_with_info_) - , uri_without_path(uri_without_path_) - , format(format_) - , compression_method(compression_method_) - { - } - - std::pair, std::optional> next() override - { - bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns}; - } - - StorageHDFS::PathWithInfo path_with_info; - - while (true) - { - if (current_index == paths_with_info.size()) - { - if (is_first) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because all files are empty. " - "You must specify table structure manually", format); - return {nullptr, std::nullopt}; - } - - path_with_info = paths_with_info[current_index++]; - if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) - continue; - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - std::vector paths = {path_with_info}; - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns}; - } - - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) - { - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); - } - - 
void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - Strings sources; - sources.reserve(paths_with_info.size()); - std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, format, {}, getContext()); - StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - String getLastFileName() const override - { - if (current_index != 0) - return paths_with_info[current_index - 1].path; - - return ""; - } - - private: - std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return std::nullopt; - - auto & schema_cache = StorageHDFS::getSchemaCache(getContext()); - for (const auto & path_with_info : paths_with_info_) - { - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); - if (hdfs_info) - return hdfs_info->mLastMod; - - return std::nullopt; - }; - - String url = uri_without_path + path_with_info.path; - auto cache_key = getKeyForSchemaCache(url, format, {}, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; - } - - return std::nullopt; - } - - const std::vector & paths_with_info; - const String & uri_without_path; - const String & format; - const String & compression_method; - size_t current_index = 0; - }; -} - -ColumnsDescription StorageHDFS::getTableStructureFromData( - const String & format, - const String & uri, - const String & compression_method, - ContextPtr ctx) -{ - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - - if (paths_with_info.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files in HDFS with provided path." 
- " You must specify table structure manually", format); - - ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, paths_with_info.size() > 1, ctx); -} - -class HDFSSource::DisclosedGlobIterator::Impl -{ -public: - Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - uris = getPathsList(path_from_uri, uri_without_path, context); - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & path_with_info : uris) - paths.push_back(path_with_info.path); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); - } - auto file_progress_callback = context->getFileProgressCallback(); - - for (auto & elem : uris) - { - elem.path = uri_without_path + elem.path; - if (file_progress_callback && elem.info) - file_progress_callback(FileProgress(0, elem.info->size)); - } - uris_iter = uris.begin(); - } - - StorageHDFS::PathWithInfo next() - { - std::lock_guard lock(mutex); - if (uris_iter != uris.end()) - { - auto answer = *uris_iter; - ++uris_iter; - return answer; - } - return {}; - } -private: - std::mutex mutex; - std::vector uris; - std::vector::iterator uris_iter; -}; - -class HDFSSource::URISIterator::Impl : WithContext -{ -public: - explicit Impl(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_) - : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) - { - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & uri : uris) - paths.push_back(getPathFromUriAndUriWithoutPath(uri).first); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext()); - } - - if (!uris.empty()) - { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); - builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); - fs = createHDFSFS(builder.get()); - } - } - - StorageHDFS::PathWithInfo next() - { - String uri; - HDFSFileInfoPtr hdfs_info; - do - { - size_t current_index = index.fetch_add(1); - if (current_index >= uris.size()) - return {"", {}}; - - uri = uris[current_index]; - auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); - hdfs_info.reset(hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str())); - } - /// Skip non-existed files. 
- while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); - - std::optional info; - if (hdfs_info) - { - info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - if (file_progress_callback) - file_progress_callback(FileProgress(0, hdfs_info->mSize)); - } - - return {uri, info}; - } - -private: - std::atomic_size_t index = 0; - Strings uris; - HDFSBuilderWrapper builder; - HDFSFSPtr fs; - std::function file_progress_callback; -}; - -HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uri, predicate, virtual_columns, context)) {} - -StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::URISIterator::URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uris_, predicate, virtual_columns, context)) -{ -} - -StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - ContextPtr context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - : ISource(info.source_header, false) - , WithContext(context_) - , storage(std::move(storage_)) - , block_for_format(info.format_header) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , max_block_size(max_block_size_) - , file_iterator(file_iterator_) - , columns_description(info.columns_description) - , need_only_count(need_only_count_) -{ - initialize(); -} - -bool HDFSSource::initialize() -{ - bool skip_empty_files = getContext()->getSettingsRef().hdfs_skip_empty_files; - StorageHDFS::PathWithInfo path_with_info; - while (true) - { - path_with_info = (*file_iterator)(); - if (path_with_info.path.empty()) - return false; - - if (path_with_info.info && skip_empty_files && path_with_info.info->size == 0) - continue; - - current_path = path_with_info.path; - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); - - std::optional file_size; - if (!path_with_info.info) - { - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_from_uri.c_str())); - if (hdfs_info) - path_with_info.info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - } - - if (path_with_info.info) - file_size = path_with_info.info->size; - - auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); - auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); - if (!skip_empty_files || !impl->eof()) - { - impl->setProgressCallback(getContext()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - break; - } - } - - current_path = path_with_info.path; - current_file_size = path_with_info.info ? 
std::optional(path_with_info.info->size) : std::nullopt; - - QueryPipelineBuilder builder; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use a special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) - { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); - } - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from the chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - return true; -} - -String HDFSSource::getName() const -{ - return "HDFSSource"; -} - -Chunk HDFSSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (input_format) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, current_path, current_file_size); - return chunk; - } - - if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(current_path, total_rows_in_file); - - total_rows_in_file = 0; - - reader.reset(); - pipeline.reset(); - input_format.reset(); - read_buf.reset(); - - if (!initialize()) - break; - } - return {}; -} - -void HDFSSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional HDFSSource::tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info) -{ - auto cache_key = getKeyForSchemaCache(path_with_info.path, storage->format_name, std::nullopt, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - return std::nullopt; - }; - - return StorageHDFS::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class HDFSSink : public SinkToStorage -{ -public: - HDFSSink(const String & uri, - const String & format, - const Block & sample_block, - ContextPtr context, - const CompressionMethod compression_method) - : SinkToStorage(sample_block) - { - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication, context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); - } - - String getName() const override { return "HDFSSink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->sync(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - std::unique_ptr write_buf; - OutputFormatPtr writer; - std::mutex cancel_mutex; - bool cancelled = false; -}; - -class PartitionedHDFSSink : public PartitionedSink -{ -public: - PartitionedHDFSSink( - const ASTPtr & partition_by, - const String & uri_, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - const CompressionMethod compression_method_) - : PartitionedSink(partition_by, context_, sample_block_) - , uri(uri_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto path = PartitionedSink::replaceWildcards(uri, partition_id); - PartitionedSink::validatePartitionKey(path, true); - return std::make_shared(path, format, sample_block, context, compression_method); - } - -private: - const String uri; - const String format; - const Block sample_block; - ContextPtr context; - const CompressionMethod compression_method; -}; - - -bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); -} - -class ReadFromHDFS : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromHDFS"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; - - ReadFromHDFS( - Block sample_block, - ReadFromFormatInfo info_, - bool need_only_count_, - std::shared_ptr storage_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , storage(std::move(storage_)) - , context(std::move(context_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - ReadFromFormatInfo info; - const bool need_only_count; - std::shared_ptr storage; - - ContextPtr context; - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromHDFS::applyFilters() -{ - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageHDFS::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context_, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), virtual_columns); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && context_->getSettingsRef().optimize_count_from_files; - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto reading = std::make_unique( - read_from_format_info.source_header, - std::move(read_from_format_info), - need_only_count, - std::move(this_ptr), - context_, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromHDFS::createIterator(const 
ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared( - [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo { - return StorageHDFS::PathWithInfo{callback(), std::nullopt}; - }); - } - else if (storage->is_path_with_globs) - { - /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->virtual_columns, context); - iterator_wrapper = std::make_shared([glob_iterator]() - { - return glob_iterator->next(); - }); - } - else - { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->virtual_columns, context); - iterator_wrapper = std::make_shared([uris_iterator]() - { - return uris_iterator->next(); - }); - } -} - -void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - storage, - context, - max_block_size, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/) -{ - String current_uri = uris.back(); - - bool has_wildcards = current_uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; - const auto * insert_query = dynamic_cast(query.get()); - auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && has_wildcards; - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } - else - { - if (is_path_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_uri); - - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context_->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - bool truncate_on_insert = context_->getSettingsRef().hdfs_truncate_on_insert; - if (!truncate_on_insert && !hdfsExists(fs.get(), path_from_uri.c_str())) - { - if (context_->getSettingsRef().hdfs_create_new_file_on_insert) - { - auto pos = uris[0].find_first_of('.', uris[0].find_last_of('/')); - size_t index = uris.size(); - String new_uri; - do - { - new_uri = uris[0].substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? "" : uris[0].substr(pos)); - ++index; - } - while (!hdfsExists(fs.get(), new_uri.c_str())); - uris.push_back(new_uri); - current_uri = new_uri; - } - else - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "File with path {} already exists. 
If you want to overwrite it, enable setting hdfs_truncate_on_insert, " - "if you want to create new file on each insert, enable setting hdfs_create_new_file_on_insert", - path_from_uri); - } - - return std::make_shared(current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } -} - -void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - const size_t begin_of_path = uris[0].find('/', uris[0].find("//") + 2); - const String url = uris[0].substr(0, begin_of_path); - - HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", local_context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - - for (const auto & uri : uris) - { - const String path = uri.substr(begin_of_path); - int ret = hdfsDelete(fs.get(), path.data(), 0); - if (ret) - throw Exception(ErrorCodes::ACCESS_DENIED, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); - } -} - - -void registerStorageHDFS(StorageFactory & factory) -{ - factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage HDFS requires 1, 2 or 3 arguments: " - "url, name of used format (taken from file extension by default) and optional compression method."); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); - - String url = checkAndGetLiteralArgument(engine_args[0], "url"); - - String format_name = "auto"; - if (engine_args.size() > 1) - { - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - format_name = checkAndGetLiteralArgument(engine_args[1], "format_name"); - } - - if (format_name == "auto") - format_name = FormatFactory::instance().getFormatFromFileName(url, true); - - String compression_method; - if (engine_args.size() == 3) - { - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); - compression_method = checkAndGetLiteralArgument(engine_args[2], "compression_method"); - } else compression_method = "auto"; - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, false, partition_by); - }, - { - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::HDFS, - }); -} - -NamesAndTypesList StorageHDFS::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageHDFS::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - -SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h deleted file mode 100644 index 7170763c959..00000000000 --- a/src/Storages/HDFS/StorageHDFS.h +++ /dev/null @@ -1,179 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include 
-#include -#include -#include -#include - -namespace DB -{ - -class IInputFormat; - -/** - * This class represents table engine for external hdfs files. - * Read method is supported for now. - */ -class StorageHDFS final : public IStorage, WithContext -{ -public: - struct PathInfo - { - time_t last_mod_time; - size_t size; - }; - - struct PathWithInfo - { - PathWithInfo() = default; - PathWithInfo(const String & path_, const std::optional & info_) : path(path_), info(info_) {} - String path; - std::optional info; - }; - - StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - ContextPtr context_, - const String & compression_method_ = "", - bool distributed_processing_ = false, - ASTPtr partition_by = nullptr); - - String getName() const override { return "HDFS"; } - - void read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool async_insert) override; - - void truncate( - const ASTPtr & query, - const StorageMetadataPtr & metadata_snapshot, - ContextPtr local_context, - TableExclusiveLockHolder &) override; - - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - - bool supportsPartitionBy() const override { return true; } - - /// Check if the format is column-oriented. - /// Is is useful because column oriented formats could effectively skip unknown columns - /// So we can create a header of only required columns in read method and ask - /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
- bool supportsSubsetOfColumns(const ContextPtr & context_) const; - - bool supportsSubcolumns() const override { return true; } - - static ColumnsDescription getTableStructureFromData( - const String & format, - const String & uri, - const String & compression_method, - ContextPtr ctx); - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - bool supportsTrivialCountOptimization() const override { return true; } - -protected: - friend class HDFSSource; - friend class ReadFromHDFS; - -private: - std::vector uris; - String format_name; - String compression_method; - const bool distributed_processing; - ASTPtr partition_by; - bool is_path_with_globs; - NamesAndTypesList virtual_columns; - - LoggerPtr log = getLogger("StorageHDFS"); -}; - -class PullingPipelineExecutor; - -class HDFSSource : public ISource, WithContext -{ -public: - class DisclosedGlobIterator - { - public: - DisclosedGlobIterator(const String & uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context); - StorageHDFS::PathWithInfo next(); - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class URISIterator - { - public: - URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context); - StorageHDFS::PathWithInfo next(); - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - using IteratorWrapper = std::function; - using StorageHDFSPtr = std::shared_ptr; - - HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - ContextPtr context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_); - - String getName() const override; - - Chunk generate() override; - -private: - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info); - - StorageHDFSPtr storage; - Block block_for_format; - NamesAndTypesList requested_columns; - NamesAndTypesList requested_virtual_columns; - UInt64 max_block_size; - std::shared_ptr file_iterator; - ColumnsDescription columns_description; - bool need_only_count; - size_t total_rows_in_file = 0; - - std::unique_ptr read_buf; - std::shared_ptr input_format; - std::unique_ptr pipeline; - std::unique_ptr reader; - String current_path; - std::optional current_file_size; - - /// Recreate ReadBuffer and PullingPipelineExecutor for each file. 
- bool initialize(); -}; -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp deleted file mode 100644 index fad29436102..00000000000 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include "config.h" -#include "Interpreters/Context_fwd.h" - -#if USE_HDFS - -#include - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageHDFSCluster::StorageHDFSCluster( - ContextPtr context_, - const String & cluster_name_, - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & compression_method_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageHDFSCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) - , uri(uri_) - , format_name(format_name_) - , compression_method(compression_method_) -{ - checkHDFSURL(uri_); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - auto columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); - storage_metadata.setColumns(columns); - } - else - storage_metadata.setColumns(columns_); - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function hdfsCluster, got '{}'", queryToString(query)); - - TableFunctionHDFSCluster::addColumnsStructureToArguments(expression_list->children, structure, context); -} - - -RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared(uri, predicate, virtual_columns, context); - auto callback = std::make_shared>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; }); - return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; -} - -NamesAndTypesList StorageHDFSCluster::getVirtuals() const -{ - return NamesAndTypesList{ - {"_path", std::make_shared(std::make_shared())}, - {"_file", std::make_shared(std::make_shared())}}; -} - -} - -#endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h deleted file mode 100644 index 7c4c41a573a..00000000000 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include - -#include -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageHDFSCluster : public IStorageCluster -{ -public: - StorageHDFSCluster( - ContextPtr context_, - const String & cluster_name_, - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription 
& columns_, - const ConstraintsDescription & constraints_, - const String & compression_method_, - bool structure_argument_was_provided_); - - std::string getName() const override { return "HDFSCluster"; } - - NamesAndTypesList getVirtuals() const override; - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization() const override { return true; } - -private: - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; - - String uri; - String format_name; - String compression_method; - NamesAndTypesList virtual_columns; -}; - - -} - -#endif diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4fa6bfdd617..26301472f24 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -98,9 +98,14 @@ class IStorage : public std::enable_shared_from_this, public TypePromo public: IStorage() = delete; /// Storage metadata can be set separately in setInMemoryMetadata method - explicit IStorage(StorageID storage_id_) + explicit IStorage(StorageID storage_id_, std::unique_ptr metadata_ = nullptr) : storage_id(std::move(storage_id_)) - , metadata(std::make_unique()) {} + { + if (metadata_) + metadata.set(std::move(metadata_)); + else + metadata.set(std::make_unique()); + } IStorage(const IStorage &) = delete; IStorage & operator=(const IStorage &) = delete; diff --git a/src/Storages/ObjectStorage/AzureConfiguration.cpp b/src/Storages/ObjectStorage/AzureConfiguration.cpp new file mode 100644 index 00000000000..ba3e796223a --- /dev/null +++ b/src/Storages/ObjectStorage/AzureConfiguration.cpp @@ -0,0 +1,451 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +const std::unordered_set required_configuration_keys = { + "blob_path", + "container", +}; + +const std::unordered_set optional_configuration_keys = { + "format", + "compression", + "structure", + "compression_method", + "account_name", + "account_key", + "connection_string", + "storage_account_url", +}; + +using AzureClient = Azure::Storage::Blobs::BlobContainerClient; +using AzureClientPtr = std::unique_ptr; + +namespace +{ + bool isConnectionString(const std::string & candidate) + { + return !candidate.starts_with("http"); + } + + bool containerExists(std::unique_ptr & blob_service_client, std::string container_name) + { + Azure::Storage::Blobs::ListBlobContainersOptions options; + options.Prefix = container_name; + options.PageSizeHint = 1; + + auto containers_list_response = blob_service_client->ListBlobContainers(options); + auto containers_list = containers_list_response.BlobContainers; + + for (const auto & container : containers_list) + { + if (container_name == container.Name) + return true; + } + return false; + } +} + +void StorageAzureBlobConfiguration::check(ContextPtr context) const +{ + Poco::URI url_to_check; + if (is_connection_string) + { + auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); + url_to_check = Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); + } + else + url_to_check = Poco::URI(connection_url); + + context->getGlobalContext()->getRemoteHostFilter().checkURL(url_to_check); +} + 
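// [Annotation, not part of the patch] The helpers in the anonymous namespace above treat any
// endpoint that does not start with "http" as an Azure connection string, and containerExists()
// lists containers using the name as a prefix and then compares names exactly.
// A minimal standalone sketch of the connection-string heuristic (all names here are hypothetical):
//
//     #include <cassert>
//     #include <string>
//
//     static bool looksLikeConnectionString(const std::string & endpoint)
//     {
//         // Connection strings are ';'-separated key=value pairs, e.g.
//         // "DefaultEndpointsProtocol=https;AccountName=acc;AccountKey=key;EndpointSuffix=core.windows.net",
//         // while storage account endpoints are plain URLs starting with "http"/"https".
//         return !endpoint.starts_with("http");
//     }
//
//     int main()
//     {
//         assert(looksLikeConnectionString("DefaultEndpointsProtocol=https;AccountName=acc;AccountKey=key"));
//         assert(!looksLikeConnectionString("https://acc.blob.core.windows.net/"));
//         return 0;
//     }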
+StorageObjectStorageConfigurationPtr StorageAzureBlobConfiguration::clone() +{ + auto configuration = std::make_shared(); + configuration->connection_url = connection_url; + configuration->is_connection_string = is_connection_string; + configuration->account_name = account_name; + configuration->account_key = account_key; + configuration->container = container; + configuration->blob_path = blob_path; + configuration->blobs_paths = blobs_paths; + return configuration; +} + +AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(ContextPtr context) +{ + const auto & context_settings = context->getSettingsRef(); + auto settings_ptr = std::make_unique(); + settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; + settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; + settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); + return settings_ptr; +} + +ObjectStoragePtr StorageAzureBlobConfiguration::createOrUpdateObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +{ + auto client = createClient(is_readonly); + auto settings = createSettings(context); + return std::make_unique("AzureBlobStorage", std::move(client), std::move(settings), container); +} + +AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) +{ + using namespace Azure::Storage::Blobs; + + AzureClientPtr result; + + if (is_connection_string) + { + auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); + result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); + bool container_exists = containerExists(blob_service_client, container); + + if (!container_exists) + { + if (is_read_only) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "AzureBlobStorage container does not exist '{}'", + container); + + try + { + result->CreateIfNotExists(); + } catch (const Azure::Storage::StorageException & e) + { + if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.")) + { + throw; + } + } + } + } + else + { + std::shared_ptr storage_shared_key_credential; + if (account_name.has_value() && account_key.has_value()) + { + storage_shared_key_credential = + std::make_shared(*account_name, *account_key); + } + + std::unique_ptr blob_service_client; + if (storage_shared_key_credential) + { + blob_service_client = std::make_unique(connection_url, storage_shared_key_credential); + } + else + { + blob_service_client = std::make_unique(connection_url); + } + + bool container_exists = containerExists(blob_service_client, container); + + std::string final_url; + size_t pos = connection_url.find('?'); + if (pos != std::string::npos) + { + auto url_without_sas = connection_url.substr(0, pos); + final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + container + + connection_url.substr(pos); + } + else + final_url + = connection_url + (connection_url.back() == '/' ? 
"" : "/") + container; + + if (container_exists) + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url); + } + else + { + if (is_read_only) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "AzureBlobStorage container does not exist '{}'", + container); + try + { + result = std::make_unique(blob_service_client->CreateBlobContainer(container).Value); + } + catch (const Azure::Storage::StorageException & e) + { + if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.") + { + if (storage_shared_key_credential) + result = std::make_unique(final_url, storage_shared_key_credential); + else + result = std::make_unique(final_url); + } + else + { + throw; + } + } + } + } + + return result; +} + +void StorageAzureBlobConfiguration::fromNamedCollection(const NamedCollection & collection) +{ + validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + + if (collection.has("connection_string")) + { + connection_url = collection.get("connection_string"); + is_connection_string = true; + } + + if (collection.has("storage_account_url")) + { + connection_url = collection.get("storage_account_url"); + is_connection_string = false; + } + + container = collection.get("container"); + blob_path = collection.get("blob_path"); + + if (collection.has("account_name")) + account_name = collection.get("account_name"); + + if (collection.has("account_key")) + account_key = collection.get("account_key"); + + structure = collection.getOrDefault("structure", "auto"); + format = collection.getOrDefault("format", format); + compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + + blobs_paths = {blob_path}; + if (format == "auto") + format = FormatFactory::instance().getFormatFromFileName(blob_path, true); +} + +void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) +{ + if (engine_args.size() < 3 || engine_args.size() > (with_structure ? 
8 : 7)) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage AzureBlobStorage requires 3 to 7 arguments: " + "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure)])"); + } + + for (auto & engine_arg : engine_args) + engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context); + + std::unordered_map engine_args_to_idx; + + connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); + is_connection_string = isConnectionString(connection_url); + + container = checkAndGetLiteralArgument(engine_args[1], "container"); + blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); + + auto is_format_arg = [] (const std::string & s) -> bool + { + return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); + }; + + if (engine_args.size() == 4) + { + //'c1 UInt64, c2 UInt64 + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + format = fourth_arg; + } + else + { + if (with_structure) + structure = fourth_arg; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); + } + } + else if (engine_args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + format = fourth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + } + } + else if (engine_args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (is_format_arg(fourth_arg)) + { + if (with_structure) + { + format = fourth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); + structure = checkAndGetLiteralArgument(engine_args[5], "structure"); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (is_format_arg(sixth_arg)) + format = sixth_arg; + else + { + if (with_structure) + structure = sixth_arg; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + } + } + } + else if (engine_args.size() == 7) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + if (!with_structure && is_format_arg(fourth_arg)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); + } + else + { + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); + auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (!is_format_arg(sixth_arg)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + format = sixth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); + } + } + else if (with_structure && engine_args.size() == 8) + { + auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); + account_name = fourth_arg; + account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); 
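/// In this 8-argument (with structure) form the positional layout is
/// connection_string|storage_account_url, container, blobpath, account_name, account_key,
/// format, compression, structure; the remaining format, compression and structure arguments are read just below.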
+ auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); + if (!is_format_arg(sixth_arg)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); + format = sixth_arg; + compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); + structure = checkAndGetLiteralArgument(engine_args[7], "structure"); + } + + blobs_paths = {blob_path}; + + if (format == "auto") + format = FormatFactory::instance().getFormatFromFileName(blob_path, true); +} + +void StorageAzureBlobConfiguration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + if (args.size() < 3 || args.size() > 8) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Azure requires 3 to 7 arguments: " + "StorageObjectStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); + } + + auto structure_literal = std::make_shared(structure_); + auto is_format_arg + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + + if (args.size() == 3) + { + /// Add format=auto & compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else if (args.size() == 4) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + if (is_format_arg(fourth_arg)) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else + { + args.back() = structure_literal; + } + } + else if (args.size() == 5) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + if (!is_format_arg(fourth_arg)) + { + /// Add format=auto & compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(std::make_shared("auto")); + } + args.push_back(structure_literal); + } + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + if (!is_format_arg(fourth_arg)) + { + /// Add compression=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + else + { + args.back() = structure_literal; + } + } + else if (args.size() == 7) + { + args.push_back(structure_literal); + } + else if (args.size() == 8) + { + args.back() = structure_literal; + } + } +} + +} diff --git a/src/Storages/ObjectStorage/AzureConfiguration.h b/src/Storages/ObjectStorage/AzureConfiguration.h new file mode 100644 index 00000000000..40d718d7690 --- /dev/null +++ b/src/Storages/ObjectStorage/AzureConfiguration.h @@ -0,0 +1,54 @@ +#pragma once +#include +#include + +namespace DB +{ +class BackupFactory; + +class StorageAzureBlobConfiguration : public StorageObjectStorageConfiguration +{ + friend class BackupReaderAzureBlobStorage; + friend class BackupWriterAzureBlobStorage; + friend void registerBackupEngineAzureBlobStorage(BackupFactory & factory); + +public: + StorageAzureBlobConfiguration() = default; + StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other); + + Path getPath() const override { return blob_path; } + void setPath(const Path & path) override { blob_path = path; } + + const Paths & getPaths() const override { return blobs_paths; } + Paths & getPaths() override { return blobs_paths; } + + String getDataSourceDescription() override { return fs::path(connection_url) / container; } + String getNamespace() const override { return container; } + + void check(ContextPtr context) const override; + StorageObjectStorageConfigurationPtr clone() override; + ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + static void addStructureToArgs(ASTs & args, const String & structure, ContextPtr context); + +protected: + using AzureClient = Azure::Storage::Blobs::BlobContainerClient; + using AzureClientPtr = std::unique_ptr; + + std::string connection_url; + bool is_connection_string; + + std::optional account_name; + std::optional account_key; + + std::string container; + std::string blob_path; + std::vector blobs_paths; + + AzureClientPtr createClient(bool is_read_only); + AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); +}; + +} diff --git a/src/Storages/ObjectStorage/Configuration.h b/src/Storages/ObjectStorage/Configuration.h new file mode 100644 index 00000000000..708041980e3 --- /dev/null +++ b/src/Storages/ObjectStorage/Configuration.h @@ -0,0 +1,55 @@ +#pragma once +#include +#include + +namespace DB +{ + +class StorageObjectStorageConfiguration; +using StorageObjectStorageConfigurationPtr = std::shared_ptr; + +class StorageObjectStorageConfiguration +{ +public: + StorageObjectStorageConfiguration() = default; + virtual ~StorageObjectStorageConfiguration() = default; + + using Path = std::string; + using Paths = std::vector; + + virtual Path getPath() const = 0; + virtual void setPath(const Path & path) = 0; + + virtual const Paths & getPaths() const = 0; + virtual Paths & getPaths() = 0; + + virtual String getDataSourceDescription() = 0; + virtual String getNamespace() const = 0; + + bool isPathWithGlobs() const { return getPath().find_first_of("*?{") != std::string::npos; } + bool isNamespaceWithGlobs() const { return getNamespace().find_first_of("*?{") != std::string::npos; } + + std::string getPathWithoutGlob() const { return getPath().substr(0, getPath().find_first_of("*?{")); } + + virtual bool withWildcard() 
const + { + static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + return getPath().find(PARTITION_ID_WILDCARD) != String::npos; + } + + virtual void check(ContextPtr context) const = 0; + virtual StorageObjectStorageConfigurationPtr clone() = 0; + + virtual ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT + + virtual void fromNamedCollection(const NamedCollection & collection) = 0; + virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; + + String format = "auto"; + String compression_method = "auto"; + String structure = "auto"; +}; + +using StorageObjectStorageConfigurationPtr = std::shared_ptr; + +} diff --git a/src/Storages/ObjectStorage/HDFSConfiguration.h b/src/Storages/ObjectStorage/HDFSConfiguration.h new file mode 100644 index 00000000000..f42cedf459d --- /dev/null +++ b/src/Storages/ObjectStorage/HDFSConfiguration.h @@ -0,0 +1,81 @@ +#pragma once +#include "config.h" + +#if USE_HDFS + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +class StorageHDFSConfiguration : public StorageObjectStorageConfiguration +{ +public: + Path getPath() const override { return path; } + void setPath(const Path & path_) override { path = path_; } + + const Paths & getPaths() const override { return paths; } + Paths & getPaths() override { return paths; } + + String getNamespace() const override { return ""; } + String getDataSourceDescription() override { return url; } + + void check(ContextPtr context) const override + { + context->getRemoteHostFilter().checkURL(Poco::URI(url)); + checkHDFSURL(url); + } + StorageObjectStorageConfigurationPtr clone() override + { + auto configuration = std::make_shared(); + return configuration; + } + + ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override /// NOLINT + { + UNUSED(is_readonly); + auto settings = std::make_unique(); + return std::make_shared(url, std::move(settings), context->getConfigRef()); + } + + void fromNamedCollection(const NamedCollection &) override {} + void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override + { + url = checkAndGetLiteralArgument(args[0], "url"); + + String format_name = "auto"; + if (args.size() > 1) + format_name = checkAndGetLiteralArgument(args[1], "format_name"); + + if (format_name == "auto") + format_name = FormatFactory::instance().getFormatFromFileName(url, true); + + String compression_method; + if (args.size() == 3) + { + compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); + } else compression_method = "auto"; + + } + static void addStructureToArgs(ASTs &, const String &, ContextPtr) {} + +private: + String url; + String path; + std::vector paths; +}; + +} + +#endif diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h new file mode 100644 index 00000000000..248700e2edf --- /dev/null +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -0,0 +1,197 @@ +#pragma once +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + +} + +template +class ReadBufferIterator : public IReadBufferIterator, WithContext +{ +public: + using Storage = StorageObjectStorage; + using Source = StorageObjectStorageSource; + using FileIterator = std::shared_ptr; + 
using ObjectInfos = typename Storage::ObjectInfos; + + ReadBufferIterator( + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const FileIterator & file_iterator_, + const std::optional & format_settings_, + ObjectInfos & read_keys_, + const ContextPtr & context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + , file_iterator(file_iterator_) + , format_settings(format_settings_) + , storage_settings(StorageSettings::create(context_->getSettingsRef())) + , read_keys(read_keys_) + , prev_read_keys_size(read_keys_.size()) + { + } + + std::pair, std::optional> next() override + { + /// For default mode check cached columns for currently read keys on first iteration. + if (first && storage_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns}; + } + + current_object_info = file_iterator->next(0); + if (current_object_info->relative_path.empty()) + { + if (first) + { + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, " + "because there are no files with provided path. " + "You must specify table structure manually", + configuration->format); + } + return {nullptr, std::nullopt}; + } + + first = false; + + /// File iterator could get new keys after new iteration, + /// check them in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT + && read_keys.size() > prev_read_keys_size) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + prev_read_keys_size = read_keys.size(); + if (columns_from_cache) + return {nullptr, columns_from_cache}; + } + else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) + { + ObjectInfos paths = {current_object_info}; + if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) + return {nullptr, columns_from_cache}; + } + + first = false; + + std::unique_ptr read_buffer = object_storage->readObject( + StoredObject(current_object_info->relative_path), + getContext()->getReadSettings(), + {}, + current_object_info->metadata.size_bytes); + + read_buffer = wrapReadBufferWithCompressionMethod( + std::move(read_buffer), + chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), + static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + + return {std::move(read_buffer), std::nullopt}; + } + + void setNumRowsToLastFile(size_t num_rows) override + { + if (storage_settings.schema_inference_use_cache) + { + Storage::getSchemaCache(getContext()).addNumRows( + getKeyForSchemaCache(current_object_info->relative_path), num_rows); + } + } + + void setSchemaToLastFile(const ColumnsDescription & columns) override + { + if (storage_settings.schema_inference_use_cache + && storage_settings.schema_inference_mode == SchemaInferenceMode::UNION) + { + Storage::getSchemaCache(getContext()).addColumns( + getKeyForSchemaCache(current_object_info->relative_path), columns); + } + } + + void setResultingSchema(const ColumnsDescription & columns) override + { + if (storage_settings.schema_inference_use_cache + && storage_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + 
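/// In DEFAULT schema inference mode one schema is inferred for the whole set of files,
/// so the resulting columns are cached under every read key at once; in UNION mode the
/// per-file schemas were already cached by setSchemaToLastFile() above.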
Storage::getSchemaCache(getContext()).addManyColumns(getPathsForSchemaCache(), columns); + } + } + + String getLastFileName() const override { return current_object_info->relative_path; } + +private: + SchemaCache::Key getKeyForSchemaCache(const String & path) const + { + auto source = fs::path(configuration->getDataSourceDescription()) / path; + return DB::getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); + } + + SchemaCache::Keys getPathsForSchemaCache() const + { + Strings sources; + sources.reserve(read_keys.size()); + std::transform( + read_keys.begin(), read_keys.end(), + std::back_inserter(sources), + [&](const auto & elem) + { + return fs::path(configuration->getDataSourceDescription()) / elem->relative_path; + }); + return DB::getKeysForSchemaCache(sources, configuration->format, format_settings, getContext()); + } + + std::optional tryGetColumnsFromCache( + const ObjectInfos::iterator & begin, + const ObjectInfos::iterator & end) + { + if (!storage_settings.schema_inference_use_cache) + return std::nullopt; + + auto & schema_cache = Storage::getSchemaCache(getContext()); + for (auto it = begin; it < end; ++it) + { + const auto & object_info = (*it); + auto get_last_mod_time = [&] -> std::optional + { + if (object_info->metadata.last_modified) + return object_info->metadata.last_modified->epochMicroseconds(); + else + { + object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); + return object_info->metadata.last_modified->epochMicroseconds(); + } + }; + + auto cache_key = getKeyForSchemaCache(object_info->relative_path); + auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); + if (columns) + return columns; + } + + return std::nullopt; + } + + ObjectStoragePtr object_storage; + const Storage::ConfigurationPtr configuration; + const FileIterator file_iterator; + const std::optional & format_settings; + const StorageObjectStorageSettings storage_settings; + ObjectInfos & read_keys; + + size_t prev_read_keys_size; + Storage::ObjectInfoPtr current_object_info; + bool first = true; +}; +} diff --git a/src/Storages/ObjectStorage/ReadFromObjectStorage.h b/src/Storages/ObjectStorage/ReadFromObjectStorage.h new file mode 100644 index 00000000000..9cb77dcc25e --- /dev/null +++ b/src/Storages/ObjectStorage/ReadFromObjectStorage.h @@ -0,0 +1,105 @@ +#pragma once +#include +#include +#include +#include + +namespace DB +{ + +template +class ReadFromStorageObejctStorage : public SourceStepWithFilter +{ +public: + using Storage = StorageObjectStorage; + using Source = StorageObjectStorageSource; + + ReadFromStorageObejctStorage( + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const String & name_, + const NamesAndTypesList & virtual_columns_, + const std::optional & format_settings_, + bool distributed_processing_, + ReadFromFormatInfo info_, + const bool need_only_count_, + ContextPtr context_, + size_t max_block_size_, + size_t num_streams_) + : SourceStepWithFilter(DataStream{.header = info_.source_header}) + , object_storage(object_storage_) + , configuration(configuration_) + , context(std::move(context_)) + , info(std::move(info_)) + , virtual_columns(virtual_columns_) + , format_settings(format_settings_) + , name(name_ + "Source") + , need_only_count(need_only_count_) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + , distributed_processing(distributed_processing_) + { + } + + std::string getName() const override { return name; } + + void applyFilters() 
override + { + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + createIterator(predicate); + } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + createIterator(nullptr); + + Pipes pipes; + for (size_t i = 0; i < num_streams; ++i) + { + pipes.emplace_back(std::make_shared( + getName(), object_storage, configuration, info, format_settings, + context, max_block_size, iterator_wrapper, need_only_count)); + } + + auto pipe = Pipe::unitePipes(std::move(pipes)); + if (pipe.empty()) + pipe = Pipe(std::make_shared(info.source_header)); + + for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + pipeline.init(std::move(pipe)); + } + +private: + ObjectStoragePtr object_storage; + Storage::ConfigurationPtr configuration; + ContextPtr context; + + const ReadFromFormatInfo info; + const NamesAndTypesList virtual_columns; + const std::optional format_settings; + const String name; + const bool need_only_count; + const size_t max_block_size; + const size_t num_streams; + const bool distributed_processing; + + std::shared_ptr iterator_wrapper; + + void createIterator(const ActionsDAG::Node * predicate) + { + if (!iterator_wrapper) + { + iterator_wrapper = Source::createFileIterator( + configuration, object_storage, distributed_processing, context, + predicate, virtual_columns, nullptr, context->getFileProgressCallback()); + } + } +}; + +} diff --git a/src/Storages/ObjectStorage/S3Configuration.cpp b/src/Storages/ObjectStorage/S3Configuration.cpp new file mode 100644 index 00000000000..5a5412019f5 --- /dev/null +++ b/src/Storages/ObjectStorage/S3Configuration.cpp @@ -0,0 +1,491 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +static const std::unordered_set required_configuration_keys = { + "url", +}; + +static const std::unordered_set optional_configuration_keys = { + "format", + "compression", + "compression_method", + "structure", + "access_key_id", + "secret_access_key", + "session_token", + "filename", + "use_environment_credentials", + "max_single_read_retries", + "min_upload_part_size", + "upload_part_size_multiply_factor", + "upload_part_size_multiply_parts_count_threshold", + "max_single_part_upload_size", + "max_connections", + "expiration_window_seconds", + "no_sign_request" +}; + +String StorageS3Configuration::getDataSourceDescription() +{ + return fs::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; +} + +void StorageS3Configuration::check(ContextPtr context) const +{ + context->getGlobalContext()->getRemoteHostFilter().checkURL(url.uri); + context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); +} + +StorageObjectStorageConfigurationPtr StorageS3Configuration::clone() +{ + auto configuration = std::make_shared(); + configuration->url = url; + configuration->auth_settings = auth_settings; + configuration->request_settings = request_settings; + configuration->static_configuration = static_configuration; + configuration->headers_from_ast = headers_from_ast; + configuration->keys = keys; + configuration->initialized = initialized; + return configuration; +} + +ObjectStoragePtr StorageS3Configuration::createOrUpdateObjectStorage(ContextPtr 
context, bool /* is_readonly */) /// NOLINT +{ + auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString()); + request_settings = s3_settings.request_settings; + request_settings.updateFromSettings(context->getSettings()); + + if (!initialized || (!static_configuration && auth_settings.hasUpdates(s3_settings.auth_settings))) + { + auth_settings.updateFrom(s3_settings.auth_settings); + keys[0] = url.key; + initialized = true; + } + + const auto & config = context->getConfigRef(); + auto s3_capabilities = S3Capabilities + { + .support_batch_delete = config.getBool("s3.support_batch_delete", true), + .support_proxy = config.getBool("s3.support_proxy", config.has("s3.proxy")), + }; + + auto s3_storage_settings = std::make_unique( + request_settings, + config.getUInt64("s3.min_bytes_for_seek", 1024 * 1024), + config.getInt("s3.list_object_keys_size", 1000), + config.getInt("s3.objects_chunk_size_to_delete", 1000), + config.getBool("s3.readonly", false)); + + auto key_generator = createObjectStorageKeysGeneratorAsIsWithPrefix(url.key); + auto client = createClient(context); + std::string disk_name = "StorageS3"; + + return std::make_shared( + std::move(client), std::move(s3_storage_settings), url, s3_capabilities, key_generator, /*disk_name*/disk_name); +} + +std::unique_ptr StorageS3Configuration::createClient(ContextPtr context) +{ + const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); + const Settings & local_settings = context->getSettingsRef(); + + auto client_configuration = S3::ClientFactory::instance().createClientConfiguration( + auth_settings.region, + context->getRemoteHostFilter(), + static_cast(global_settings.s3_max_redirects), + static_cast(global_settings.s3_retry_attempts), + global_settings.enable_s3_requests_logging, + /* for_disk_s3 = */ false, + request_settings.get_request_throttler, + request_settings.put_request_throttler, + url.uri.getScheme()); + + client_configuration.endpointOverride = url.endpoint; + client_configuration.maxConnections = static_cast(request_settings.max_connections); + client_configuration.http_connection_pool_size = global_settings.s3_http_connection_pool_size; + + auto headers = auth_settings.headers; + if (!headers_from_ast.empty()) + headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); + + client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; + + S3::ClientSettings client_settings{ + .use_virtual_addressing = url.is_virtual_hosted_style, + .disable_checksum = local_settings.s3_disable_checksum, + .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), + }; + + auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, + auth_settings.secret_access_key, + auth_settings.session_token); + + auto credentials_configuration = S3::CredentialsConfiguration + { + auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), + auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), + auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), + auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), + }; + + return S3::ClientFactory::instance().create( + client_configuration, + client_settings, + credentials.GetAWSAccessKeyId(), 
+        credentials.GetAWSSecretKey(),
+        auth_settings.server_side_encryption_customer_key_base64,
+        auth_settings.server_side_encryption_kms_config,
+        std::move(headers),
+        credentials_configuration);
+}
+
+void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection)
+{
+    validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys);
+
+    auto filename = collection.getOrDefault("filename", "");
+    if (!filename.empty())
+        url = S3::URI(std::filesystem::path(collection.get("url")) / filename);
+    else
+        url = S3::URI(collection.get("url"));
+
+    auth_settings.access_key_id = collection.getOrDefault("access_key_id", "");
+    auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", "");
+    auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1);
+    auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false);
+    auth_settings.expiration_window_seconds = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS);
+
+    format = collection.getOrDefault("format", format);
+    compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto"));
+    structure = collection.getOrDefault("structure", "auto");
+
+    request_settings = S3Settings::RequestSettings(collection);
+
+    static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value();
+
+    keys = {url.key};
+
+    //if (format == "auto" && get_format_from_file)
+    if (format == "auto")
+        format = FormatFactory::instance().getFormatFromFileName(url.key, true);
+}
+
+void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_structure)
+{
+    /// Supported signatures:
+    /// S3('url')
+    /// S3('url', 'format')
+    /// S3('url', 'format', 'compression')
+    /// S3('url', NOSIGN)
+    /// S3('url', NOSIGN, 'format')
+    /// S3('url', NOSIGN, 'format', 'compression')
+    /// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
+    /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token')
+    /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
+    /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format')
+    /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
+    /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression')
+    /// with optional headers() function
+
+    size_t count = StorageURL::evalArgsAndCollectHeaders(args, headers_from_ast, context);
+
+    if (count == 0 || count > (with_structure ? 7 : 6))
+        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                        "Storage S3 requires 1 to 6 arguments: "
+                        "url, [NOSIGN | access_key_id, secret_access_key, [session_token]], name of used format and [compression_method]");
+
+    std::unordered_map engine_args_to_idx;
+    bool no_sign_request = false;
+
+    /// For 2 arguments we support 2 possible variants:
+    /// - s3(source, format)
+    /// - s3(source, NOSIGN)
+    /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not.
+    if (count == 2)
+    {
+        auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN");
+        if (boost::iequals(second_arg, "NOSIGN"))
+            no_sign_request = true;
+        else
+            engine_args_to_idx = {{"format", 1}};
+    }
+    /// For 3 arguments we support 3 possible variants:
+    /// - s3(source, format, compression_method)
+    /// - s3(source, access_key_id, secret_access_key)
+    /// - s3(source, NOSIGN, format)
+    /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or a format name.
+    else if (count == 3)
+    {
+        auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN");
+        if (boost::iequals(second_arg, "NOSIGN"))
+        {
+            no_sign_request = true;
+            engine_args_to_idx = {{"format", 2}};
+        }
+        else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg))
+        {
+            if (with_structure)
+                engine_args_to_idx = {{"format", 1}, {"structure", 2}};
+            else
+                engine_args_to_idx = {{"format", 1}, {"compression_method", 2}};
+        }
+        else
+            engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}};
+    }
+    /// For 4 arguments we support the following variants:
+    /// if with_structure == 0:
+    /// - s3(source, access_key_id, secret_access_key, session_token)
+    /// - s3(source, access_key_id, secret_access_key, format)
+    /// - s3(source, NOSIGN, format, compression_method)
+    /// if with_structure == 1:
+    /// - s3(source, format, structure, compression_method)
+    /// - s3(source, access_key_id, secret_access_key, format)
+    /// - s3(source, access_key_id, secret_access_key, session_token)
+    /// - s3(source, NOSIGN, format, structure)
+    /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not.
+    else if (count == 4)
+    {
+        auto second_arg = checkAndGetLiteralArgument(args[1], "access_key_id/NOSIGN");
+        if (boost::iequals(second_arg, "NOSIGN"))
+        {
+            no_sign_request = true;
+            if (with_structure)
+                engine_args_to_idx = {{"format", 2}, {"structure", 3}};
+            else
+                engine_args_to_idx = {{"format", 2}, {"compression_method", 3}};
+        }
+        else if (with_structure && (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)))
+        {
+            engine_args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}};
+        }
+        else
+        {
+            auto fourth_arg = checkAndGetLiteralArgument(args[3], "session_token/format");
+            if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg))
+            {
+                engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}};
+            }
+            else
+            {
+                engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}};
+            }
+        }
+    }
+    /// For 5 arguments we support the following variants:
+    /// if with_structure == 0:
+    /// - s3(source, access_key_id, secret_access_key, session_token, format)
+    /// - s3(source, access_key_id, secret_access_key, format, compression)
+    /// if with_structure == 1:
+    /// - s3(source, access_key_id, secret_access_key, format, structure)
+    /// - s3(source, access_key_id, secret_access_key, session_token, format)
+    /// - s3(source, NOSIGN, format, structure, compression_method)
+    else if (count == 5)
+    {
+        if (with_structure)
+        {
+            auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id");
+            if (boost::iequals(second_arg, "NOSIGN"))
+            {
+                no_sign_request = true;
+                engine_args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}};
+            }
+            else
+            {
+                auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token");
+                if (fourth_arg ==
"auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + else + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "session_token/format"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression_method", 4}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; + } + } + } + else if (count == 6) + { + if (with_structure) + { + /// - s3(source, access_key_id, secret_access_key, format, structure, compression_method) + /// - s3(source, access_key_id, secret_access_key, session_token, format, structure) + /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); + if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; + } + } + else + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; + } + } + else if (with_structure && count == 7) + { + engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}; + } + + /// This argument is always the first + url = S3::URI(checkAndGetLiteralArgument(args[0], "url")); + + if (engine_args_to_idx.contains("format")) + { + format = checkAndGetLiteralArgument(args[engine_args_to_idx["format"]], "format"); + /// Set format to configuration only of it's not 'auto', + /// because we can have default format set in configuration. 
+ if (format != "auto") + format = format; + } + + if (engine_args_to_idx.contains("structure")) + structure = checkAndGetLiteralArgument(args[engine_args_to_idx["structure"]], "structure"); + + if (engine_args_to_idx.contains("compression_method")) + compression_method = checkAndGetLiteralArgument(args[engine_args_to_idx["compression_method"]], "compression_method"); + + if (engine_args_to_idx.contains("access_key_id")) + auth_settings.access_key_id = checkAndGetLiteralArgument(args[engine_args_to_idx["access_key_id"]], "access_key_id"); + + if (engine_args_to_idx.contains("secret_access_key")) + auth_settings.secret_access_key = checkAndGetLiteralArgument(args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); + + if (engine_args_to_idx.contains("session_token")) + auth_settings.session_token = checkAndGetLiteralArgument(args[engine_args_to_idx["session_token"]], "session_token"); + + if (no_sign_request) + auth_settings.no_sign_request = no_sign_request; + + static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value(); + auth_settings.no_sign_request = no_sign_request; + + keys = {url.key}; + + // if (format == "auto" && get_format_from_file) + if (format == "auto") + format = FormatFactory::instance().getFormatFromFileName(url.key, true); +} + +void StorageS3Configuration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override existed structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + HTTPHeaderEntries tmp_headers; + size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); + + if (count == 0 || count > 6) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to 6 arguments in table function, got {}", count); + + auto structure_literal = std::make_shared(structure_); + + /// s3(s3_url) + if (count == 1) + { + /// Add format=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// s3(s3_url, format) or s3(s3_url, NOSIGN) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. + else if (count == 2) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// If there is NOSIGN, add format=auto before structure. + if (boost::iequals(second_arg, "NOSIGN")) + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// s3(source, format, structure) or + /// s3(source, access_key_id, secret_access_key) or + /// s3(source, NOSIGN, format) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. + else if (count == 3) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + args.push_back(structure_literal); + } + else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + { + args[count - 1] = structure_literal; + } + else + { + /// Add format=auto before structure argument. 
+ args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + } + /// s3(source, format, structure, compression_method) or + /// s3(source, access_key_id, secret_access_key, format) or + /// s3(source, NOSIGN, format, structure) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. + else if (count == 4) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + args[count - 1] = structure_literal; + } + else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + { + args[count - 2] = structure_literal; + } + else + { + args.push_back(structure_literal); + } + } + /// s3(source, access_key_id, secret_access_key, format, structure) or + /// s3(source, NOSIGN, format, structure, compression_method) + /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. + else if (count == 5) + { + auto sedond_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(sedond_arg, "NOSIGN")) + { + args[count - 2] = structure_literal; + } + else + { + args[count - 1] = structure_literal; + } + } + /// s3(source, access_key_id, secret_access_key, format, structure, compression) + else if (count == 6) + { + args[count - 2] = structure_literal; + } + } +} + +} diff --git a/src/Storages/ObjectStorage/S3Configuration.h b/src/Storages/ObjectStorage/S3Configuration.h new file mode 100644 index 00000000000..34f5735e02a --- /dev/null +++ b/src/Storages/ObjectStorage/S3Configuration.h @@ -0,0 +1,46 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class StorageS3Configuration : public StorageObjectStorageConfiguration +{ +public: + Path getPath() const override { return url.key; } + void setPath(const Path & path) override { url.key = path; } + + const Paths & getPaths() const override { return keys; } + Paths & getPaths() override { return keys; } + + String getNamespace() const override { return url.bucket; } + String getDataSourceDescription() override; + + void check(ContextPtr context) const override; + StorageObjectStorageConfigurationPtr clone() override; + + ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + static void addStructureToArgs(ASTs & args, const String & structure, ContextPtr context); + +private: + S3::URI url; + S3::AuthSettings auth_settings; + S3Settings::RequestSettings request_settings; + /// If s3 configuration was passed from ast, then it is static. + /// If from config - it can be changed with config reload. + bool static_configuration = true; + /// Headers from ast is a part of static configuration. 
+ HTTPHeaderEntries headers_from_ast; + std::vector keys; + + std::unique_ptr createClient(ContextPtr context); + + bool initialized = false; +}; + +} diff --git a/src/Storages/ObjectStorage/Settings.h b/src/Storages/ObjectStorage/Settings.h new file mode 100644 index 00000000000..015cf9bc01d --- /dev/null +++ b/src/Storages/ObjectStorage/Settings.h @@ -0,0 +1,86 @@ +#pragma once +#include +#include +#include + +namespace CurrentMetrics +{ + extern const Metric ObjectStorageAzureThreads; + extern const Metric ObjectStorageAzureThreadsActive; + extern const Metric ObjectStorageAzureThreadsScheduled; + + extern const Metric ObjectStorageS3Threads; + extern const Metric ObjectStorageS3ThreadsActive; + extern const Metric ObjectStorageS3ThreadsScheduled; +} + +namespace DB +{ + +struct StorageObjectStorageSettings +{ + bool truncate_on_insert; + bool create_new_file_on_insert; + bool schema_inference_use_cache; + SchemaInferenceMode schema_inference_mode; +}; + +struct S3StorageSettings +{ + static StorageObjectStorageSettings create(const Settings & settings) + { + return StorageObjectStorageSettings{ + .truncate_on_insert = settings.s3_truncate_on_insert, + .create_new_file_on_insert = settings.s3_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_s3, + .schema_inference_mode = settings.schema_inference_mode, + }; + } + + static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_s3"; + + static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageS3Threads; } /// NOLINT + static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageS3ThreadsActive; } /// NOLINT + static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageS3ThreadsScheduled; } /// NOLINT +}; + +struct AzureStorageSettings +{ + static StorageObjectStorageSettings create(const Settings & settings) + { + return StorageObjectStorageSettings{ + .truncate_on_insert = settings.azure_truncate_on_insert, + .create_new_file_on_insert = settings.azure_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, + .schema_inference_mode = settings.schema_inference_mode, + }; + } + + static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_azure"; + + static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageAzureThreads; } /// NOLINT + static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageAzureThreadsActive; } /// NOLINT + static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageAzureThreadsScheduled; } /// NOLINT +}; + +struct HDFSStorageSettings +{ + static StorageObjectStorageSettings create(const Settings & settings) + { + return StorageObjectStorageSettings{ + .truncate_on_insert = settings.hdfs_truncate_on_insert, + .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, + .schema_inference_mode = settings.schema_inference_mode, + }; + } + + static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_hdfs"; + + /// TODO: s3 -> hdfs + static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageS3Threads; } /// NOLINT + static CurrentMetrics::Metric 
ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageS3ThreadsActive; } /// NOLINT + static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageS3ThreadsScheduled; } /// NOLINT +}; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp new file mode 100644 index 00000000000..9250ab8ecbe --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -0,0 +1,303 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int DATABASE_ACCESS_DENIED; + extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; + +} + +template +std::unique_ptr getStorageMetadata( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfigurationPtr & configuration, + const ColumnsDescription & columns, + const ConstraintsDescription & constraints, + std::optional format_settings, + const String & comment, + const std::string & engine_name, + const ContextPtr & context) +{ + auto storage_metadata = std::make_unique(); + if (columns.empty()) + { + auto fetched_columns = StorageObjectStorage::getTableStructureFromData( + object_storage, configuration, format_settings, context); + storage_metadata->setColumns(fetched_columns); + } + else + { + /// We don't allow special columns. + if (!columns.hasOnlyOrdinary()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Table engine {} doesn't support special columns " + "like MATERIALIZED, ALIAS or EPHEMERAL", + engine_name); + + storage_metadata->setColumns(columns); + } + + storage_metadata->setConstraints(constraints); + storage_metadata->setComment(comment); + return storage_metadata; +} + +template +StorageObjectStorage::StorageObjectStorage( + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const String & engine_name_, + ContextPtr context, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_, + bool distributed_processing_, + ASTPtr partition_by_) + : IStorage(table_id_, getStorageMetadata( + object_storage_, configuration_, columns_, constraints_, format_settings_, + comment, engine_name, context)) + , engine_name(engine_name_) + , virtual_columns(VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage( + getInMemoryMetadataPtr()->getSampleBlock().getNamesAndTypesList())) + , format_settings(format_settings_) + , partition_by(partition_by_) + , distributed_processing(distributed_processing_) + , object_storage(object_storage_) + , configuration(configuration_) +{ + FormatFactory::instance().checkFormatName(configuration->format); + configuration->check(context); + + StoredObjects objects; + for (const auto & key : configuration->getPaths()) + objects.emplace_back(key); +} + +template +Names StorageObjectStorage::getVirtualColumnNames() +{ + return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); +} + +template +bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) const +{ + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context, format_settings); +} + +template +bool StorageObjectStorage::prefersLargeBlocks() const +{ + return 
FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration->format); +} + +template +bool StorageObjectStorage::parallelizeOutputAfterReading(ContextPtr context) const +{ + return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration->format, context); +} + +template +std::pair +StorageObjectStorage::updateConfigurationAndGetCopy(ContextPtr local_context) +{ + std::lock_guard lock(configuration_update_mutex); + auto new_object_storage = configuration->createOrUpdateObjectStorage(local_context); + if (new_object_storage) + object_storage = new_object_storage; + return {configuration, object_storage}; +} + +template +SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) +{ + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + StorageSettings::SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING, + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; +} + +template +void StorageObjectStorage::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + size_t num_streams) +{ + if (partition_by && configuration->withWildcard()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Reading from a partitioned {} storage is not implemented yet", + getName()); + } + + auto this_ptr = std::static_pointer_cast(shared_from_this()); + auto read_from_format_info = prepareReadingFromFormat( + column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); + bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + && local_context->getSettingsRef().optimize_count_from_files; + + auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); + auto reading = std::make_unique>( + query_object_storage, + query_configuration, + getName(), + virtual_columns, + format_settings, + distributed_processing, + std::move(read_from_format_info), + need_only_count, + local_context, + max_block_size, + num_streams); + + query_plan.addStep(std::move(reading)); +} + +template +SinkToStoragePtr StorageObjectStorage::write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context, + bool /* async_insert */) +{ + auto insert_query = std::dynamic_pointer_cast(query); + auto partition_by_ast = insert_query + ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) + : nullptr; + bool is_partitioned_implementation = partition_by_ast && configuration->withWildcard(); + + auto sample_block = metadata_snapshot->getSampleBlock(); + auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); + + if (is_partitioned_implementation) + { + return std::make_shared( + object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); + } + + if (configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs()) + { + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "{} key '{}' contains globs, so the table is in readonly mode", + getName(), configuration->getPath()); + } + + if (!storage_settings.truncate_on_insert + && object_storage->exists(StoredObject(configuration->getPath()))) + { + if (storage_settings.create_new_file_on_insert) + { + size_t index = configuration->getPaths().size(); + const auto & first_key = configuration->getPaths()[0]; + auto pos = first_key.find_first_of('.'); + String new_key; + + do + { + new_key = first_key.substr(0, pos) + + "." + + std::to_string(index) + + (pos == std::string::npos ? "" : first_key.substr(pos)); + ++index; + } + while (object_storage->exists(StoredObject(new_key))); + + configuration->getPaths().push_back(new_key); + } + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Object in bucket {} with key {} already exists. " + "If you want to overwrite it, enable setting [engine_name]_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting [engine_name]_create_new_file_on_insert", + configuration->getNamespace(), configuration->getPaths().back()); + } + } + + return std::make_shared( + object_storage, configuration, format_settings, sample_block, local_context); +} + +template +void StorageObjectStorage::truncate( + const ASTPtr &, + const StorageMetadataPtr &, + ContextPtr, + TableExclusiveLockHolder &) +{ + if (configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs()) + { + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "{} key '{}' contains globs, so the table is in readonly mode and cannot be truncated", + getName(), configuration->getPath()); + } + + StoredObjects objects; + for (const auto & key : configuration->getPaths()) + objects.emplace_back(key); + + object_storage->removeObjectsIfExist(objects); +} + +template +ColumnsDescription StorageObjectStorage::getTableStructureFromData( + ObjectStoragePtr object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + ContextPtr context) +{ + using Source = StorageObjectStorageSource; + + ObjectInfos read_keys; + auto file_iterator = Source::createFileIterator( + configuration, object_storage, /* distributed_processing */false, + context, /* predicate */{}, /* virtual_columns */{}, &read_keys); + + ReadBufferIterator read_buffer_iterator( + object_storage, configuration, file_iterator, + format_settings, read_keys, context); + + const bool retry = configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs(); + return readSchemaFromFormat( + configuration->format, format_settings, + read_buffer_iterator, retry, context); +} + +template class StorageObjectStorage; +template class StorageObjectStorage; +template class StorageObjectStorage; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h new file mode 100644 index 00000000000..0b29845ba5c --- /dev/null +++ 
b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -0,0 +1,116 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +struct SelectQueryInfo; +class StorageObjectStorageConfiguration; +struct S3StorageSettings; +struct HDFSStorageSettings; +struct AzureStorageSettings; +class PullingPipelineExecutor; +using ReadTaskCallback = std::function; +class IOutputFormat; +class IInputFormat; +class SchemaCache; + + +template +class StorageObjectStorage : public IStorage +{ +public: + using Configuration = StorageObjectStorageConfiguration; + using ConfigurationPtr = std::shared_ptr; + using ObjectInfo = RelativePathWithMetadata; + using ObjectInfoPtr = std::shared_ptr; + using ObjectInfos = std::vector; + + StorageObjectStorage( + ConfigurationPtr configuration_, + ObjectStoragePtr object_storage_, + const String & engine_name_, + ContextPtr context_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const String & comment, + std::optional format_settings_, + bool distributed_processing_ = false, + ASTPtr partition_by_ = nullptr); + + String getName() const override { return engine_name; } + + void read( + QueryPlan & query_plan, + const Names &, + const StorageSnapshotPtr &, + SelectQueryInfo &, + ContextPtr, + QueryProcessingStage::Enum, + size_t, + size_t) override; + + SinkToStoragePtr write( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr context, + bool async_insert) override; + + void truncate( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + ContextPtr local_context, + TableExclusiveLockHolder &) override; + + NamesAndTypesList getVirtuals() const override { return virtual_columns; } + + static Names getVirtualColumnNames(); + + bool supportsPartitionBy() const override { return true; } + + bool supportsSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization() const override { return true; } + + bool supportsSubsetOfColumns(const ContextPtr & context) const; + + bool prefersLargeBlocks() const override; + + bool parallelizeOutputAfterReading(ContextPtr context) const override; + + static SchemaCache & getSchemaCache(const ContextPtr & context); + + static ColumnsDescription getTableStructureFromData( + ObjectStoragePtr object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + ContextPtr context); + +protected: + virtual std::pair + updateConfigurationAndGetCopy(ContextPtr local_context); + + const std::string engine_name; + const NamesAndTypesList virtual_columns; + std::optional format_settings; + const ASTPtr partition_by; + const bool distributed_processing; + + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + std::mutex configuration_update_mutex; +}; + +using StorageS3 = StorageObjectStorage; +using StorageAzureBlobStorage = StorageObjectStorage; +using StorageHDFS = StorageObjectStorage; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp new file mode 100644 index 00000000000..414932016f4 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -0,0 +1,107 @@ +#include "Storages/ObjectStorage/StorageObjectStorageCluster.h" + +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + 
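+/// StorageObjectStorageCluster is the *Cluster counterpart of StorageObjectStorage (e.g. s3Cluster):
+/// the initiator replica builds the file iterator and hands out one object path per request to the
+/// worker replicas through the task iterator extension implemented below.
+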
+namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +template +StorageObjectStorageCluster::StorageObjectStorageCluster( + const String & cluster_name_, + const Storage::ConfigurationPtr & configuration_, + ObjectStoragePtr object_storage_, + const String & engine_name_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + bool structure_argument_was_provided_) + : IStorageCluster(cluster_name_, + table_id_, + getLogger(fmt::format("{}({})", engine_name_, table_id_.table_name)), + structure_argument_was_provided_) + , engine_name(engine_name_) + , configuration{configuration_} + , object_storage(object_storage_) +{ + configuration->check(context_); + StorageInMemoryMetadata storage_metadata; + + if (columns_.empty()) + { + /// `format_settings` is set to std::nullopt, because StorageObjectStorageCluster is used only as table function + auto columns = StorageObjectStorage::getTableStructureFromData( + object_storage, configuration, /*format_settings=*/std::nullopt, context_); + storage_metadata.setColumns(columns); + } + else + storage_metadata.setColumns(columns_); + + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); + + virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage( + storage_metadata.getSampleBlock().getNamesAndTypesList()); +} + +template +void StorageObjectStorageCluster::addColumnsStructureToQuery( + ASTPtr & query, + const String & structure, + const ContextPtr & context) +{ + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected SELECT query from table function {}, got '{}'", + engine_name, queryToString(query)); + } + using TableFunction = TableFunctionObjectStorageCluster; + TableFunction::addColumnsStructureToArguments(expression_list->children, structure, context); +} + +template +RemoteQueryExecutor::Extension +StorageObjectStorageCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr &) const +{ + auto iterator = std::make_shared( + object_storage, configuration, predicate, virtual_columns, nullptr); + + auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next(0)->relative_path; }); + return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; +} + + +#if USE_AWS_S3 +template class StorageObjectStorageCluster; +#endif + +#if USE_AZURE_BLOB_STORAGE +template class StorageObjectStorageCluster; +#endif + +#if USE_HDFS +template class StorageObjectStorageCluster; +#endif + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h new file mode 100644 index 00000000000..b1f9af14e03 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -0,0 +1,72 @@ +#pragma once + +#include "config.h" + +#include +#include +#include +#include +#include + +namespace DB +{ + +class StorageS3Settings; +class StorageAzureBlobSettings; + +class Context; + +template +class StorageObjectStorageCluster : public IStorageCluster +{ +public: + using Storage = StorageObjectStorage; + using Source = StorageObjectStorageSource; + + StorageObjectStorageCluster( + const String & cluster_name_, + const Storage::ConfigurationPtr & configuration_, + ObjectStoragePtr object_storage_, + const String & engine_name_, + const StorageID & table_id_, + const 
ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + bool structure_argument_was_provided_); + + std::string getName() const override { return engine_name; } + + NamesAndTypesList getVirtuals() const override { return virtual_columns; } + + RemoteQueryExecutor::Extension + getTaskIteratorExtension( + const ActionsDAG::Node * predicate, + const ContextPtr & context) const override; + + bool supportsSubcolumns() const override { return true; } + + bool supportsTrivialCountOptimization() const override { return true; } + +private: + void updateBeforeRead(const ContextPtr & /* context */) override {} + + void addColumnsStructureToQuery( + ASTPtr & query, + const String & structure, + const ContextPtr & context) override; + + const String & engine_name; + const Storage::ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; + NamesAndTypesList virtual_columns; +}; + + +#if USE_AWS_S3 +using StorageS3Cluster = StorageObjectStorageCluster; +#endif +#if USE_AZURE_BLOB_STORAGE +using StorageAzureBlobCluster = StorageObjectStorageCluster; +#endif + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h new file mode 100644 index 00000000000..34ab8ebec66 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -0,0 +1,155 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace DB +{ +class StorageObjectStorageSink : public SinkToStorage +{ +public: + StorageObjectStorageSink( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context, + const std::string & blob_path = "") + : SinkToStorage(sample_block_) + , sample_block(sample_block_) + , format_settings(format_settings_) + { + const auto & settings = context->getSettingsRef(); + const auto path = blob_path.empty() ? configuration->getPaths().back() : blob_path; + const auto chosen_compression_method = chooseCompressionMethod(path, configuration->compression_method); + + auto buffer = object_storage->writeObject( + StoredObject(path), WriteMode::Rewrite, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, context->getWriteSettings()); + + write_buf = wrapWriteBufferWithCompressionMethod( + std::move(buffer), + chosen_compression_method, + static_cast(settings.output_format_compression_level), + static_cast(settings.output_format_compression_zstd_window_log)); + + writer = FormatFactory::instance().getOutputFormatParallelIfPossible( + configuration->format, *write_buf, sample_block, context, format_settings); + } + + String getName() const override { return "StorageObjectStorageSink"; } + + void consume(Chunk chunk) override + { + std::lock_guard lock(cancel_mutex); + if (cancelled) + return; + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + } + + void onCancel() override + { + std::lock_guard lock(cancel_mutex); + finalize(); + cancelled = true; + } + + void onException(std::exception_ptr exception) override + { + std::lock_guard lock(cancel_mutex); + try + { + std::rethrow_exception(exception); + } + catch (...) + { + /// An exception context is needed to proper delete write buffers without finalization. 
+ release(); + } + } + + void onFinish() override + { + std::lock_guard lock(cancel_mutex); + finalize(); + } + +private: + const Block sample_block; + const std::optional format_settings; + + std::unique_ptr write_buf; + OutputFormatPtr writer; + bool cancelled = false; + std::mutex cancel_mutex; + + void finalize() + { + if (!writer) + return; + + try + { + writer->finalize(); + writer->flush(); + write_buf->finalize(); + } + catch (...) + { + /// Stop ParallelFormattingOutputFormat correctly. + release(); + throw; + } + } + + void release() + { + writer.reset(); + write_buf->finalize(); + } +}; + +class PartitionedStorageObjectStorageSink : public PartitionedSink +{ +public: + PartitionedStorageObjectStorageSink( + ObjectStoragePtr object_storage_, + StorageObjectStorageConfigurationPtr configuration_, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context_, + const ASTPtr & partition_by) + : PartitionedSink(partition_by, context_, sample_block_) + , object_storage(object_storage_) + , configuration(configuration_) + , format_settings(format_settings_) + , sample_block(sample_block_) + , context(context_) + { + } + + SinkPtr createSinkForPartition(const String & partition_id) override + { + auto blob = configuration->getPaths().back(); + auto partition_key = replaceWildcards(blob, partition_id); + validatePartitionKey(partition_key, true); + return std::make_shared( + object_storage, + configuration, + format_settings, + sample_block, + context, + partition_key + ); + } + +private: + ObjectStoragePtr object_storage; + StorageObjectStorageConfigurationPtr configuration; + const std::optional format_settings; + const Block sample_block; + const ContextPtr context; +}; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp new file mode 100644 index 00000000000..9fc7925a6d1 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -0,0 +1,464 @@ +#include "StorageObjectStorageSource.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace ProfileEvents +{ + extern const Event EngineFileLikeReadFiles; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_COMPILE_REGEXP; +} + +template +std::shared_ptr::IIterator> +StorageObjectStorageSource::createFileIterator( + Storage::ConfigurationPtr configuration, + ObjectStoragePtr object_storage, + bool distributed_processing, + const ContextPtr & local_context, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns, + ObjectInfos * read_keys, + std::function file_progress_callback) +{ + if (distributed_processing) + return std::make_shared(local_context->getReadTaskCallback()); + + if (configuration->isNamespaceWithGlobs()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); + + if (configuration->isPathWithGlobs()) + { + /// Iterate through disclosed globs and make a source for each file + return std::make_shared( + object_storage, configuration, predicate, virtual_columns, read_keys, file_progress_callback); + } + else + { + return std::make_shared( + object_storage, configuration, virtual_columns, read_keys, file_progress_callback); + } +} + +template +StorageObjectStorageSource::GlobIterator::GlobIterator( + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const ActionsDAG::Node * 
predicate, + const NamesAndTypesList & virtual_columns_, + ObjectInfos * read_keys_, + std::function file_progress_callback_) + : object_storage(object_storage_) + , configuration(configuration_) + , virtual_columns(virtual_columns_) + , read_keys(read_keys_) + , file_progress_callback(file_progress_callback_) +{ + if (configuration->isNamespaceWithGlobs()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); + } + else if (configuration->isPathWithGlobs()) + { + const auto key_with_globs = configuration_->getPath(); + const auto key_prefix = configuration->getPathWithoutGlob(); + object_storage_iterator = object_storage->iterate(key_prefix); + + matcher = std::make_unique(makeRegexpPatternFromGlobs(key_with_globs)); + if (matcher->ok()) + { + recursive = key_with_globs == "/**"; + filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + } + else + { + throw Exception( + ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", key_with_globs, matcher->error()); + } + } + else + { + const auto key_with_globs = configuration_->getPath(); + auto object_metadata = object_storage->getObjectMetadata(key_with_globs); + auto object_info = std::make_shared(key_with_globs, object_metadata); + + object_infos.emplace_back(object_info); + if (read_keys) + read_keys->emplace_back(object_info); + + if (file_progress_callback) + file_progress_callback(FileProgress(0, object_metadata.size_bytes)); + + is_finished = true; + } +} + +template +StorageObjectStorageSource::ObjectInfoPtr +StorageObjectStorageSource::GlobIterator::next(size_t /* processor */) +{ + std::lock_guard lock(next_mutex); + + if (is_finished && index >= object_infos.size()) + return {}; + + bool need_new_batch = object_infos.empty() || index >= object_infos.size(); + + if (need_new_batch) + { + ObjectInfos new_batch; + while (new_batch.empty()) + { + auto result = object_storage_iterator->getCurrentBatchAndScheduleNext(); + if (result.has_value()) + { + new_batch = result.value(); + } + else + { + is_finished = true; + return {}; + } + + for (auto it = new_batch.begin(); it != new_batch.end();) + { + if (!recursive && !re2::RE2::FullMatch((*it)->relative_path, *matcher)) + it = new_batch.erase(it); + else + ++it; + } + } + + index = 0; + + if (filter_dag) + { + std::vector paths; + paths.reserve(new_batch.size()); + for (auto & object_info : new_batch) + paths.push_back(fs::path(configuration->getNamespace()) / object_info->relative_path); + + VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); + } + + if (read_keys) + read_keys->insert(read_keys->end(), new_batch.begin(), new_batch.end()); + + object_infos = std::move(new_batch); + if (file_progress_callback) + { + for (const auto & object_info : object_infos) + { + file_progress_callback(FileProgress(0, object_info->metadata.size_bytes)); + } + } + } + + size_t current_index = index++; + if (current_index >= object_infos.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); + + return object_infos[current_index]; +} + +template +StorageObjectStorageSource::KeysIterator::KeysIterator( + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const NamesAndTypesList & virtual_columns_, + ObjectInfos * read_keys_, + std::function file_progress_callback_) + : object_storage(object_storage_) + , configuration(configuration_) + , virtual_columns(virtual_columns_) + , 
file_progress_callback(file_progress_callback_) + , keys(configuration->getPaths()) +{ + if (read_keys_) + { + /// TODO: should we add metadata if we anyway fetch it if file_progress_callback is passed? + for (auto && key : keys) + { + auto object_info = std::make_shared(key, ObjectMetadata{}); + read_keys_->emplace_back(object_info); + } + } +} + +template +StorageObjectStorageSource::ObjectInfoPtr +StorageObjectStorageSource::KeysIterator::next(size_t /* processor */) +{ + size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= keys.size()) + return {}; + + auto key = keys[current_index]; + + ObjectMetadata metadata{}; + if (file_progress_callback) + { + metadata = object_storage->getObjectMetadata(key); + file_progress_callback(FileProgress(0, metadata.size_bytes)); + } + + return std::make_shared(key, metadata); +} + +template +Chunk StorageObjectStorageSource::generate() +{ + while (true) + { + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); + break; + } + + Chunk chunk; + if (reader->pull(chunk)) + { + UInt64 num_rows = chunk.getNumRows(); + total_rows_in_file += num_rows; + size_t chunk_size = 0; + if (const auto * input_format = reader.getInputFormat()) + chunk_size = input_format->getApproxBytesReadForChunk(); + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); + + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + chunk, + read_from_format_info.requested_virtual_columns, + fs::path(configuration->getNamespace()) / reader.getRelativePath(), + reader.getObjectInfo().metadata.size_bytes); + + return chunk; + } + + if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) + addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); + + total_rows_in_file = 0; + + assert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) + break; + + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. 
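+        /// While the current `reader` is being drained above, `reader_future` already prepares the reader
+        /// for the next object on `create_reader_pool`, so parsing one file overlaps with opening the next.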
+ create_reader_pool.wait(); + reader_future = createReaderAsync(); + } + + return {}; +} + +template +void StorageObjectStorageSource::addNumRowsToCache(const String & path, size_t num_rows) +{ + String source = fs::path(configuration->getDataSourceDescription()) / path; + auto cache_key = getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); + Storage::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); +} + +template +std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfoPtr & object_info) +{ + String source = fs::path(configuration->getDataSourceDescription()) / object_info->relative_path; + auto cache_key = getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); + auto get_last_mod_time = [&]() -> std::optional + { + auto last_mod = object_info->metadata.last_modified; + if (last_mod) + return last_mod->epochTime(); + else + { + object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); + return object_info->metadata.last_modified->epochMicroseconds(); + } + }; + return Storage::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); +} + +template +StorageObjectStorageSource::StorageObjectStorageSource( + String name_, + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const ReadFromFormatInfo & info, + std::optional format_settings_, + ContextPtr context_, + UInt64 max_block_size_, + std::shared_ptr file_iterator_, + bool need_only_count_) + :ISource(info.source_header, false) + , WithContext(context_) + , name(std::move(name_)) + , object_storage(object_storage_) + , configuration(configuration_) + , format_settings(format_settings_) + , max_block_size(max_block_size_) + , need_only_count(need_only_count_) + , read_from_format_info(info) + , columns_desc(info.columns_description) + , file_iterator(file_iterator_) + , create_reader_pool(StorageSettings::ObjectStorageThreads(), + StorageSettings::ObjectStorageThreadsActive(), + StorageSettings::ObjectStorageThreadsScheduled(), 1) + , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "Reader")) +{ + reader = createReader(); + if (reader) + reader_future = createReaderAsync(); +} + +template +StorageObjectStorageSource::~StorageObjectStorageSource() +{ + create_reader_pool.wait(); +} + +template +StorageObjectStorageSource::ReaderHolder +StorageObjectStorageSource::createReader(size_t processor) +{ + auto object_info = file_iterator->next(processor); + if (object_info->relative_path.empty()) + return {}; + + if (object_info->metadata.size_bytes == 0) + object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); + + QueryPipelineBuilder builder; + std::shared_ptr source; + std::unique_ptr read_buf; + std::optional num_rows_from_cache = need_only_count + && getContext()->getSettingsRef().use_cache_for_count_from_files + ? tryGetNumRowsFromCache(object_info) + : std::nullopt; + + if (num_rows_from_cache) + { + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. 
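+        /// For example, if num_rows_from_cache = 1'000'000 and max_block_size = 65536,
+        /// this produces 15 chunks of 65536 rows and a final chunk of 16960 rows.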
+ source = std::make_shared( + read_from_format_info.format_header, *num_rows_from_cache, max_block_size); + builder.init(Pipe(source)); + } + else + { + std::optional max_parsing_threads; + if (need_only_count) + max_parsing_threads = 1; + + auto compression_method = chooseCompressionMethod( + object_info->relative_path, configuration->compression_method); + + read_buf = createReadBuffer(object_info->relative_path, object_info->metadata.size_bytes); + + auto input_format = FormatFactory::instance().getInput( + configuration->format, *read_buf, read_from_format_info.format_header, + getContext(), max_block_size, format_settings, max_parsing_threads, + std::nullopt, /* is_remote_fs */ true, compression_method); + + if (need_only_count) + input_format->needOnlyCount(); + + builder.init(Pipe(input_format)); + + if (columns_desc.hasDefaults()) + { + builder.addSimpleTransform( + [&](const Block & header) + { + return std::make_shared(header, columns_desc, *input_format, getContext()); + }); + } + + source = input_format; + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, read_from_format_info.requested_columns); + }); + + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + auto current_reader = std::make_unique(*pipeline); + + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); + + return ReaderHolder{object_info, std::move(read_buf), + std::move(source), std::move(pipeline), std::move(current_reader)}; +} + +template +std::future::ReaderHolder> +StorageObjectStorageSource::createReaderAsync(size_t processor) +{ + return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); +} + +template +std::unique_ptr StorageObjectStorageSource::createReadBuffer(const String & key, size_t object_size) +{ + auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + read_settings.enable_filesystem_cache = false; + read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; + + // auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; + // const bool object_too_small = object_size <= 2 * download_buffer_size; + + // Create a read buffer that will prefetch the first ~1 MB of the file. + // When reading lots of tiny files, this prefetching almost doubles the throughput. + // For bigger files, parallel reading is more useful. 
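+    // Note: the prefetching branch below is currently commented out, so this function always falls
+    // through to the plain object_storage->readObject() call at the end.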
+ // if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) + // { + // LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); + + // auto async_reader = object_storage->readObjects( + // StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, read_settings); + + // async_reader->setReadUntilEnd(); + // if (read_settings.remote_fs_prefetch) + // async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); + + // return async_reader; + // } + // else + return object_storage->readObject(StoredObject(key), read_settings); +} + +template class StorageObjectStorageSource; +template class StorageObjectStorageSource; +template class StorageObjectStorageSource; + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h new file mode 100644 index 00000000000..f68a5d47456 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -0,0 +1,217 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ +template +class StorageObjectStorageSource : public ISource, WithContext +{ + friend class StorageS3QueueSource; +public: + using Source = StorageObjectStorageSource; + using Storage = StorageObjectStorage; + using ObjectInfo = Storage::ObjectInfo; + using ObjectInfoPtr = Storage::ObjectInfoPtr; + using ObjectInfos = Storage::ObjectInfos; + + class IIterator : public WithContext + { + public: + virtual ~IIterator() = default; + + virtual size_t estimatedKeysCount() = 0; + virtual ObjectInfoPtr next(size_t processor) = 0; + }; + + class ReadTaskIterator; + class GlobIterator; + class KeysIterator; + + StorageObjectStorageSource( + String name_, + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration, + const ReadFromFormatInfo & info, + std::optional format_settings_, + ContextPtr context_, + UInt64 max_block_size_, + std::shared_ptr file_iterator_, + bool need_only_count_); + + ~StorageObjectStorageSource() override; + + String getName() const override { return name; } + + Chunk generate() override; + + static std::shared_ptr createFileIterator( + Storage::ConfigurationPtr configuration, + ObjectStoragePtr object_storage, + bool distributed_processing, + const ContextPtr & local_context, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns, + ObjectInfos * read_keys, + std::function file_progress_callback = {}); + +protected: + void addNumRowsToCache(const String & path, size_t num_rows); + std::optional tryGetNumRowsFromCache(const ObjectInfoPtr & object_info); + + const String name; + ObjectStoragePtr object_storage; + const Storage::ConfigurationPtr configuration; + const std::optional format_settings; + const UInt64 max_block_size; + const bool need_only_count; + const ReadFromFormatInfo read_from_format_info; + + ColumnsDescription columns_desc; + std::shared_ptr file_iterator; + size_t total_rows_in_file = 0; + + struct ReaderHolder + { + public: + ReaderHolder( + ObjectInfoPtr object_info_, + std::unique_ptr read_buf_, + std::shared_ptr source_, + std::unique_ptr pipeline_, + std::unique_ptr reader_) + : object_info(std::move(object_info_)) + , read_buf(std::move(read_buf_)) + , source(std::move(source_)) + , pipeline(std::move(pipeline_)) + , reader(std::move(reader_)) + { + } + + ReaderHolder() = default; + ReaderHolder(const ReaderHolder & other) = delete; + ReaderHolder & operator=(const ReaderHolder & other) = delete; + ReaderHolder(ReaderHolder && other) noexcept { *this = 
std::move(other); } + + ReaderHolder & operator=(ReaderHolder && other) noexcept + { + /// The order of destruction is important. + /// reader uses pipeline, pipeline uses read_buf. + reader = std::move(other.reader); + pipeline = std::move(other.pipeline); + source = std::move(other.source); + read_buf = std::move(other.read_buf); + object_info = std::move(other.object_info); + return *this; + } + + explicit operator bool() const { return reader != nullptr; } + PullingPipelineExecutor * operator->() { return reader.get(); } + const PullingPipelineExecutor * operator->() const { return reader.get(); } + const String & getRelativePath() const { return object_info->relative_path; } + const ObjectInfo & getObjectInfo() const { return *object_info; } + const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } + + private: + ObjectInfoPtr object_info; + std::unique_ptr read_buf; + std::shared_ptr source; + std::unique_ptr pipeline; + std::unique_ptr reader; + }; + + ReaderHolder reader; + LoggerPtr log = getLogger("StorageObjectStorageSource"); + ThreadPool create_reader_pool; + ThreadPoolCallbackRunner create_reader_scheduler; + std::future reader_future; + + /// Recreate ReadBuffer and Pipeline for each file. + ReaderHolder createReader(size_t processor = 0); + std::future createReaderAsync(size_t processor = 0); + + std::unique_ptr createReadBuffer(const String & key, size_t object_size); +}; + +template +class StorageObjectStorageSource::ReadTaskIterator : public IIterator +{ +public: + explicit ReadTaskIterator(const ReadTaskCallback & callback_) : callback(callback_) {} + + size_t estimatedKeysCount() override { return 0; } /// TODO FIXME + + ObjectInfoPtr next(size_t) override { return std::make_shared( callback(), ObjectMetadata{} ); } + +private: + ReadTaskCallback callback; +}; + +template +class StorageObjectStorageSource::GlobIterator : public IIterator +{ +public: + GlobIterator( + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const ActionsDAG::Node * predicate, + const NamesAndTypesList & virtual_columns_, + ObjectInfos * read_keys_, + std::function file_progress_callback_ = {}); + + ~GlobIterator() override = default; + + size_t estimatedKeysCount() override { return object_infos.size(); } + + ObjectInfoPtr next(size_t processor) override; + +private: + ObjectStoragePtr object_storage; + Storage::ConfigurationPtr configuration; + ActionsDAGPtr filter_dag; + NamesAndTypesList virtual_columns; + + size_t index = 0; + + ObjectInfos object_infos; + ObjectInfos * read_keys; + ObjectStorageIteratorPtr object_storage_iterator; + bool recursive{false}; + + std::unique_ptr matcher; + + void createFilterAST(const String & any_key); + bool is_finished = false; + std::mutex next_mutex; + + std::function file_progress_callback; +}; + +template +class StorageObjectStorageSource::KeysIterator : public IIterator +{ +public: + KeysIterator( + ObjectStoragePtr object_storage_, + Storage::ConfigurationPtr configuration_, + const NamesAndTypesList & virtual_columns_, + ObjectInfos * read_keys_, + std::function file_progress_callback = {}); + + ~KeysIterator() override = default; + + size_t estimatedKeysCount() override { return keys.size(); } + + ObjectInfoPtr next(size_t processor) override; + +private: + const ObjectStoragePtr object_storage; + const Storage::ConfigurationPtr configuration; + const NamesAndTypesList virtual_columns; + const std::function file_progress_callback; + const std::vector keys; + std::atomic index = 0; +}; +} diff 
--git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp new file mode 100644 index 00000000000..bc9f93690f5 --- /dev/null +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -0,0 +1,166 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +static void initializeConfiguration( + StorageObjectStorageConfiguration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + configuration.fromNamedCollection(*named_collection); + else + configuration.fromAST(engine_args, local_context, with_table_structure); +} + +template +static std::shared_ptr> createStorageObjectStorage( + const StorageFactory::Arguments & args, + typename StorageObjectStorage::ConfigurationPtr configuration, + const String & engine_name, + ContextPtr context) +{ + auto & engine_args = args.engine_args; + if (engine_args.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + + // Use format settings from global server context + settings from + // the SETTINGS clause of the create query. Settings from current + // session and user are ignored. + std::optional format_settings; + if (args.storage_def->settings) + { + FormatFactorySettings user_format_settings; + + // Apply changed settings from global context, but ignore the + // unknown ones, because we only have the format settings here. + const auto & changes = context->getSettingsRef().changes(); + for (const auto & change : changes) + { + if (user_format_settings.has(change.name)) + user_format_settings.set(change.name, change.value); + } + + // Apply changes from SETTINGS clause, with validation. 
+ user_format_settings.applyChanges(args.storage_def->settings->changes); + format_settings = getFormatSettings(context, user_format_settings); + } + else + { + format_settings = getFormatSettings(context); + } + + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + + return std::make_shared>( + configuration, + configuration->createOrUpdateObjectStorage(context), + engine_name, + args.getContext(), + args.table_id, + args.columns, + args.constraints, + args.comment, + format_settings, + /* distributed_processing */ false, + partition_by); +} + +#if USE_AZURE_BLOB_STORAGE +void registerStorageAzure(StorageFactory & factory) +{ + factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) + { + auto context = args.getLocalContext(); + auto configuration = std::make_shared(); + initializeConfiguration(*configuration, args.engine_args, context, false); + return createStorageObjectStorage(args, configuration, "Azure", context); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::AZURE, + }); +} +#endif + +#if USE_AWS_S3 +void registerStorageS3Impl(const String & name, StorageFactory & factory) +{ + factory.registerStorage(name, [=](const StorageFactory::Arguments & args) + { + auto context = args.getLocalContext(); + auto configuration = std::make_shared(); + initializeConfiguration(*configuration, args.engine_args, context, false); + return createStorageObjectStorage(args, configuration, name, context); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +void registerStorageS3(StorageFactory & factory) +{ + return registerStorageS3Impl("S3", factory); +} + +void registerStorageCOS(StorageFactory & factory) +{ + return registerStorageS3Impl("COSN", factory); +} + +void registerStorageOSS(StorageFactory & factory) +{ + return registerStorageS3Impl("OSS", factory); +} + +#endif + +#if USE_HDFS +void registerStorageHDFS(StorageFactory & factory) +{ + factory.registerStorage("HDFS", [=](const StorageFactory::Arguments & args) + { + auto context = args.getLocalContext(); + auto configuration = std::make_shared(); + initializeConfiguration(*configuration, args.engine_args, context, false); + return createStorageObjectStorage(args, configuration, "HDFS", context); + }, + { + .supports_settings = true, + .supports_sort_order = true, // for partition by + .supports_schema_inference = true, + .source_access_type = AccessType::HDFS, + }); +} +#endif + +void registerStorageObjectStorage(StorageFactory & factory) +{ +#if USE_AWS_S3 + registerStorageS3(factory); + registerStorageCOS(factory); + registerStorageOSS(factory); +#endif +#if USE_AZURE_BLOB_STORAGE + registerStorageAzure(factory); +#endif +#if USE_HDFS + registerStorageHDFS(factory); +#endif +} + +} diff --git a/src/Storages/ObjectStorageConfiguration.h b/src/Storages/ObjectStorageConfiguration.h new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index b4f5f957f76..bd34d1ec093 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -5,9 +5,9 @@ #include #include #include -#include #include #include +#include namespace CurrentMetrics @@ -31,11 +31,11 @@ namespace ErrorCodes extern const 
int LOGICAL_ERROR; } -StorageS3QueueSource::S3QueueKeyWithInfo::S3QueueKeyWithInfo( +StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( const std::string & key_, - std::optional info_, + const ObjectMetadata & object_metadata_, Metadata::ProcessingNodeHolderPtr processing_holder_) - : StorageS3Source::KeyWithInfo(key_, info_) + : Source::ObjectInfo(key_, object_metadata_) , processing_holder(processing_holder_) { } @@ -55,15 +55,15 @@ StorageS3QueueSource::FileIterator::FileIterator( if (sharded_processing) { for (const auto & id : metadata->getProcessingIdsForShard(current_shard)) - sharded_keys.emplace(id, std::deque{}); + sharded_keys.emplace(id, std::deque{}); } } -StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(size_t idx) +StorageS3QueueSource::Source::ObjectInfoPtr StorageS3QueueSource::FileIterator::next(size_t processor) { while (!shutdown_called) { - KeyWithInfoPtr val{nullptr}; + Source::ObjectInfoPtr val{nullptr}; { std::unique_lock lk(sharded_keys_mutex, std::defer_lock); @@ -73,7 +73,7 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si /// we need to check sharded_keys and to next() under lock. lk.lock(); - if (auto it = sharded_keys.find(idx); it != sharded_keys.end()) + if (auto it = sharded_keys.find(processor); it != sharded_keys.end()) { auto & keys = it->second; if (!keys.empty()) @@ -86,24 +86,24 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si { throw Exception(ErrorCodes::LOGICAL_ERROR, "Processing id {} does not exist (Expected ids: {})", - idx, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); + processor, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); } } if (!val) { - val = glob_iterator->next(); + val = glob_iterator->next(processor); if (val && sharded_processing) { - const auto processing_id_for_key = metadata->getProcessingIdForPath(val->key); - if (idx != processing_id_for_key) + const auto processing_id_for_key = metadata->getProcessingIdForPath(val->relative_path); + if (processor != processing_id_for_key) { if (metadata->isProcessingIdBelongsToShard(processing_id_for_key, current_shard)) { LOG_TEST(log, "Putting key {} into queue of processor {} (total: {})", - val->key, processing_id_for_key, sharded_keys.size()); + val->relative_path, processing_id_for_key, sharded_keys.size()); - if (auto it = sharded_keys.find(idx); it != sharded_keys.end()) + if (auto it = sharded_keys.find(processor); it != sharded_keys.end()) { it->second.push_back(val); } @@ -111,7 +111,7 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si { throw Exception(ErrorCodes::LOGICAL_ERROR, "Processing id {} does not exist (Expected ids: {})", - idx, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); + processor, fmt::join(metadata->getProcessingIdsForShard(current_shard), ", ")); } } continue; @@ -129,25 +129,25 @@ StorageS3QueueSource::KeyWithInfoPtr StorageS3QueueSource::FileIterator::next(si return {}; } - auto processing_holder = metadata->trySetFileAsProcessing(val->key); + auto processing_holder = metadata->trySetFileAsProcessing(val->relative_path); if (shutdown_called) { LOG_TEST(log, "Shutdown was called, stopping file iterator"); return {}; } - LOG_TEST(log, "Checking if can process key {} for processing_id {}", val->key, idx); + LOG_TEST(log, "Checking if can process key {} for processing_id {}", val->relative_path, processor); if (processing_holder) { - return 
std::make_shared(val->key, val->info, processing_holder); + return std::make_shared(val->relative_path, val->metadata, processing_holder); } else if (sharded_processing - && metadata->getFileStatus(val->key)->state == S3QueueFilesMetadata::FileStatus::State::Processing) + && metadata->getFileStatus(val->relative_path)->state == S3QueueFilesMetadata::FileStatus::State::Processing) { throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} is processing by someone else in sharded processing. " - "It is a bug", val->key); + "It is a bug", val->relative_path); } } return {}; @@ -161,7 +161,7 @@ size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() StorageS3QueueSource::StorageS3QueueSource( String name_, const Block & header_, - std::unique_ptr internal_source_, + std::unique_ptr internal_source_, std::shared_ptr files_metadata_, size_t processing_id_, const S3QueueAction & action_, @@ -190,38 +190,19 @@ StorageS3QueueSource::StorageS3QueueSource( { } -StorageS3QueueSource::~StorageS3QueueSource() -{ - internal_source->create_reader_pool.wait(); -} - String StorageS3QueueSource::getName() const { return name; } -void StorageS3QueueSource::lazyInitialize() -{ - if (initialized) - return; - - internal_source->lazyInitialize(processing_id); - reader = std::move(internal_source->reader); - if (reader) - reader_future = std::move(internal_source->reader_future); - initialized = true; -} - Chunk StorageS3QueueSource::generate() { - lazyInitialize(); - while (true) { if (!reader) break; - const auto * key_with_info = dynamic_cast(&reader.getKeyWithInfo()); + const auto * key_with_info = dynamic_cast(&reader.getObjectInfo()); auto file_status = key_with_info->processing_holder->getFileStatus(); if (isCancelled()) @@ -239,7 +220,7 @@ Chunk StorageS3QueueSource::generate() tryLogCurrentException(__PRETTY_FUNCTION__); } - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); } break; @@ -254,7 +235,7 @@ Chunk StorageS3QueueSource::generate() { LOG_DEBUG( log, "Table is being dropped, {} rows are already processed from {}, but file is not fully processed", - processed_rows_from_file, reader.getFile()); + processed_rows_from_file, reader.getRelativePath()); try { @@ -265,7 +246,7 @@ Chunk StorageS3QueueSource::generate() tryLogCurrentException(__PRETTY_FUNCTION__); } - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); /// Leave the file half processed. Table is being dropped, so we do not care. break; @@ -273,7 +254,7 @@ Chunk StorageS3QueueSource::generate() LOG_DEBUG(log, "Shutdown called, but file {} is partially processed ({} rows). 
" "Will process the file fully and then shutdown", - reader.getFile(), processed_rows_from_file); + reader.getRelativePath(), processed_rows_from_file); } auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); @@ -287,30 +268,30 @@ Chunk StorageS3QueueSource::generate() Chunk chunk; if (reader->pull(chunk)) { - LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), reader.getPath()); + LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), reader.getRelativePath()); file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getKeyWithInfo().info->size); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getRelativePath(), reader.getObjectInfo().metadata.size_bytes); return chunk; } } catch (...) { const auto message = getCurrentExceptionMessage(true); - LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", reader.getFile(), message); + LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", reader.getRelativePath(), message); files_metadata->setFileFailed(key_with_info->processing_holder, message); - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); throw; } files_metadata->setFileProcessed(key_with_info->processing_holder); - applyActionAfterProcessing(reader.getFile()); + applyActionAfterProcessing(reader.getRelativePath()); - appendLogElement(reader.getFile(), *file_status, processed_rows_from_file, true); + appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, true); file_status.reset(); processed_rows_from_file = 0; @@ -326,7 +307,7 @@ Chunk StorageS3QueueSource::generate() if (!reader) break; - file_status = files_metadata->getFileStatus(reader.getFile()); + file_status = files_metadata->getFileStatus(reader.getRelativePath()); /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 8fc7305ea08..fcf5c5c0160 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -5,7 +5,9 @@ #include #include #include -#include +#include +#include +#include #include @@ -14,28 +16,32 @@ namespace Poco { class Logger; } namespace DB { +struct ObjectMetadata; + class StorageS3QueueSource : public ISource, WithContext { public: - using IIterator = StorageS3Source::IIterator; - using KeyWithInfoPtr = StorageS3Source::KeyWithInfoPtr; - using GlobIterator = StorageS3Source::DisclosedGlobIterator; + using Storage = StorageObjectStorage; + using Source = StorageObjectStorageSource; + + using ConfigurationPtr = Storage::ConfigurationPtr; + using GlobIterator = Source::GlobIterator; using ZooKeeperGetter = std::function; using RemoveFileFunc = std::function; using FileStatusPtr = S3QueueFilesMetadata::FileStatusPtr; using Metadata = S3QueueFilesMetadata; - struct S3QueueKeyWithInfo : public StorageS3Source::KeyWithInfo + struct S3QueueObjectInfo : public Source::ObjectInfo { - S3QueueKeyWithInfo( - const std::string & key_, - std::optional info_, - Metadata::ProcessingNodeHolderPtr processing_holder_); + S3QueueObjectInfo( + const std::string & key_, + const ObjectMetadata & object_metadata_, + Metadata::ProcessingNodeHolderPtr processing_holder_); Metadata::ProcessingNodeHolderPtr processing_holder; }; - class FileIterator : public IIterator + class FileIterator : public Source::IIterator { public: FileIterator( @@ -47,7 +53,7 @@ public: /// Note: /// List results in s3 are always returned in UTF-8 binary order. /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - KeyWithInfoPtr next(size_t idx) override; + Source::ObjectInfoPtr next(size_t processor) override; size_t estimatedKeysCount() override; @@ -60,14 +66,14 @@ public: const bool sharded_processing; const size_t current_shard; - std::unordered_map> sharded_keys; + std::unordered_map> sharded_keys; std::mutex sharded_keys_mutex; }; StorageS3QueueSource( String name_, const Block & header_, - std::unique_ptr internal_source_, + std::unique_ptr internal_source_, std::shared_ptr files_metadata_, size_t processing_id_, const S3QueueAction & action_, @@ -80,8 +86,6 @@ public: const StorageID & storage_id_, LoggerPtr log_); - ~StorageS3QueueSource() override; - static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); String getName() const override; @@ -93,7 +97,7 @@ private: const S3QueueAction action; const size_t processing_id; const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; + const std::shared_ptr internal_source; const NamesAndTypesList requested_virtual_columns; const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; @@ -103,13 +107,11 @@ private: RemoveFileFunc remove_file_func; LoggerPtr log; - using ReaderHolder = StorageS3Source::ReaderHolder; - ReaderHolder reader; - std::future reader_future; + Source::ReaderHolder reader; + std::future reader_future; std::atomic initialized{false}; size_t processed_rows_from_file = 0; - void lazyInitialize(); void applyActionAfterProcessing(const String & path); void appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); }; diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp index 3ee2594135d..94816619aaa 
100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -7,7 +7,6 @@ #include #include #include -#include namespace DB @@ -33,7 +32,7 @@ namespace S3QueueTableMetadata::S3QueueTableMetadata( - const StorageS3::Configuration & configuration, + const StorageObjectStorageConfiguration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata) { diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index 30642869930..942ce7973ef 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -3,7 +3,7 @@ #if USE_AWS_S3 #include -#include +#include #include namespace DB @@ -27,7 +27,10 @@ struct S3QueueTableMetadata UInt64 s3queue_processing_threads_num; S3QueueTableMetadata() = default; - S3QueueTableMetadata(const StorageS3::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata); + S3QueueTableMetadata( + const StorageObjectStorageConfiguration & configuration, + const S3QueueSettings & engine_settings, + const StorageInMemoryMetadata & storage_metadata); void read(const String & metadata_str); static S3QueueTableMetadata parse(const String & metadata_str); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 0723205b544..fa7132f705a 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -50,11 +51,6 @@ namespace ErrorCodes namespace { - bool containsGlobs(const S3::URI & url) - { - return url.key.find_first_of("*?{") != std::string::npos; - } - std::string chooseZooKeeperPath(const StorageID & table_id, const Settings & settings, const S3QueueSettings & s3queue_settings) { std::string zk_path_prefix = settings.s3queue_default_zookeeper_path.value; @@ -98,7 +94,7 @@ namespace StorageS3Queue::StorageS3Queue( std::unique_ptr s3queue_settings_, - const StorageS3::Configuration & configuration_, + const ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -116,29 +112,29 @@ StorageS3Queue::StorageS3Queue( , reschedule_processing_interval_ms(s3queue_settings->s3queue_polling_min_timeout_ms) , log(getLogger("StorageS3Queue (" + table_id_.table_name + ")")) { - if (configuration.url.key.empty()) + if (configuration->getPath().empty()) { - configuration.url.key = "/*"; + configuration->setPath("/*"); } - else if (configuration.url.key.ends_with('/')) + else if (configuration->getPath().ends_with('/')) { - configuration.url.key += '*'; + configuration->setPath(configuration->getPath() + '*'); } - else if (!containsGlobs(configuration.url)) + else if (!configuration->isPathWithGlobs()) { throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "S3Queue url must either end with '/' or contain globs"); } checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef()); - configuration.update(context_); - FormatFactory::instance().checkFormatName(configuration.format); - context_->getRemoteHostFilter().checkURL(configuration.url.uri); + object_storage = configuration->createOrUpdateObjectStorage(context_); + FormatFactory::instance().checkFormatName(configuration->format); + configuration->check(context_); StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = 
StorageS3::getTableStructureFromDataImpl(configuration, format_settings, context_); + auto columns = Storage::getTableStructureFromData(object_storage, configuration, format_settings, context_); storage_metadata.setColumns(columns); } else @@ -226,7 +222,7 @@ void StorageS3Queue::drop() bool StorageS3Queue::supportsSubsetOfColumns(const ContextPtr & context_) const { - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context_, format_settings); + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context_, format_settings); } class ReadFromS3Queue : public SourceStepWithFilter @@ -345,38 +341,20 @@ std::shared_ptr StorageS3Queue::createSource( size_t max_block_size, ContextPtr local_context) { - auto configuration_snapshot = updateConfigurationAndGetCopy(local_context); - - auto internal_source = std::make_unique( - info, configuration.format, getName(), local_context, format_settings, + auto internal_source = std::make_unique( + getName(), + object_storage, + configuration, + info, + format_settings, + local_context, max_block_size, - configuration_snapshot.request_settings, - configuration_snapshot.compression_method, - configuration_snapshot.client, - configuration_snapshot.url.bucket, - configuration_snapshot.url.version_id, - configuration_snapshot.url.uri.getHost() + std::to_string(configuration_snapshot.url.uri.getPort()), - file_iterator, local_context->getSettingsRef().max_download_threads, false); + file_iterator, + false); - auto file_deleter = [this, bucket = configuration_snapshot.url.bucket, client = configuration_snapshot.client, blob_storage_log = BlobStorageLogWriter::create()](const std::string & path) mutable + auto file_deleter = [=, this](const std::string & path) mutable { - S3::DeleteObjectRequest request; - request.WithKey(path).WithBucket(bucket); - auto outcome = client->DeleteObject(request); - if (blob_storage_log) - blob_storage_log->addEvent( - BlobStorageLogElement::EventType::Delete, - bucket, path, {}, 0, outcome.IsSuccess() ? nullptr : &outcome.GetError()); - - if (!outcome.IsSuccess()) - { - const auto & err = outcome.GetError(); - LOG_ERROR(log, "{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())); - } - else - { - LOG_TRACE(log, "Object with path {} was removed from S3", path); - } + object_storage->removeObject(StoredObject(path)); }; auto s3_queue_log = s3queue_settings->s3queue_enable_logging_to_s3queue_log ? 
local_context->getS3QueueLog() : nullptr; return std::make_shared( @@ -470,7 +448,6 @@ bool StorageS3Queue::streamToViews() auto s3queue_context = Context::createCopy(getContext()); s3queue_context->makeQueryContext(); - auto query_configuration = updateConfigurationAndGetCopy(s3queue_context); // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns @@ -505,12 +482,6 @@ bool StorageS3Queue::streamToViews() return rows > 0; } -StorageS3Queue::Configuration StorageS3Queue::updateConfigurationAndGetCopy(ContextPtr local_context) -{ - configuration.update(local_context); - return configuration; -} - zkutil::ZooKeeperPtr StorageS3Queue::getZooKeeper() const { return getContext()->getZooKeeper(); @@ -530,7 +501,7 @@ void StorageS3Queue::createOrCheckMetadata(const StorageInMemoryMetadata & stora } else { - std::string metadata = S3QueueTableMetadata(configuration, *s3queue_settings, storage_metadata).toString(); + std::string metadata = S3QueueTableMetadata(*configuration, *s3queue_settings, storage_metadata).toString(); requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent)); requests.emplace_back(zkutil::makeCreateRequest(zk_path / "processed", "", zkutil::CreateMode::Persistent)); requests.emplace_back(zkutil::makeCreateRequest(zk_path / "failed", "", zkutil::CreateMode::Persistent)); @@ -568,7 +539,7 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const String metadata_str = zookeeper->get(fs::path(zookeeper_prefix) / "metadata"); auto metadata_from_zk = S3QueueTableMetadata::parse(metadata_str); - S3QueueTableMetadata old_metadata(configuration, *s3queue_settings, storage_metadata); + S3QueueTableMetadata old_metadata(*configuration, *s3queue_settings, storage_metadata); old_metadata.checkEquals(metadata_from_zk); auto columns_from_zk = ColumnsDescription::parse(metadata_from_zk.columns); @@ -584,14 +555,25 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const } } -std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) +std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr , const ActionsDAG::Node * predicate) { - auto glob_iterator = std::make_unique( - *configuration.client, configuration.url, predicate, virtual_columns, local_context, - /* read_keys */nullptr, configuration.request_settings); + auto glob_iterator = std::make_unique(object_storage, configuration, predicate, virtual_columns, nullptr); + return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); } +static void initializeConfiguration( + StorageObjectStorageConfiguration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + configuration.fromNamedCollection(*named_collection); + else + configuration.fromAST(engine_args, local_context, with_table_structure); +} + void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) { factory.registerStorage( @@ -602,7 +584,8 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) if (engine_args.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - auto configuration = StorageS3::getConfiguration(engine_args, 
args.getLocalContext()); + auto configuration = std::make_shared(); + initializeConfiguration(*configuration, args.engine_args, args.getContext(), false); // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index fd3b4bb4914..88f9bd65093 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -26,11 +26,13 @@ class S3QueueFilesMetadata; class StorageS3Queue : public IStorage, WithContext { public: - using Configuration = typename StorageS3::Configuration; + using Storage = StorageObjectStorage; + using Source = StorageObjectStorageSource; + using ConfigurationPtr = Storage::ConfigurationPtr; StorageS3Queue( std::unique_ptr s3queue_settings_, - const Configuration & configuration_, + ConfigurationPtr configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -53,8 +55,6 @@ public: NamesAndTypesList getVirtuals() const override { return virtual_columns; } - const auto & getFormatName() const { return configuration.format; } - const fs::path & getZooKeeperPath() const { return zk_path; } zkutil::ZooKeeperPtr getZooKeeper() const; @@ -68,7 +68,8 @@ private: const S3QueueAction after_processing; std::shared_ptr files_metadata; - Configuration configuration; + ConfigurationPtr configuration; + ObjectStoragePtr object_storage; const std::optional format_settings; NamesAndTypesList virtual_columns; @@ -103,7 +104,6 @@ private: void createOrCheckMetadata(const StorageInMemoryMetadata & storage_metadata); void checkTableStructure(const String & zookeeper_prefix, const StorageInMemoryMetadata & storage_metadata); - Configuration updateConfigurationAndGetCopy(ContextPtr local_context); }; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp deleted file mode 100644 index c09db0bfb7b..00000000000 --- a/src/Storages/StorageAzureBlob.cpp +++ /dev/null @@ -1,1478 +0,0 @@ -#include - -#if USE_AZURE_BLOB_STORAGE -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -using namespace Azure::Storage::Blobs; - -namespace CurrentMetrics -{ - extern const Metric ObjectStorageAzureThreads; - extern const Metric ObjectStorageAzureThreadsActive; - extern const Metric ObjectStorageAzureThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; - -} - -namespace -{ - -const std::unordered_set required_configuration_keys = { - "blob_path", - "container", -}; - -const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "structure", - "compression_method", - "account_name", - "account_key", - "connection_string", - 
"storage_account_url", -}; - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void StorageAzureBlob::processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - if (collection.has("connection_string")) - { - configuration.connection_url = collection.get("connection_string"); - configuration.is_connection_string = true; - } - - if (collection.has("storage_account_url")) - { - configuration.connection_url = collection.get("storage_account_url"); - configuration.is_connection_string = false; - } - - configuration.container = collection.get("container"); - configuration.blob_path = collection.get("blob_path"); - - if (collection.has("account_name")) - configuration.account_name = collection.get("account_name"); - - if (collection.has("account_key")) - configuration.account_key = collection.get("account_key"); - - configuration.structure = collection.getOrDefault("structure", "auto"); - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); -} - - -StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine_args, ContextPtr local_context) -{ - StorageAzureBlob::Configuration configuration; - - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - - return configuration; - } - - if (engine_args.size() < 3 || engine_args.size() > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage AzureBlobStorage requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg = [] (const std::string & s) -> bool - { - return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); - }; - - if (engine_args.size() == 4) - { - //'c1 UInt64, c2 UInt64 - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); - } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = 
checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - - return configuration; -} - - -AzureObjectStorage::SettingsPtr StorageAzureBlob::createSettings(ContextPtr local_context) -{ - const auto & context_settings = local_context->getSettingsRef(); - auto settings_ptr = std::make_unique(); - settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; - settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; - settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); - - return settings_ptr; -} - -void registerStorageAzureBlob(StorageFactory & factory) -{ - factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageAzureBlob::getConfiguration(engine_args, args.getLocalContext()); - auto client = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. 
- std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. - user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - auto settings = StorageAzureBlob::createSettings(args.getContext()); - - return std::make_shared( - std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing */ false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::AZURE, - }); -} - -static bool containerExists(std::unique_ptr &blob_service_client, std::string container_name) -{ - Azure::Storage::Blobs::ListBlobContainersOptions options; - options.Prefix = container_name; - options.PageSizeHint = 1; - - auto containers_list_response = blob_service_client->ListBlobContainers(options); - auto containers_list = containers_list_response.BlobContainers; - - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; -} - -AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration configuration, bool is_read_only) -{ - AzureClientPtr result; - - if (configuration.is_connection_string) - { - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(configuration.connection_url)); - result = std::make_unique(BlobContainerClient::CreateFromConnectionString(configuration.connection_url, configuration.container)); - bool container_exists = containerExists(blob_service_client,configuration.container); - - if (!container_exists) - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - - try - { - result->CreateIfNotExists(); - } catch (const Azure::Storage::StorageException & e) - { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) - { - throw; - } - } - } - } - else - { - std::shared_ptr storage_shared_key_credential; - if (configuration.account_name.has_value() && configuration.account_key.has_value()) - { - storage_shared_key_credential - = std::make_shared(*configuration.account_name, *configuration.account_key); - } - - std::unique_ptr blob_service_client; - if (storage_shared_key_credential) - { - blob_service_client = std::make_unique(configuration.connection_url, storage_shared_key_credential); - } - else - { - blob_service_client = std::make_unique(configuration.connection_url); - } - - bool container_exists = 
containerExists(blob_service_client,configuration.container); - - std::string final_url; - size_t pos = configuration.connection_url.find('?'); - if (pos != std::string::npos) - { - auto url_without_sas = configuration.connection_url.substr(0, pos); - final_url = url_without_sas + (url_without_sas.back() == '/' ? "" : "/") + configuration.container - + configuration.connection_url.substr(pos); - } - else - final_url - = configuration.connection_url + (configuration.connection_url.back() == '/' ? "" : "/") + configuration.container; - - if (container_exists) - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - result = std::make_unique(final_url); - } - else - { - if (is_read_only) - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage container does not exist '{}'", - configuration.container); - try - { - result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.") - { - if (storage_shared_key_credential) - result = std::make_unique(final_url, storage_shared_key_credential); - else - result = std::make_unique(final_url); - } - else - { - throw; - } - } - } - } - - return result; -} - -Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const -{ - if (!is_connection_string) - return Poco::URI(connection_url); - - auto parsed_connection_string = Azure::Storage::_internal::ParseConnectionString(connection_url); - return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); -} - - -StorageAzureBlob::StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - ContextPtr context, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , name("AzureBlobStorage") - , configuration(configuration_) - , object_storage(std::move(object_storage_)) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - FormatFactory::instance().checkFormatName(configuration.format); - context->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.getConnectionURL()); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - auto columns = getTableStructureFromData(object_storage.get(), configuration, format_settings, context, distributed_processing); - storage_metadata.setColumns(columns); - } - else - { - /// We don't allow special columns in File storage. 
- if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine AzureBlobStorage doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -void StorageAzureBlob::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) -{ - if (configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", - configuration.blob_path); - } - - StoredObjects objects; - for (const auto & key : configuration.blobs_paths) - objects.emplace_back(key); - - object_storage->removeObjectsIfExist(objects); -} - -namespace -{ - -class StorageAzureBlobSink : public SinkToStorage -{ -public: - StorageAzureBlobSink( - const String & format, - const Block & sample_block_, - ContextPtr context, - std::optional format_settings_, - const CompressionMethod compression_method, - AzureObjectStorage * object_storage, - const String & blob_path) - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - StoredObject object(blob_path); - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - object_storage->writeObject(object, WriteMode::Rewrite), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageAzureBlobSink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - -class PartitionedStorageAzureBlobSink : public PartitionedSink -{ -public: - PartitionedStorageAzureBlobSink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - AzureObjectStorage * object_storage_, - const String & blob_) - : PartitionedSink(partition_by, context_, sample_block_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - , object_storage(object_storage_) - , blob(blob_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_key = replaceWildcards(blob, partition_id); - validateKey(partition_key); - - return std::make_shared( - format, - sample_block, - context, - format_settings, - compression_method, - object_storage, - partition_key - ); - } - -private: - const String format; - const Block sample_block; - const ContextPtr context; - const CompressionMethod compression_method; - AzureObjectStorage * object_storage; - const String blob; - const std::optional format_settings; - - ExpressionActionsPtr partition_by_expr; - - static void validateKey(const String & str) - { - validatePartitionKey(str, true); - } -}; - -} - -class ReadFromAzureBlob : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromAzureBlob"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters() override; - - ReadFromAzureBlob( - Block sample_block, - std::shared_ptr storage_, - ReadFromFormatInfo info_, - const bool need_only_count_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) - , storage(std::move(storage_)) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , context(std::move(context_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - std::shared_ptr storage; - ReadFromFormatInfo info; - const bool need_only_count; - - ContextPtr context; - - size_t max_block_size; - const size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromAzureBlob::applyFilters() -{ - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageAzureBlob::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - if (partition_by && configuration.withWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned Azure storage is not implemented yet"); - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, 
supportsSubsetOfColumns(local_context), getVirtuals()); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - read_from_format_info.source_header, - std::move(this_ptr), - std::move(read_from_format_info), - need_only_count, - local_context, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromAzureBlob::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - const auto & configuration = storage->configuration; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared(context, - context->getReadTaskCallback()); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blob_path, - predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback()); - } - else - { - iterator_wrapper = std::make_shared( - storage->object_storage.get(), configuration.container, configuration.blobs_paths, - predicate, storage->virtual_columns, context, nullptr, context->getFileProgressCallback()); - } -} - -void ReadFromAzureBlob::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - const auto & configuration = storage->configuration; - Pipes pipes; - - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - configuration.format, - getName(), - context, - storage->format_settings, - max_block_size, - configuration.compression_method, - storage->object_storage.get(), - configuration.container, - configuration.connection_url, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageAzureBlob::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(configuration.blobs_paths.back(), configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && configuration.withWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - configuration.blobs_paths.back()); - } - else - { - if (configuration.withGlobs()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "AzureBlobStorage key '{}' contains globs, so the table is in readonly mode", configuration.blob_path); - - bool truncate_in_insert = local_context->getSettingsRef().azure_truncate_on_insert; - - if (!truncate_in_insert && object_storage->exists(StoredObject(configuration.blob_path))) - { - - if (local_context->getSettingsRef().azure_create_new_file_on_insert) - { - size_t index = configuration.blobs_paths.size(); - const auto & first_key = configuration.blobs_paths[0]; - auto pos = first_key.find_first_of('.'); - String new_key; - - do - { - new_key = first_key.substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? "" : first_key.substr(pos)); - ++index; - } - while (object_storage->exists(StoredObject(new_key))); - - configuration.blobs_paths.push_back(new_key); - } - else - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. " - "If you want to overwrite it, enable setting azure_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting azure_create_new_file_on_insert", - configuration.container, configuration.blobs_paths.back()); - } - } - - return std::make_shared( - configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - object_storage.get(), - configuration.blobs_paths.back()); - } -} - -NamesAndTypesList StorageAzureBlob::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageAzureBlob::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - -bool StorageAzureBlob::supportsPartitionBy() const -{ - return true; -} - -bool StorageAzureBlob::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); -} - -bool StorageAzureBlob::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format); -} - -bool StorageAzureBlob::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); -} - -StorageAzureBlobSource::GlobIterator::GlobIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - String blob_path_with_globs_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_, - std::function file_progress_callback_) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , blob_path_with_globs(blob_path_with_globs_) - , virtual_columns(virtual_columns_) - , outer_blobs(outer_blobs_) - , file_progress_callback(file_progress_callback_) -{ - - const String key_prefix = blob_path_with_globs.substr(0, blob_path_with_globs.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. 
- if (key_prefix.size() == blob_path_with_globs.size()) - { - auto object_metadata = object_storage->getObjectMetadata(blob_path_with_globs); - blobs_with_metadata.emplace_back( - blob_path_with_globs, - object_metadata); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata.back()); - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - is_finished = true; - return; - } - - object_storage_iterator = object_storage->iterate(key_prefix); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(blob_path_with_globs)); - - if (!matcher->ok()) - throw Exception( - ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile regex from glob ({}): {}", blob_path_with_globs, matcher->error()); - - recursive = blob_path_with_globs == "/**" ? true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); -} - -RelativePathWithMetadata StorageAzureBlobSource::GlobIterator::next() -{ - std::lock_guard lock(next_mutex); - - if (is_finished && index >= blobs_with_metadata.size()) - { - return {}; - } - - bool need_new_batch = blobs_with_metadata.empty() || index >= blobs_with_metadata.size(); - - if (need_new_batch) - { - RelativePathsWithMetadata new_batch; - while (new_batch.empty()) - { - auto result = object_storage_iterator->getCurrrentBatchAndScheduleNext(); - if (result.has_value()) - { - new_batch = result.value(); - } - else - { - is_finished = true; - return {}; - } - - for (auto it = new_batch.begin(); it != new_batch.end();) - { - if (!recursive && !re2::RE2::FullMatch(it->relative_path, *matcher)) - it = new_batch.erase(it); - else - ++it; - } - } - - index = 0; - - if (filter_dag) - { - std::vector paths; - paths.reserve(new_batch.size()); - for (auto & path_with_metadata : new_batch) - paths.push_back(fs::path(container) / path_with_metadata.relative_path); - - VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); - } - - if (outer_blobs) - outer_blobs->insert(outer_blobs->end(), new_batch.begin(), new_batch.end()); - - blobs_with_metadata = std::move(new_batch); - if (file_progress_callback) - { - for (const auto & [relative_path, info] : blobs_with_metadata) - { - file_progress_callback(FileProgress(0, info.size_bytes)); - } - } - } - - size_t current_index = index++; - if (current_index >= blobs_with_metadata.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); - return blobs_with_metadata[current_index]; -} - -StorageAzureBlobSource::KeysIterator::KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback) - : IIterator(context_) - , object_storage(object_storage_) - , container(container_) - , virtual_columns(virtual_columns_) -{ - Strings all_keys = keys_; - - ASTPtr filter_ast; - if (!all_keys.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - Strings paths; - paths.reserve(all_keys.size()); - for (const auto & key : all_keys) - paths.push_back(fs::path(container) / key); - - VirtualColumnUtils::filterByPathOrFile(all_keys, paths, filter_dag, virtual_columns, getContext()); - } - - for (auto && key : all_keys) - { - ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); 
- if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - keys.emplace_back(key, object_metadata); - } - - if (outer_blobs) - *outer_blobs = keys; -} - -RelativePathWithMetadata StorageAzureBlobSource::KeysIterator::next() -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - - return keys[current_index]; -} - -Chunk StorageAzureBlobSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - requested_virtual_columns, - fs::path(container) / reader.getRelativePath(), - reader.getRelativePathWithMetadata().metadata.size_bytes); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageAzureBlobSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - String source = fs::path(connection_url) / container / path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageAzureBlobSource::tryGetNumRowsFromCache(const DB::RelativePathWithMetadata & path_with_metadata) -{ - String source = fs::path(connection_url) / container / path_with_metadata.relative_path; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - auto last_mod = path_with_metadata.metadata.last_modified; - if (last_mod) - return last_mod->epochTime(); - return std::nullopt; - }; - - return StorageAzureBlob::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -StorageAzureBlobSource::StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - ContextPtr context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_) - :ISource(info.source_header, false) - , WithContext(context_) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , format(format_) - , name(std::move(name_)) - , sample_block(info.format_header) - , format_settings(format_settings_) - , columns_desc(info.columns_description) - , max_block_size(max_block_size_) - , compression_hint(compression_hint_) - , object_storage(std::move(object_storage_)) - , container(container_) - , connection_url(connection_url_) - , file_iterator(file_iterator_) - , 
need_only_count(need_only_count_)
-    , create_reader_pool(CurrentMetrics::ObjectStorageAzureThreads, CurrentMetrics::ObjectStorageAzureThreadsActive, CurrentMetrics::ObjectStorageAzureThreadsScheduled, 1)
-    , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "AzureReader"))
-{
-    reader = createReader();
-    if (reader)
-        reader_future = createReaderAsync();
-}
-
-
-StorageAzureBlobSource::~StorageAzureBlobSource()
-{
-    create_reader_pool.wait();
-}
-
-String StorageAzureBlobSource::getName() const
-{
-    return name;
-}
-
-StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader()
-{
-    auto path_with_metadata = file_iterator->next();
-    if (path_with_metadata.relative_path.empty())
-        return {};
-
-    if (path_with_metadata.metadata.size_bytes == 0)
-        path_with_metadata.metadata = object_storage->getObjectMetadata(path_with_metadata.relative_path);
-
-    QueryPipelineBuilder builder;
-    std::shared_ptr source;
-    std::unique_ptr read_buf;
-    std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files
-        ? tryGetNumRowsFromCache(path_with_metadata) : std::nullopt;
-    if (num_rows_from_cache)
-    {
-        /// We should not return single chunk with all number of rows,
-        /// because there is a chance that this chunk will be materialized later
-        /// (it can cause memory problems even with default values in columns or when virtual columns are requested).
-        /// Instead, we use special ConstChunkGenerator that will generate chunks
-        /// with max_block_size rows until total number of rows is reached.
-        source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size);
-        builder.init(Pipe(source));
-    }
-    else
-    {
-        std::optional max_parsing_threads;
-        if (need_only_count)
-            max_parsing_threads = 1;
-
-        auto compression_method = chooseCompressionMethod(path_with_metadata.relative_path, compression_hint);
-        read_buf = createAzureReadBuffer(path_with_metadata.relative_path, path_with_metadata.metadata.size_bytes);
-        auto input_format = FormatFactory::instance().getInput(
-            format, *read_buf, sample_block, getContext(), max_block_size,
-            format_settings, max_parsing_threads, std::nullopt,
-            /* is_remote_fs */ true, compression_method);
-
-        if (need_only_count)
-            input_format->needOnlyCount();
-
-        builder.init(Pipe(input_format));
-
-        if (columns_desc.hasDefaults())
-        {
-            builder.addSimpleTransform(
-                [&](const Block & header)
-                { return std::make_shared(header, columns_desc, *input_format, getContext()); });
-        }
-
-        source = input_format;
-    }
-
-    /// Add ExtractColumnsTransform to extract requested columns/subcolumns
-    /// from chunk read by IInputFormat.
- builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{path_with_metadata, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageAzureBlobSource::createReaderAsync() -{ - return create_reader_scheduler([this] { return createReader(); }, Priority{}); -} - -std::unique_ptr StorageAzureBlobSource::createAzureReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. - if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from Azure with initial prefetch", object_size); - return createAsyncAzureReadBuffer(key, read_settings, object_size); - } - - return object_storage->readObject(StoredObject(key), read_settings, {}, object_size); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::shared_ptr & file_iterator_, - AzureObjectStorage * object_storage_, - const StorageAzureBlob::Configuration & configuration_, - const std::optional & format_settings_, - const RelativePathsWithMetadata & read_keys_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , object_storage(object_storage_) - , configuration(configuration_) - , format_settings(format_settings_) - , read_keys(read_keys_) - , prev_read_keys_size(read_keys_.size()) - { - } - - std::pair, std::optional> next() override - { - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; - } - - current_path_with_metadata = file_iterator->next(); - - if (current_path_with_metadata.relative_path.empty()) - { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path " - "in AzureBlobStorage. You must specify table structure manually", configuration.format); - - return {nullptr, std::nullopt}; - } - - first = false; - - /// AzureBlobStorage file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT && read_keys.size() > prev_read_keys_size) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache}; - } - else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - RelativePathsWithMetadata paths = {current_path_with_metadata}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache}; - } - - first = false; - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return {wrapReadBufferWithCompressionMethod( - object_storage->readObject(StoredObject(current_path_with_metadata.relative_path), getContext()->getReadSettings(), {}, current_path_with_metadata.metadata.size_bytes), - chooseCompressionMethod(current_path_with_metadata.relative_path, configuration.compression_method), - zstd_window_log_max), std::nullopt}; - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.connection_url) / configuration.container / current_path_with_metadata.relative_path; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_azure - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket + '/' + elem.relative_path; }); - auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext()); - StorageAzureBlob::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - String getLastFileName() const override { return current_path_with_metadata.relative_path; } - - private: - std::optional tryGetColumnsFromCache(const RelativePathsWithMetadata::const_iterator & begin, const RelativePathsWithMetadata::const_iterator & end) - { - auto & schema_cache = StorageAzureBlob::getSchemaCache(getContext()); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] -> std::optional - { - if (it->metadata.last_modified) - return it->metadata.last_modified->epochTime(); - return std::nullopt; - }; - - auto host_and_bucket = configuration.connection_url + '/' + configuration.container; - String source = 
host_and_bucket + '/' + it->relative_path; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - AzureObjectStorage * object_storage; - const StorageAzureBlob::Configuration & configuration; - const std::optional & format_settings; - const RelativePathsWithMetadata & read_keys; - size_t prev_read_keys_size; - RelativePathWithMetadata current_path_with_metadata; - bool first = true; - }; -} - -ColumnsDescription StorageAzureBlob::getTableStructureFromData( - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx, - bool distributed_processing) -{ - RelativePathsWithMetadata read_keys; - std::shared_ptr file_iterator; - if (distributed_processing) - { - file_iterator = std::make_shared(ctx, - ctx->getReadTaskCallback()); - } - else if (configuration.withGlobs()) - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blob_path, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - else - { - file_iterator = std::make_shared( - object_storage, configuration.container, configuration.blobs_paths, nullptr, NamesAndTypesList{}, ctx, &read_keys); - } - - ReadBufferIterator read_buffer_iterator(file_iterator, object_storage, configuration, format_settings, read_keys, ctx); - return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx); -} - -SchemaCache & StorageAzureBlob::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_azure", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - - -std::unique_ptr StorageAzureBlobSource::createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) -{ - auto modified_settings{read_settings}; - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - auto async_reader = object_storage->readObjects(StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, modified_settings); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -} - -#endif diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h deleted file mode 100644 index 6fc3c5ce592..00000000000 --- a/src/Storages/StorageAzureBlob.h +++ /dev/null @@ -1,339 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class StorageAzureBlob : public IStorage -{ -public: - - using AzureClient = Azure::Storage::Blobs::BlobContainerClient; - using AzureClientPtr = std::unique_ptr; - - struct Configuration : public StatelessTableEngineConfiguration - { - Configuration() = default; - - String getPath() const { return blob_path; } - - bool update(ContextPtr context); - - void connect(ContextPtr context); - - bool withGlobs() const { return blob_path.find_first_of("*?{") != std::string::npos; } - - bool withWildcard() const - { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return blobs_paths.back().find(PARTITION_ID_WILDCARD) != String::npos; - } 
- - Poco::URI getConnectionURL() const; - - std::string connection_url; - bool is_connection_string; - - std::optional account_name; - std::optional account_key; - - std::string container; - std::string blob_path; - std::vector blobs_paths; - }; - - StorageAzureBlob( - const Configuration & configuration_, - std::unique_ptr && object_storage_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_); - - static StorageAzureBlob::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); - static AzureClientPtr createClient(StorageAzureBlob::Configuration configuration, bool is_read_only); - - static AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); - - static void processNamedCollectionResult(StorageAzureBlob::Configuration & configuration, const NamedCollection & collection); - - String getName() const override - { - return name; - } - - void read( - QueryPlan & query_plan, - const Names &, - const StorageSnapshotPtr &, - SelectQueryInfo &, - ContextPtr, - QueryProcessingStage::Enum, - size_t, - size_t) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /* metadata_snapshot */, ContextPtr context, bool /*async_insert*/) override; - - void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - - bool supportsPartitionBy() const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsSubsetOfColumns(const ContextPtr & context) const; - - bool supportsTrivialCountOptimization() const override { return true; } - - bool prefersLargeBlocks() const override; - - bool parallelizeOutputAfterReading(ContextPtr context) const override; - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - static ColumnsDescription getTableStructureFromData( - AzureObjectStorage * object_storage, - const Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx, - bool distributed_processing = false); - -private: - friend class ReadFromAzureBlob; - - std::string name; - Configuration configuration; - std::unique_ptr object_storage; - NamesAndTypesList virtual_columns; - - const bool distributed_processing; - std::optional format_settings; - ASTPtr partition_by; -}; - -class StorageAzureBlobSource : public ISource, WithContext -{ -public: - class IIterator : public WithContext - { - public: - IIterator(ContextPtr context_):WithContext(context_) {} - virtual ~IIterator() = default; - virtual RelativePathWithMetadata next() = 0; - - RelativePathWithMetadata operator ()() { return next(); } - }; - - class GlobIterator : public IIterator - { - public: - GlobIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - String blob_path_with_globs_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_, - std::function file_progress_callback_ = {}); - - RelativePathWithMetadata next() override; - ~GlobIterator() override = default; - - private: - AzureObjectStorage * object_storage; - std::string container; - String blob_path_with_globs; - ActionsDAGPtr filter_dag; - 
NamesAndTypesList virtual_columns; - - size_t index = 0; - - RelativePathsWithMetadata blobs_with_metadata; - RelativePathsWithMetadata * outer_blobs; - ObjectStorageIteratorPtr object_storage_iterator; - bool recursive{false}; - - std::unique_ptr matcher; - - void createFilterAST(const String & any_key); - bool is_finished = false; - std::mutex next_mutex; - - std::function file_progress_callback; - }; - - class ReadIterator : public IIterator - { - public: - explicit ReadIterator(ContextPtr context_, - const ReadTaskCallback & callback_) - : IIterator(context_), callback(callback_) { } - RelativePathWithMetadata next() override - { - return {callback(), {}}; - } - - private: - ReadTaskCallback callback; - }; - - class KeysIterator : public IIterator - { - public: - KeysIterator( - AzureObjectStorage * object_storage_, - const std::string & container_, - const Strings & keys_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - RelativePathsWithMetadata * outer_blobs, - std::function file_progress_callback = {}); - - RelativePathWithMetadata next() override; - ~KeysIterator() override = default; - - private: - AzureObjectStorage * object_storage; - std::string container; - RelativePathsWithMetadata keys; - - ActionsDAGPtr filter_dag; - NamesAndTypesList virtual_columns; - - std::atomic index = 0; - }; - - StorageAzureBlobSource( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - ContextPtr context_, - std::optional format_settings_, - UInt64 max_block_size_, - String compression_hint_, - AzureObjectStorage * object_storage_, - const String & container_, - const String & connection_url_, - std::shared_ptr file_iterator_, - bool need_only_count_); - ~StorageAzureBlobSource() override; - - Chunk generate() override; - - String getName() const override; - -private: - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const RelativePathWithMetadata & path_with_metadata); - - NamesAndTypesList requested_columns; - NamesAndTypesList requested_virtual_columns; - String format; - String name; - Block sample_block; - std::optional format_settings; - ColumnsDescription columns_desc; - UInt64 max_block_size; - String compression_hint; - AzureObjectStorage * object_storage; - String container; - String connection_url; - std::shared_ptr file_iterator; - bool need_only_count; - size_t total_rows_in_file = 0; - - struct ReaderHolder - { - public: - ReaderHolder( - RelativePathWithMetadata relative_path_with_metadata_, - std::unique_ptr read_buf_, - std::shared_ptr source_, - std::unique_ptr pipeline_, - std::unique_ptr reader_) - : relative_path_with_metadata(std::move(relative_path_with_metadata_)) - , read_buf(std::move(read_buf_)) - , source(std::move(source_)) - , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) - { - } - - ReaderHolder() = default; - ReaderHolder(const ReaderHolder & other) = delete; - ReaderHolder & operator=(const ReaderHolder & other) = delete; - - ReaderHolder(ReaderHolder && other) noexcept - { - *this = std::move(other); - } - - ReaderHolder & operator=(ReaderHolder && other) noexcept - { - /// The order of destruction is important. - /// reader uses pipeline, pipeline uses read_buf. 
- reader = std::move(other.reader); - pipeline = std::move(other.pipeline); - source = std::move(other.source); - read_buf = std::move(other.read_buf); - relative_path_with_metadata = std::move(other.relative_path_with_metadata); - return *this; - } - - explicit operator bool() const { return reader != nullptr; } - PullingPipelineExecutor * operator->() { return reader.get(); } - const PullingPipelineExecutor * operator->() const { return reader.get(); } - const String & getRelativePath() const { return relative_path_with_metadata.relative_path; } - const RelativePathWithMetadata & getRelativePathWithMetadata() const { return relative_path_with_metadata; } - const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } - - private: - RelativePathWithMetadata relative_path_with_metadata; - std::unique_ptr read_buf; - std::shared_ptr source; - std::unique_ptr pipeline; - std::unique_ptr reader; - }; - - ReaderHolder reader; - - LoggerPtr log = getLogger("StorageAzureBlobSource"); - - ThreadPool create_reader_pool; - ThreadPoolCallbackRunner create_reader_scheduler; - std::future reader_future; - - /// Recreate ReadBuffer and Pipeline for each file. - ReaderHolder createReader(); - std::future createReaderAsync(); - - std::unique_ptr createAzureReadBuffer(const String & key, size_t object_size); - std::unique_ptr createAsyncAzureReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size); -}; - -} - -#endif diff --git a/src/Storages/StorageAzureBlobCluster.cpp b/src/Storages/StorageAzureBlobCluster.cpp deleted file mode 100644 index 1d587512f38..00000000000 --- a/src/Storages/StorageAzureBlobCluster.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include "Storages/StorageAzureBlobCluster.h" - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageAzureBlobCluster::StorageAzureBlobCluster( - const String & cluster_name_, - const StorageAzureBlob::Configuration & configuration_, - std::unique_ptr && object_storage_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageAzureBlobCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) - , configuration{configuration_} - , object_storage(std::move(object_storage_)) -{ - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.getConnectionURL()); - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - /// `format_settings` is set to std::nullopt, because StorageAzureBlobCluster is used only as table function - auto columns = StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, /*format_settings=*/std::nullopt, context_, false); - storage_metadata.setColumns(columns); - } - else - storage_metadata.setColumns(columns_); - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -void StorageAzureBlobCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) -{ - ASTExpressionList 
* expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - - TableFunctionAzureBlobStorageCluster::addColumnsStructureToArguments(expression_list->children, structure, context); -} - -RemoteQueryExecutor::Extension StorageAzureBlobCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared( - object_storage.get(), configuration.container, configuration.blob_path, - predicate, virtual_columns, context, nullptr); - auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next().relative_path; }); - return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; -} - -NamesAndTypesList StorageAzureBlobCluster::getVirtuals() const -{ - return virtual_columns; -} - - -} - -#endif diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h deleted file mode 100644 index 2831b94f825..00000000000 --- a/src/Storages/StorageAzureBlobCluster.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - -#include "Client/Connection.h" -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageAzureBlobCluster : public IStorageCluster -{ -public: - StorageAzureBlobCluster( - const String & cluster_name_, - const StorageAzureBlob::Configuration & configuration_, - std::unique_ptr && object_storage_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_); - - std::string getName() const override { return "AzureBlobStorageCluster"; } - - NamesAndTypesList getVirtuals() const override; - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization() const override { return true; } - -private: - void updateBeforeRead(const ContextPtr & /*context*/) override {} - - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; - - StorageAzureBlob::Configuration configuration; - NamesAndTypesList virtual_columns; - std::unique_ptr object_storage; -}; - - -} - -#endif diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp deleted file mode 100644 index 4fde6fd04f3..00000000000 --- a/src/Storages/StorageS3.cpp +++ /dev/null @@ -1,1905 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - -#ifdef __clang__ -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif -#include -#ifdef __clang__ -# pragma clang diagnostic pop -#endif - -namespace fs = std::filesystem; - - 
-namespace CurrentMetrics -{ - extern const Metric StorageS3Threads; - extern const Metric StorageS3ThreadsActive; - extern const Metric StorageS3ThreadsScheduled; -} - -namespace ProfileEvents -{ - extern const Event S3DeleteObjects; - extern const Event S3ListObjects; - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ - -static const std::unordered_set required_configuration_keys = { - "url", -}; -static const std::unordered_set optional_configuration_keys = { - "format", - "compression", - "compression_method", - "structure", - "access_key_id", - "secret_access_key", - "session_token", - "filename", - "use_environment_credentials", - "max_single_read_retries", - "min_upload_part_size", - "upload_part_size_multiply_factor", - "upload_part_size_multiply_parts_count_threshold", - "max_single_part_upload_size", - "max_connections", - "expiration_window_seconds", - "no_sign_request" -}; - -namespace ErrorCodes -{ - extern const int CANNOT_PARSE_TEXT; - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int S3_ERROR; - extern const int UNEXPECTED_EXPRESSION; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int NOT_IMPLEMENTED; - extern const int CANNOT_COMPILE_REGEXP; - extern const int FILE_DOESNT_EXIST; -} - - -class ReadFromStorageS3Step : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromStorageS3Step"; } - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void applyFilters() override; - - ReadFromStorageS3Step( - Block sample_block, - const Names & column_names_, - StorageSnapshotPtr storage_snapshot_, - StorageS3 & storage_, - ReadFromFormatInfo read_from_format_info_, - bool need_only_count_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}) - , column_names(column_names_) - , storage_snapshot(std::move(storage_snapshot_)) - , storage(storage_) - , read_from_format_info(std::move(read_from_format_info_)) - , need_only_count(need_only_count_) - , local_context(std::move(context_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - query_configuration = storage.updateConfigurationAndGetCopy(local_context); - virtual_columns = storage.getVirtuals(); - } - -private: - Names column_names; - StorageSnapshotPtr storage_snapshot; - StorageS3 & storage; - ReadFromFormatInfo read_from_format_info; - bool need_only_count; - StorageS3::Configuration query_configuration; - NamesAndTypesList virtual_columns; - - ContextPtr local_context; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - - -class IOutputFormat; -using OutputFormatPtr = std::shared_ptr; - -class StorageS3Source::DisclosedGlobIterator::Impl : WithContext -{ -public: - Impl( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context_, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : WithContext(context_) - , client(client_.clone()) - , globbed_uri(globbed_uri_) - , virtual_columns(virtual_columns_) - , read_keys(read_keys_) - , request_settings(request_settings_) - , 
list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) - , file_progress_callback(file_progress_callback_) - { - if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); - - const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. - if (key_prefix.size() == globbed_uri.key.size()) - { - buffer.emplace_back(std::make_shared(globbed_uri.key, std::nullopt)); - buffer_iter = buffer.begin(); - is_finished = true; - return; - } - - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - request.SetMaxKeys(static_cast(request_settings.list_object_keys_size)); - - outcome_future = listObjectsAsync(); - - matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); - if (!matcher->ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", globbed_uri.key, matcher->error()); - - recursive = globbed_uri.key == "/**" ? true : false; - - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - fillInternalBufferAssumeLocked(); - } - - KeyWithInfoPtr next(size_t) - { - std::lock_guard lock(mutex); - return nextAssumeLocked(); - } - - size_t objectsCount() - { - return buffer.size(); - } - - ~Impl() - { - list_objects_pool.wait(); - } - -private: - using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; - - KeyWithInfoPtr nextAssumeLocked() - { - if (buffer_iter != buffer.end()) - { - auto answer = *buffer_iter; - ++buffer_iter; - - /// If url doesn't contain globs, we didn't list s3 bucket and didn't get object info for the key. - /// So we get object info lazily here on 'next()' request. - if (!answer->info) - { - answer->info = S3::getObjectInfo(*client, globbed_uri.bucket, answer->key, globbed_uri.version_id, request_settings); - if (file_progress_callback) - file_progress_callback(FileProgress(0, answer->info->size)); - } - - return answer; - } - - if (is_finished) - return {}; - - try - { - fillInternalBufferAssumeLocked(); - } - catch (...) - { - /// In case of exception thrown while listing new batch of files - /// iterator may be partially initialized and its further using may lead to UB. - /// Iterator is used by several processors from several threads and - /// it may take some time for threads to stop processors and they - /// may still use this iterator after exception is thrown. - /// To avoid this UB, reset the buffer and return defaults for further calls. 
- is_finished = true; - buffer.clear(); - buffer_iter = buffer.begin(); - throw; - } - - return nextAssumeLocked(); - } - - void fillInternalBufferAssumeLocked() - { - buffer.clear(); - assert(outcome_future.valid()); - auto outcome = outcome_future.get(); - - if (!outcome.IsSuccess()) - { - throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - } - - const auto & result_batch = outcome.GetResult().GetContents(); - - /// It returns false when all objects were returned - is_finished = !outcome.GetResult().GetIsTruncated(); - - if (!is_finished) - { - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - list_objects_pool.wait(); - outcome_future = listObjectsAsync(); - } - - if (request_settings.throw_on_zero_files_match && result_batch.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files using prefix {}", request.GetPrefix()); - - KeysWithInfo temp_buffer; - temp_buffer.reserve(result_batch.size()); - - for (const auto & row : result_batch) - { - String key = row.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - S3::ObjectInfo info = - { - .size = size_t(row.GetSize()), - .last_modification_time = row.GetLastModified().Millis() / 1000, - }; - - temp_buffer.emplace_back(std::make_shared(std::move(key), std::move(info))); - } - } - - if (temp_buffer.empty()) - { - buffer_iter = buffer.begin(); - return; - } - - if (filter_dag) - { - std::vector paths; - paths.reserve(temp_buffer.size()); - for (const auto & key_with_info : temp_buffer) - paths.push_back(fs::path(globbed_uri.bucket) / key_with_info->key); - - VirtualColumnUtils::filterByPathOrFile(temp_buffer, paths, filter_dag, virtual_columns, getContext()); - } - - buffer = std::move(temp_buffer); - - if (file_progress_callback) - { - for (const auto & key_with_info : buffer) - file_progress_callback(FileProgress(0, key_with_info->info->size)); - } - - /// Set iterator only after the whole batch is processed - buffer_iter = buffer.begin(); - - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); - } - - std::future listObjectsAsync() - { - return list_objects_scheduler([this] - { - ProfileEvents::increment(ProfileEvents::S3ListObjects); - auto outcome = client->ListObjectsV2(request); - - /// Outcome failure will be handled on the caller side. 
- if (outcome.IsSuccess()) - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - return outcome; - }, Priority{}); - } - - std::mutex mutex; - - KeysWithInfo buffer; - KeysWithInfo::iterator buffer_iter; - - std::unique_ptr client; - S3::URI globbed_uri; - ASTPtr query; - NamesAndTypesList virtual_columns; - ActionsDAGPtr filter_dag; - std::unique_ptr matcher; - bool recursive{false}; - bool is_finished{false}; - KeysWithInfo * read_keys; - - S3::ListObjectsV2Request request; - S3Settings::RequestSettings request_settings; - - ThreadPool list_objects_pool; - ThreadPoolCallbackRunner list_objects_scheduler; - std::future outcome_future; - std::function file_progress_callback; -}; - -StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns_, - ContextPtr context, - KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_, - std::function file_progress_callback_) - : pimpl(std::make_shared(client_, globbed_uri_, predicate, virtual_columns_, context, read_keys_, request_settings_, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::DisclosedGlobIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::DisclosedGlobIterator::estimatedKeysCount() -{ - return pimpl->objectsCount(); -} - -class StorageS3Source::KeysIterator::Impl -{ -public: - explicit Impl( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys_, - std::function file_progress_callback_) - : keys(keys_) - , client(client_.clone()) - , version_id(version_id_) - , bucket(bucket_) - , request_settings(request_settings_) - , file_progress_callback(file_progress_callback_) - { - if (read_keys_) - { - for (const auto & key : keys) - read_keys_->push_back(std::make_shared(key)); - } - } - - KeyWithInfoPtr next(size_t) - { - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - auto key = keys[current_index]; - std::optional info; - if (file_progress_callback) - { - info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); - file_progress_callback(FileProgress(0, info->size)); - } - - return std::make_shared(key, info); - } - - size_t objectsCount() - { - return keys.size(); - } - -private: - Strings keys; - std::atomic_size_t index = 0; - std::unique_ptr client; - String version_id; - String bucket; - S3Settings::RequestSettings request_settings; - std::function file_progress_callback; -}; - -StorageS3Source::KeysIterator::KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys, - std::function file_progress_callback_) - : pimpl(std::make_shared( - client_, version_id_, keys_, bucket_, request_settings_, - read_keys, file_progress_callback_)) -{ -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::KeysIterator::next(size_t idx) /// NOLINT -{ - return pimpl->next(idx); -} - -size_t StorageS3Source::KeysIterator::estimatedKeysCount() -{ - return pimpl->objectsCount(); -} - -StorageS3Source::ReadTaskIterator::ReadTaskIterator( - const DB::ReadTaskCallback & callback_, - 
size_t max_threads_count) - : callback(callback_) -{ - ThreadPool pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, max_threads_count); - auto pool_scheduler = threadPoolCallbackRunner(pool, "S3ReadTaskItr"); - - std::vector> keys; - keys.reserve(max_threads_count); - for (size_t i = 0; i < max_threads_count; ++i) - keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); - - pool.wait(); - buffer.reserve(max_threads_count); - for (auto & key_future : keys) - buffer.emplace_back(std::make_shared(key_future.get(), std::nullopt)); -} - -StorageS3Source::KeyWithInfoPtr StorageS3Source::ReadTaskIterator::next(size_t) /// NOLINT -{ - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= buffer.size()) - return std::make_shared(callback()); - - return buffer[current_index]; -} - -size_t StorageS3Source::ReadTaskIterator::estimatedKeysCount() -{ - return buffer.size(); -} - -StorageS3Source::StorageS3Source( - const ReadFromFormatInfo & info, - const String & format_, - String name_, - ContextPtr context_, - std::optional format_settings_, - UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket_, - const String & version_id_, - const String & url_host_and_port_, - std::shared_ptr file_iterator_, - const size_t max_parsing_threads_, - bool need_only_count_) - : SourceWithKeyCondition(info.source_header, false) - , WithContext(context_) - , name(std::move(name_)) - , bucket(bucket_) - , version_id(version_id_) - , url_host_and_port(url_host_and_port_) - , format(format_) - , columns_desc(info.columns_description) - , requested_columns(info.requested_columns) - , max_block_size(max_block_size_) - , request_settings(request_settings_) - , compression_hint(std::move(compression_hint_)) - , client(client_) - , sample_block(info.format_header) - , format_settings(format_settings_) - , requested_virtual_columns(info.requested_virtual_columns) - , file_iterator(file_iterator_) - , max_parsing_threads(max_parsing_threads_) - , need_only_count(need_only_count_) - , create_reader_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, CurrentMetrics::StorageS3ThreadsScheduled, 1) - , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "CreateS3Reader")) -{ -} - -void StorageS3Source::lazyInitialize(size_t idx) -{ - if (initialized) - return; - - reader = createReader(idx); - if (reader) - reader_future = createReaderAsync(idx); - initialized = true; -} - -StorageS3Source::ReaderHolder StorageS3Source::createReader(size_t idx) -{ - KeyWithInfoPtr key_with_info; - do - { - key_with_info = file_iterator->next(idx); - if (!key_with_info || key_with_info->key.empty()) - return {}; - - if (!key_with_info->info) - key_with_info->info = S3::getObjectInfo(*client, bucket, key_with_info->key, version_id, request_settings); - } - while (getContext()->getSettingsRef().s3_skip_empty_files && key_with_info->info->size == 0); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? 
tryGetNumRowsFromCache(*key_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - source = std::make_shared(sample_block, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - auto compression_method = chooseCompressionMethod(key_with_info->key, compression_hint); - read_buf = createS3ReadBuffer(key_with_info->key, key_with_info->info->size); - - auto input_format = FormatFactory::instance().getInput( - format, - *read_buf, - sample_block, - getContext(), - max_block_size, - format_settings, - max_parsing_threads, - /* max_download_threads= */ std::nullopt, - /* is_remote_fs */ true, - compression_method, - need_only_count); - - if (key_condition) - input_format->setKeyCondition(key_condition); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { return std::make_shared(header, columns_desc, *input_format, getContext()); }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{key_with_info, bucket, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -std::future StorageS3Source::createReaderAsync(size_t idx) -{ - return create_reader_scheduler([=, this] { return createReader(idx); }, Priority{}); -} - -std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. 
- if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - { - LOG_TRACE(log, "Downloading object of size {} from S3 with initial prefetch", object_size); - return createAsyncS3ReadBuffer(key, read_settings, object_size); - } - - return std::make_unique( - client, bucket, key, version_id, request_settings, read_settings, - /*use_external_buffer*/ false, /*offset_*/ 0, /*read_until_position_*/ 0, - /*restricted_seek_*/ false, object_size); -} - -std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( - const String & key, const ReadSettings & read_settings, size_t object_size) -{ - auto context = getContext(); - auto read_buffer_creator = - [this, read_settings, object_size] - (const std::string & path, size_t read_until_position) -> std::unique_ptr - { - return std::make_unique( - client, - bucket, - path, - version_id, - request_settings, - read_settings, - /* use_external_buffer */true, - /* offset */0, - read_until_position, - /* restricted_seek */true, - object_size); - }; - - auto s3_impl = std::make_unique( - std::move(read_buffer_creator), - StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, - read_settings, - /* cache_log */nullptr, /* use_external_buffer */true); - - auto modified_settings{read_settings}; - /// FIXME: Changing this setting to default value breaks something around parquet reading - modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; - - auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - auto async_reader = std::make_unique( - std::move(s3_impl), pool_reader, modified_settings, - context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog()); - - async_reader->setReadUntilEnd(); - if (read_settings.remote_fs_prefetch) - async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - return async_reader; -} - -StorageS3Source::~StorageS3Source() -{ - create_reader_pool.wait(); -} - -String StorageS3Source::getName() const -{ - return name; -} - -Chunk StorageS3Source::generate() -{ - lazyInitialize(); - - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = reader.getInputFormat()->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getPath(), reader.getFileSize()); - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getFile(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
- create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -void StorageS3Source::addNumRowsToCache(const String & key, size_t num_rows) -{ - String source = fs::path(url_host_and_port) / bucket / key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional StorageS3Source::tryGetNumRowsFromCache(const KeyWithInfo & key_with_info) -{ - String source = fs::path(url_host_and_port) / bucket / key_with_info.key; - auto cache_key = getKeyForSchemaCache(source, format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - return key_with_info.info->last_modification_time; - }; - - return StorageS3::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class StorageS3Sink : public SinkToStorage -{ -public: - StorageS3Sink( - const String & format, - const Block & sample_block_, - ContextPtr context, - std::optional format_settings_, - const CompressionMethod compression_method, - const StorageS3::Configuration & configuration_, - const String & bucket, - const String & key) - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - BlobStorageLogWriterPtr blob_log = nullptr; - if (auto blob_storage_log = context->getBlobStorageLog()) - { - blob_log = std::make_shared(std::move(blob_storage_log)); - blob_log->query_id = context->getCurrentQueryId(); - } - - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - configuration_.client, - bucket, - key, - DBMS_DEFAULT_BUFFER_SIZE, - configuration_.request_settings, - std::move(blob_log), - std::nullopt, - threadPoolCallbackRunner(getIOThreadPool().get(), "S3ParallelWrite"), - context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer - = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, format_settings); - } - - String getName() const override { return "StorageS3Sink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf.reset(); - } - - Block sample_block; - std::optional format_settings; - std::unique_ptr write_buf; - OutputFormatPtr writer; - bool cancelled = false; - std::mutex cancel_mutex; -}; - - -class PartitionedStorageS3Sink : public PartitionedSink -{ -public: - PartitionedStorageS3Sink( - const ASTPtr & partition_by, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - std::optional format_settings_, - const CompressionMethod compression_method_, - const StorageS3::Configuration & configuration_, - const String & bucket_, - const String & key_) - : PartitionedSink(partition_by, context_, sample_block_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - , configuration(configuration_) - , bucket(bucket_) - , key(key_) - , format_settings(format_settings_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto partition_bucket = replaceWildcards(bucket, partition_id); - validateBucket(partition_bucket); - - auto partition_key = replaceWildcards(key, partition_id); - validateKey(partition_key); - - return std::make_shared( - format, - sample_block, - context, - format_settings, - compression_method, - configuration, - partition_bucket, - partition_key - ); - } - -private: - const String format; - const Block sample_block; - const ContextPtr context; - const CompressionMethod compression_method; - const StorageS3::Configuration configuration; - const String bucket; - const String key; - const std::optional format_settings; - - static void validateBucket(const String & str) - { - S3::URI::validateBucket(str, {}); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name"); - - validatePartitionKey(str, false); - } - - static void validateKey(const String & str) - { - /// See: - /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html - /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject - - if (str.empty() || str.size() > 1024) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size()); - - if (!DB::UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key"); - - validatePartitionKey(str, true); - } -}; - - -StorageS3::StorageS3( - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , configuration(configuration_) - , name(configuration.url.storage_name) - , distributed_processing(distributed_processing_) - , format_settings(format_settings_) - , partition_by(partition_by_) -{ - updateConfiguration(context_); // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall) - - FormatFactory::instance().checkFormatName(configuration.format); - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration.url.uri); - context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration.headers_from_ast); - - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - auto columns = 
getTableStructureFromDataImpl(configuration, format_settings, context_); - storage_metadata.setColumns(columns); - } - else - { - /// We don't allow special columns in S3 storage. - if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine S3 doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -static std::shared_ptr createFileIterator( - const StorageS3::Configuration & configuration, - bool distributed_processing, - ContextPtr local_context, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - StorageS3::KeysWithInfo * read_keys = nullptr, - std::function file_progress_callback = {}) -{ - if (distributed_processing) - { - return std::make_shared(local_context->getReadTaskCallback(), local_context->getSettingsRef().max_threads); - } - else if (configuration.withGlobs()) - { - /// Iterate through disclosed globs and make a source for each file - return std::make_shared( - *configuration.client, configuration.url, predicate, virtual_columns, - local_context, read_keys, configuration.request_settings, file_progress_callback); - } - else - { - Strings keys = configuration.keys; - auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - if (filter_dag) - { - std::vector paths; - paths.reserve(keys.size()); - for (const auto & key : keys) - paths.push_back(fs::path(configuration.url.bucket) / key); - VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); - } - - return std::make_shared( - *configuration.client, configuration.url.version_id, keys, - configuration.url.bucket, configuration.request_settings, read_keys, file_progress_callback); - } -} - -bool StorageS3::supportsSubsetOfColumns(const ContextPtr & context) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context, format_settings); -} - -bool StorageS3::prefersLargeBlocks() const -{ - return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration.format); -} - -bool StorageS3::parallelizeOutputAfterReading(ContextPtr context) const -{ - return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration.format, context); -} - -void StorageS3::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr local_context, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), virtual_columns); - - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && local_context->getSettingsRef().optimize_count_from_files; - - auto reading = std::make_unique( - read_from_format_info.source_header, - column_names, - storage_snapshot, - *this, - std::move(read_from_format_info), - need_only_count, - local_context, - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromStorageS3Step::applyFilters() -{ - auto filter_actions_dag = 
ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void ReadFromStorageS3Step::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - iterator_wrapper = createFileIterator( - query_configuration, storage.distributed_processing, local_context, predicate, - virtual_columns, nullptr, local_context->getFileProgressCallback()); -} - -void ReadFromStorageS3Step::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - if (storage.partition_by && query_configuration.withWildcard()) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned S3 storage is not implemented yet"); - - createIterator(nullptr); - - size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); - if (estimated_keys_count > 1) - num_streams = std::min(num_streams, estimated_keys_count); - else - /// Disclosed glob iterator can underestimate the amount of keys in some cases. We will keep one stream for this particular case. - num_streams = 1; - - const size_t max_threads = local_context->getSettingsRef().max_threads; - const size_t max_parsing_threads = num_streams >= max_threads ? 1 : (max_threads / std::max(num_streams, 1ul)); - LOG_DEBUG(getLogger("StorageS3"), "Reading in {} streams, {} threads per stream", num_streams, max_parsing_threads); - - Pipes pipes; - pipes.reserve(num_streams); - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - read_from_format_info, - query_configuration.format, - storage.getName(), - local_context, - storage.format_settings, - max_block_size, - query_configuration.request_settings, - query_configuration.compression_method, - query_configuration.client, - query_configuration.url.bucket, - query_configuration.url.version_id, - query_configuration.url.uri.getHost() + std::to_string(query_configuration.url.uri.getPort()), - iterator_wrapper, - max_parsing_threads, - need_only_count); - - source->setKeyCondition(filter_nodes.nodes, local_context); - pipes.emplace_back(std::move(source)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(read_from_format_info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - - auto sample_block = metadata_snapshot->getSampleBlock(); - auto chosen_compression_method = chooseCompressionMethod(query_configuration.keys.back(), query_configuration.compression_method); - auto insert_query = std::dynamic_pointer_cast(query); - - auto partition_by_ast = insert_query ? (insert_query->partition_by ? 
insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && query_configuration.withWildcard(); - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - query_configuration.keys.back()); - } - else - { - if (query_configuration.withGlobs()) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", query_configuration.url.key); - - bool truncate_in_insert = local_context->getSettingsRef().s3_truncate_on_insert; - - if (!truncate_in_insert && S3::objectExists(*query_configuration.client, query_configuration.url.bucket, query_configuration.keys.back(), query_configuration.url.version_id, query_configuration.request_settings)) - { - if (local_context->getSettingsRef().s3_create_new_file_on_insert) - { - size_t index = query_configuration.keys.size(); - const auto & first_key = query_configuration.keys[0]; - auto pos = first_key.find_first_of('.'); - String new_key; - do - { - new_key = first_key.substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? "" : first_key.substr(pos)); - ++index; - } - while (S3::objectExists(*query_configuration.client, query_configuration.url.bucket, new_key, query_configuration.url.version_id, query_configuration.request_settings)); - - query_configuration.keys.push_back(new_key); - configuration.keys.push_back(new_key); - } - else - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. " - "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - query_configuration.url.bucket, query_configuration.keys.back()); - } - } - - return std::make_shared( - query_configuration.format, - sample_block, - local_context, - format_settings, - chosen_compression_method, - query_configuration, - query_configuration.url.bucket, - query_configuration.keys.back()); - } -} - -void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - auto query_configuration = updateConfigurationAndGetCopy(local_context); - - if (query_configuration.withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "S3 key '{}' contains globs, so the table is in readonly mode", - query_configuration.url.key); - } - - Aws::S3::Model::Delete delkeys; - - for (const auto & key : query_configuration.keys) - { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(key); - delkeys.AddObjects(std::move(obj)); - } - - ProfileEvents::increment(ProfileEvents::S3DeleteObjects); - S3::DeleteObjectsRequest request; - request.SetBucket(query_configuration.url.bucket); - request.SetDelete(delkeys); - - auto response = query_configuration.client->DeleteObjects(request); - - const auto * response_error = response.IsSuccess() ? 
nullptr : &response.GetError(); - auto time_now = std::chrono::system_clock::now(); - if (auto blob_storage_log = BlobStorageLogWriter::create()) - { - for (const auto & key : query_configuration.keys) - blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete, query_configuration.url.bucket, key, {}, 0, response_error, time_now); - } - - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw S3Exception(err.GetMessage(), err.GetErrorType()); - } - - for (const auto & error : response.GetResult().GetErrors()) - LOG_WARNING(getLogger("StorageS3"), "Failed to delete {}, error: {}", error.GetKey(), error.GetMessage()); -} - -StorageS3::Configuration StorageS3::updateConfigurationAndGetCopy(ContextPtr local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); - return configuration; -} - -void StorageS3::updateConfiguration(ContextPtr local_context) -{ - std::lock_guard lock(configuration_update_mutex); - configuration.update(local_context); -} - -void StorageS3::useConfiguration(const Configuration & new_configuration) -{ - std::lock_guard lock(configuration_update_mutex); - configuration = new_configuration; -} - -const StorageS3::Configuration & StorageS3::getConfiguration() -{ - std::lock_guard lock(configuration_update_mutex); - return configuration; -} - -bool StorageS3::Configuration::update(ContextPtr context) -{ - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString()); - request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context->getSettings()); - - if (client && (static_configuration || !auth_settings.hasUpdates(s3_settings.auth_settings))) - return false; - - auth_settings.updateFrom(s3_settings.auth_settings); - keys[0] = url.key; - connect(context); - return true; -} - -void StorageS3::Configuration::connect(ContextPtr context) -{ - const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); - const Settings & local_settings = context->getSettingsRef(); - - S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - auth_settings.region, - context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ false, - request_settings.get_request_throttler, - request_settings.put_request_throttler, - url.uri.getScheme()); - - client_configuration.endpointOverride = url.endpoint; - client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.http_connection_pool_size = global_settings.s3_http_connection_pool_size; - auto headers = auth_settings.headers; - if (!headers_from_ast.empty()) - headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); - - client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; - - S3::ClientSettings client_settings{ - .use_virtual_addressing = url.is_virtual_hosted_style, - .disable_checksum = local_settings.s3_disable_checksum, - .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), - }; - - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); - client = S3::ClientFactory::instance().create( - client_configuration, - client_settings, - credentials.GetAWSAccessKeyId(), - 
credentials.GetAWSSecretKey(), - auth_settings.server_side_encryption_customer_key_base64, - auth_settings.server_side_encryption_kms_config, - std::move(headers), - S3::CredentialsConfiguration{ - auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), - auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), - auth_settings.expiration_window_seconds.value_or( - context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }); -} - -void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) -{ - validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); - - auto filename = collection.getOrDefault("filename", ""); - if (!filename.empty()) - configuration.url = S3::URI(std::filesystem::path(collection.get("url")) / filename); - else - configuration.url = S3::URI(collection.get("url")); - - configuration.auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); - configuration.auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); - configuration.auth_settings.use_environment_credentials = collection.getOrDefault("use_environment_credentials", 1); - configuration.auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false); - configuration.auth_settings.expiration_window_seconds = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); - - configuration.format = collection.getOrDefault("format", configuration.format); - configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); - configuration.structure = collection.getOrDefault("structure", "auto"); - - configuration.request_settings = S3Settings::RequestSettings(collection); -} - -StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPtr local_context, bool get_format_from_file) -{ - StorageS3::Configuration configuration; - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - processNamedCollectionResult(configuration, *named_collection); - } - else - { - /// Supported signatures: - /// - /// S3('url') - /// S3('url', 'format') - /// S3('url', 'format', 'compression') - /// S3('url', NOSIGN) - /// S3('url', NOSIGN, 'format') - /// S3('url', NOSIGN, 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') - /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'session_token', 'format', 'compression') - /// with optional headers() function - - size_t count = StorageURL::evalArgsAndCollectHeaders(engine_args, configuration.headers_from_ast, local_context); - - if (count == 0 || count > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage S3 requires 1 to 5 arguments: " - "url, [NOSIGN | access_key_id, secret_access_key], name of used 
format and [compression_method]"); - - std::unordered_map engine_args_to_idx; - bool no_sign_request = false; - - /// For 2 arguments we support 2 possible variants: - /// - s3(source, format) - /// - s3(source, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - no_sign_request = true; - else - engine_args_to_idx = {{"format", 1}}; - } - /// For 3 arguments we support 2 possible variants: - /// - s3(source, format, compression_method) - /// - s3(source, access_key_id, secret_access_key) - /// - s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or format name. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}}; - } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) - engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; - else - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; - } - /// For 4 arguments we support 3 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token) - /// - s3(source, access_key_id, secret_access_key, format) - /// - s3(source, NOSIGN, format, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN or not. - else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(engine_args[1], "access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - engine_args_to_idx = {{"format", 2}, {"compression_method", 3}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - } - else - { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } - } - } - /// For 5 arguments we support 2 possible variants: - /// - s3(source, access_key_id, secret_access_key, session_token, format) - /// - s3(source, access_key_id, secret_access_key, format, compression) - else if (count == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; - } - else - { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } - } - else if (count == 6) - { - engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"compression_method", 5}}; - } - - /// This argument is always the first - configuration.url = S3::URI(checkAndGetLiteralArgument(engine_args[0], "url")); - - if (engine_args_to_idx.contains("format")) - configuration.format = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["format"]], "format"); - - if (engine_args_to_idx.contains("compression_method")) - configuration.compression_method = 
checkAndGetLiteralArgument(engine_args[engine_args_to_idx["compression_method"]], "compression_method"); - - if (engine_args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["access_key_id"]], "access_key_id"); - - if (engine_args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["secret_access_key"]], "secret_access_key"); - - if (engine_args_to_idx.contains("session_token")) - configuration.auth_settings.session_token = checkAndGetLiteralArgument(engine_args[engine_args_to_idx["session_token"]], "session_token"); - - if (no_sign_request) - configuration.auth_settings.no_sign_request = no_sign_request; - } - - configuration.static_configuration = !configuration.auth_settings.access_key_id.empty() || configuration.auth_settings.no_sign_request.has_value(); - - configuration.keys = {configuration.url.key}; - - if (configuration.format == "auto" && get_format_from_file) - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.key, true); - - return configuration; -} - -ColumnsDescription StorageS3::getTableStructureFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx) -{ - return getTableStructureFromDataImpl(configuration, format_settings, ctx); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - std::shared_ptr file_iterator_, - const StorageS3Source::KeysWithInfo & read_keys_, - const StorageS3::Configuration & configuration_, - const std::optional & format_settings_, - const ContextPtr & context_) - : WithContext(context_) - , file_iterator(file_iterator_) - , read_keys(read_keys_) - , configuration(configuration_) - , format_settings(format_settings_) - , prev_read_keys_size(read_keys_.size()) - { - } - - std::pair, std::optional> next() override - { - /// For default mode check cached columns for currently read keys on first iteration. - if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; - } - - while (true) - { - current_key_with_info = (*file_iterator)(); - - if (!current_key_with_info || current_key_with_info->key.empty()) - { - if (first) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path " - "in S3 or all files are empty. You must specify table structure manually", - configuration.format); - - return {nullptr, std::nullopt}; - } - - /// S3 file iterator could get new keys after new iteration, check them in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT && read_keys.size() > prev_read_keys_size) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache}; - } - - if (getContext()->getSettingsRef().s3_skip_empty_files && current_key_with_info->info && current_key_with_info->info->size == 0) - continue; - - /// In union mode, check cached columns only for current key. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - StorageS3::KeysWithInfo keys = {current_key_with_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(keys.begin(), keys.end())) - { - first = false; - return {nullptr, columns_from_cache}; - } - } - - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - auto impl = std::make_unique(configuration.client, configuration.url.bucket, current_key_with_info->key, configuration.url.version_id, configuration.request_settings, getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().s3_skip_empty_files || !impl->eof()) - { - first = false; - return {wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(current_key_with_info->key, configuration.compression_method), zstd_window_log_max), std::nullopt}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket / current_key_with_info->key; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addColumns(cache_key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3 - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - auto host_and_bucket = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / configuration.url.bucket; - Strings sources; - sources.reserve(read_keys.size()); - std::transform(read_keys.begin(), read_keys.end(), std::back_inserter(sources), [&](const auto & elem){ return host_and_bucket / elem->key; }); - auto cache_keys = getKeysForSchemaCache(sources, configuration.format, format_settings, getContext()); - StorageS3::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - String getLastFileName() const override - { - if (current_key_with_info) - return current_key_with_info->key; - return ""; - } - - private: - std::optional tryGetColumnsFromCache( - const StorageS3::KeysWithInfo::const_iterator & begin, - const StorageS3::KeysWithInfo::const_iterator & end) - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_s3) - return std::nullopt; - - auto & schema_cache = StorageS3::getSchemaCache(getContext()); - for (auto it = begin; it < end; ++it) - { - auto get_last_mod_time = [&] - { - time_t last_modification_time = 0; - if ((*it)->info) - { - last_modification_time = (*it)->info->last_modification_time; - } - else - { - /// Note that in case of exception in getObjectInfo returned info will be empty, - /// but schema cache will 
handle this case and won't return columns from cache - /// because we can't say that it's valid without last modification time. - last_modification_time = S3::getObjectInfo( - *configuration.client, - configuration.url.bucket, - (*it)->key, - configuration.url.version_id, - configuration.request_settings, - /*with_metadata=*/ false, - /*for_disk_s3=*/ false, - /*throw_on_error= */ false).last_modification_time; - } - - return last_modification_time ? std::make_optional(last_modification_time) : std::nullopt; - }; - - String path = fs::path(configuration.url.bucket) / (*it)->key; - String source = fs::path(configuration.url.uri.getHost() + std::to_string(configuration.url.uri.getPort())) / path; - auto cache_key = getKeyForSchemaCache(source, configuration.format, format_settings, getContext()); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; - } - - return std::nullopt; - } - - std::shared_ptr file_iterator; - const StorageS3Source::KeysWithInfo & read_keys; - const StorageS3::Configuration & configuration; - const std::optional & format_settings; - StorageS3Source::KeyWithInfoPtr current_key_with_info; - size_t prev_read_keys_size; - bool first = true; - }; - -} - -ColumnsDescription StorageS3::getTableStructureFromDataImpl( - const Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx) -{ - KeysWithInfo read_keys; - - auto file_iterator = createFileIterator(configuration, false, ctx, {}, {}, &read_keys); - - ReadBufferIterator read_buffer_iterator(file_iterator, read_keys, configuration, format_settings, ctx); - return readSchemaFromFormat(configuration.format, format_settings, read_buffer_iterator, configuration.withGlobs(), ctx); -} - -void registerStorageS3Impl(const String & name, StorageFactory & factory) -{ - factory.registerStorage(name, [](const StorageFactory::Arguments & args) - { - auto & engine_args = args.engine_args; - if (engine_args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); - - auto configuration = StorageS3::getConfiguration(engine_args, args.getLocalContext()); - // Use format settings from global server context + settings from - // the SETTINGS clause of the create query. Settings from current - // session and user are ignored. - std::optional format_settings; - if (args.storage_def->settings) - { - FormatFactorySettings user_format_settings; - - // Apply changed settings from global context, but ignore the - // unknown ones, because we only have the format settings here. - const auto & changes = args.getContext()->getSettingsRef().changes(); - for (const auto & change : changes) - { - if (user_format_settings.has(change.name)) - user_format_settings.set(change.name, change.value); - } - - // Apply changes from SETTINGS clause, with validation. 
- user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), user_format_settings); - } - else - { - format_settings = getFormatSettings(args.getContext()); - } - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - std::move(configuration), - args.getContext(), - args.table_id, - args.columns, - args.constraints, - args.comment, - format_settings, - /* distributed_processing_ */false, - partition_by); - }, - { - .supports_settings = true, - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::S3, - }); -} - -void registerStorageS3(StorageFactory & factory) -{ - return registerStorageS3Impl("S3", factory); -} - -void registerStorageCOS(StorageFactory & factory) -{ - return registerStorageS3Impl("COSN", factory); -} - -void registerStorageOSS(StorageFactory & factory) -{ - return registerStorageS3Impl("OSS", factory); -} - -NamesAndTypesList StorageS3::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageS3::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - -bool StorageS3::supportsPartitionBy() const -{ - return true; -} - -SchemaCache & StorageS3::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_s3", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h deleted file mode 100644 index 81a03cc5ad5..00000000000 --- a/src/Storages/StorageS3.h +++ /dev/null @@ -1,399 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace Aws::S3 -{ - class Client; -} - -namespace DB -{ - -class PullingPipelineExecutor; -class NamedCollection; - -class StorageS3Source : public SourceWithKeyCondition, WithContext -{ -public: - - struct KeyWithInfo - { - KeyWithInfo() = default; - - explicit KeyWithInfo(String key_, std::optional info_ = std::nullopt) - : key(std::move(key_)), info(std::move(info_)) {} - - virtual ~KeyWithInfo() = default; - - String key; - std::optional info; - }; - using KeyWithInfoPtr = std::shared_ptr; - - using KeysWithInfo = std::vector; - - class IIterator - { - public: - virtual ~IIterator() = default; - virtual KeyWithInfoPtr next(size_t idx = 0) = 0; /// NOLINT - - /// Estimates how many streams we need to process all files. - /// If keys count >= max_threads_count, the returned number may not represent the actual number of the keys. - /// Intended to be called before any next() calls, may underestimate otherwise - /// fixme: May underestimate if the glob has a strong filter, so there are few matches among the first 1000 ListObjects results. 
- virtual size_t estimatedKeysCount() = 0; - - KeyWithInfoPtr operator ()() { return next(); } - }; - - class DisclosedGlobIterator : public IIterator - { - public: - DisclosedGlobIterator( - const S3::Client & client_, - const S3::URI & globbed_uri_, - const ActionsDAG::Node * predicate, - const NamesAndTypesList & virtual_columns, - ContextPtr context, - KeysWithInfo * read_keys_ = nullptr, - const S3Settings::RequestSettings & request_settings_ = {}, - std::function progress_callback_ = {}); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class KeysIterator : public IIterator - { - public: - explicit KeysIterator( - const S3::Client & client_, - const std::string & version_id_, - const std::vector & keys_, - const String & bucket_, - const S3Settings::RequestSettings & request_settings_, - KeysWithInfo * read_keys = nullptr, - std::function progress_callback_ = {}); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - class Impl; - /// shared_ptr to have copy constructor - std::shared_ptr pimpl; - }; - - class ReadTaskIterator : public IIterator - { - public: - explicit ReadTaskIterator(const ReadTaskCallback & callback_, size_t max_threads_count); - - KeyWithInfoPtr next(size_t idx = 0) override; /// NOLINT - size_t estimatedKeysCount() override; - - private: - KeysWithInfo buffer; - std::atomic_size_t index = 0; - - ReadTaskCallback callback; - }; - - StorageS3Source( - const ReadFromFormatInfo & info, - const String & format, - String name_, - ContextPtr context_, - std::optional format_settings_, - UInt64 max_block_size_, - const S3Settings::RequestSettings & request_settings_, - String compression_hint_, - const std::shared_ptr & client_, - const String & bucket, - const String & version_id, - const String & url_host_and_port, - std::shared_ptr file_iterator_, - size_t max_parsing_threads, - bool need_only_count_); - - ~StorageS3Source() override; - - String getName() const override; - - void setKeyCondition(const ActionsDAG::NodeRawConstPtrs & nodes, ContextPtr context_) override - { - setKeyConditionImpl(nodes, context_, sample_block); - } - - Chunk generate() override; - -private: - friend class StorageS3QueueSource; - - String name; - String bucket; - String version_id; - String url_host_and_port; - String format; - ColumnsDescription columns_desc; - NamesAndTypesList requested_columns; - UInt64 max_block_size; - S3Settings::RequestSettings request_settings; - String compression_hint; - std::shared_ptr client; - Block sample_block; - std::optional format_settings; - - struct ReaderHolder - { - public: - ReaderHolder( - KeyWithInfoPtr key_with_info_, - String bucket_, - std::unique_ptr read_buf_, - std::shared_ptr source_, - std::unique_ptr pipeline_, - std::unique_ptr reader_) - : key_with_info(key_with_info_) - , bucket(std::move(bucket_)) - , read_buf(std::move(read_buf_)) - , source(std::move(source_)) - , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) - { - } - - ReaderHolder() = default; - ReaderHolder(const ReaderHolder & other) = delete; - ReaderHolder & operator=(const ReaderHolder & other) = delete; - - ReaderHolder(ReaderHolder && other) noexcept - { - *this = std::move(other); - } - - ReaderHolder & operator=(ReaderHolder && other) noexcept - { - /// The order of destruction is important. 
- /// reader uses pipeline, pipeline uses read_buf. - reader = std::move(other.reader); - pipeline = std::move(other.pipeline); - source = std::move(other.source); - read_buf = std::move(other.read_buf); - key_with_info = std::move(other.key_with_info); - bucket = std::move(other.bucket); - return *this; - } - - explicit operator bool() const { return reader != nullptr; } - PullingPipelineExecutor * operator->() { return reader.get(); } - const PullingPipelineExecutor * operator->() const { return reader.get(); } - String getPath() const { return fs::path(bucket) / key_with_info->key; } - const String & getFile() const { return key_with_info->key; } - const KeyWithInfo & getKeyWithInfo() const { return *key_with_info; } - std::optional getFileSize() const { return key_with_info->info ? std::optional(key_with_info->info->size) : std::nullopt; } - - const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } - - private: - KeyWithInfoPtr key_with_info; - String bucket; - std::unique_ptr read_buf; - std::shared_ptr source; - std::unique_ptr pipeline; - std::unique_ptr reader; - }; - - ReaderHolder reader; - - NamesAndTypesList requested_virtual_columns; - std::shared_ptr file_iterator; - size_t max_parsing_threads = 1; - bool need_only_count; - - LoggerPtr log = getLogger("StorageS3Source"); - - ThreadPool create_reader_pool; - ThreadPoolCallbackRunner create_reader_scheduler; - std::future reader_future; - std::atomic initialized{false}; - - size_t total_rows_in_file = 0; - - /// Notice: we should initialize reader and future_reader lazily in generate to make sure key_condition - /// is set before createReader is invoked for key_condition is read in createReader. - void lazyInitialize(size_t idx = 0); - - /// Recreate ReadBuffer and Pipeline for each file. - ReaderHolder createReader(size_t idx = 0); - std::future createReaderAsync(size_t idx = 0); - - std::unique_ptr createS3ReadBuffer(const String & key, size_t object_size); - std::unique_ptr createAsyncS3ReadBuffer(const String & key, const ReadSettings & read_settings, size_t object_size); - - void addNumRowsToCache(const String & key, size_t num_rows); - std::optional tryGetNumRowsFromCache(const KeyWithInfo & key_with_info); -}; - -/** - * This class represents table engine for external S3 urls. - * It sends HTTP GET to server when select is called and - * HTTP PUT when insert is called. - */ -class StorageS3 : public IStorage -{ -public: - struct Configuration : public StatelessTableEngineConfiguration - { - Configuration() = default; - - String getPath() const { return url.key; } - - bool update(ContextPtr context); - - void connect(ContextPtr context); - - bool withGlobs() const { return url.key.find_first_of("*?{") != std::string::npos; } - - bool withWildcard() const - { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return url.bucket.find(PARTITION_ID_WILDCARD) != String::npos - || keys.back().find(PARTITION_ID_WILDCARD) != String::npos; - } - - S3::URI url; - S3::AuthSettings auth_settings; - S3Settings::RequestSettings request_settings; - /// If s3 configuration was passed from ast, then it is static. - /// If from config - it can be changed with config reload. - bool static_configuration = true; - /// Headers from ast is a part of static configuration. 
- HTTPHeaderEntries headers_from_ast; - - std::shared_ptr client; - std::vector keys; - }; - - StorageS3( - const Configuration & configuration_, - ContextPtr context_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - std::optional format_settings_, - bool distributed_processing_ = false, - ASTPtr partition_by_ = nullptr); - - String getName() const override - { - return name; - } - - void read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; - - void truncate(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, TableExclusiveLockHolder &) override; - - NamesAndTypesList getVirtuals() const override; - static Names getVirtualColumnNames(); - - bool supportsPartitionBy() const override; - - static void processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection); - - static SchemaCache & getSchemaCache(const ContextPtr & ctx); - - static StorageS3::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context, bool get_format_from_file = true); - - static ColumnsDescription getTableStructureFromData( - const StorageS3::Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx); - - using KeysWithInfo = StorageS3Source::KeysWithInfo; - - bool supportsTrivialCountOptimization() const override { return true; } - -protected: - virtual Configuration updateConfigurationAndGetCopy(ContextPtr local_context); - - virtual void updateConfiguration(ContextPtr local_context); - - void useConfiguration(const Configuration & new_configuration); - - const Configuration & getConfiguration(); - -private: - friend class StorageS3Cluster; - friend class TableFunctionS3Cluster; - friend class StorageS3Queue; - friend class ReadFromStorageS3Step; - - Configuration configuration; - std::mutex configuration_update_mutex; - NamesAndTypesList virtual_columns; - - String name; - const bool distributed_processing; - std::optional format_settings; - ASTPtr partition_by; - - static ColumnsDescription getTableStructureFromDataImpl( - const Configuration & configuration, - const std::optional & format_settings, - ContextPtr ctx); - - bool supportsSubcolumns() const override { return true; } - - bool supportsSubsetOfColumns(const ContextPtr & context) const; - - bool prefersLargeBlocks() const override; - - bool parallelizeOutputAfterReading(ContextPtr context) const override; -}; - -} - -#endif diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp deleted file mode 100644 index 25c2b42b766..00000000000 --- a/src/Storages/StorageS3Cluster.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "Storages/StorageS3Cluster.h" - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -StorageS3Cluster::StorageS3Cluster( - const String & cluster_name_, - const StorageS3::Configuration 
& configuration_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_) - : IStorageCluster(cluster_name_, table_id_, getLogger("StorageS3Cluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) - , s3_configuration{configuration_} -{ - context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); - context_->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(configuration_.headers_from_ast); - - StorageInMemoryMetadata storage_metadata; - updateConfigurationIfChanged(context_); - - if (columns_.empty()) - { - /// `format_settings` is set to std::nullopt, because StorageS3Cluster is used only as table function - auto columns = StorageS3::getTableStructureFromDataImpl(s3_configuration, /*format_settings=*/std::nullopt, context_); - storage_metadata.setColumns(columns); - } - else - storage_metadata.setColumns(columns_); - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -void StorageS3Cluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); - - TableFunctionS3Cluster::addColumnsStructureToArguments(expression_list->children, structure, context); -} - -void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) -{ - s3_configuration.update(local_context); -} - -RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const -{ - auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, predicate, virtual_columns, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); - - auto callback = std::make_shared>([iterator]() mutable -> String - { - if (auto next = iterator->next()) - return next->key; - return ""; - }); - return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; -} - -NamesAndTypesList StorageS3Cluster::getVirtuals() const -{ - return virtual_columns; -} - - -} - -#endif diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h deleted file mode 100644 index c526f14834a..00000000000 --- a/src/Storages/StorageS3Cluster.h +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include - -#include "Client/Connection.h" -#include -#include -#include -#include - -namespace DB -{ - -class Context; - -class StorageS3Cluster : public IStorageCluster -{ -public: - StorageS3Cluster( - const String & cluster_name_, - const StorageS3::Configuration & configuration_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_); - - std::string getName() const override { return "S3Cluster"; } - - NamesAndTypesList getVirtuals() const override; - - RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override; - - bool 
supportsSubcolumns() const override { return true; } - - bool supportsTrivialCountOptimization() const override { return true; } - -protected: - void updateConfigurationIfChanged(ContextPtr local_context); - -private: - void updateBeforeRead(const ContextPtr & context) override { updateConfigurationIfChanged(context); } - - void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; - - StorageS3::Configuration s3_configuration; - NamesAndTypesList virtual_columns; -}; - - -} - -#endif diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index 1426ea83800..77d5be3698c 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -1,9 +1,7 @@ #include #include -#include #include -#include -#include +#include #include #include #include @@ -83,7 +81,7 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); #if USE_AZURE_BLOB_STORAGE - fillDataImpl(res_columns, StorageAzureBlob::getSchemaCache(context), "Azure"); + fillDataImpl(res_columns, StorageAzureBlobStorage::getSchemaCache(context), "Azure"); /// FIXME #endif } diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index dea9feaf28b..0b72d7e94fd 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -45,8 +45,6 @@ void registerStorageIceberg(StorageFactory & factory); #endif #if USE_HDFS -void registerStorageHDFS(StorageFactory & factory); - #if USE_HIVE void registerStorageHive(StorageFactory & factory); #endif @@ -99,9 +97,7 @@ void registerStorageSQLite(StorageFactory & factory); void registerStorageKeeperMap(StorageFactory & factory); -#if USE_AZURE_BLOB_STORAGE -void registerStorageAzureBlob(StorageFactory & factory); -#endif +void registerStorageObjectStorage(StorageFactory & factory); void registerStorages() { @@ -131,9 +127,7 @@ void registerStorages() #endif #if USE_AWS_S3 - registerStorageS3(factory); - registerStorageCOS(factory); - registerStorageOSS(factory); + // registerStorageS3(factory); registerStorageHudi(factory); registerStorageS3Queue(factory); @@ -148,12 +142,9 @@ void registerStorages() #endif #if USE_HDFS - registerStorageHDFS(factory); - #if USE_HIVE registerStorageHive(factory); #endif - #endif registerStorageODBC(factory); @@ -201,9 +192,7 @@ void registerStorages() registerStorageKeeperMap(factory); - #if USE_AZURE_BLOB_STORAGE - registerStorageAzureBlob(factory); - #endif + registerStorageObjectStorage(factory); } } diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index 7e81d6d21b7..0559472325b 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -1,14 +1,10 @@ #pragma once -#include "config.h" - #include #include -#include #include #include -#include -#include +#include namespace DB diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 961e5683fe2..884e1f5c4a2 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -10,6 +10,9 @@ # include # include # include +#include +#include +#include namespace DB { @@ -30,12 +33,13 @@ protected: bool /*is_insert_query*/) const override { ColumnsDescription columns; - if 
(TableFunction::configuration.structure != "auto") - columns = parseColumnsListFromString(TableFunction::configuration.structure, context); + if (TableFunction::configuration->structure != "auto") + columns = parseColumnsListFromString(TableFunction::configuration->structure, context); - StoragePtr storage = Storage::create( - TableFunction::configuration, context, false, StorageID(TableFunction::getDatabaseName(), table_name), - columns, ConstraintsDescription{}, String{}, std::nullopt); + StorageObjectStorageConfigurationPtr configuration = TableFunction::configuration; + StoragePtr storage = StorageIceberg>::create( + configuration, context, "", StorageID(TableFunction::getDatabaseName(), table_name), + columns, ConstraintsDescription{}, String{}, std::nullopt, false); storage->startup(); return storage; @@ -45,19 +49,19 @@ protected: ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override { - if (TableFunction::configuration.structure == "auto") + if (TableFunction::configuration->structure == "auto") { context->checkAccess(TableFunction::getSourceAccessType()); - return Storage::getTableStructureFromData(TableFunction::configuration, std::nullopt, context); + return Storage::getTableStructureFromData(TableFunction::object_storage, TableFunction::configuration, std::nullopt, context); } - return parseColumnsListFromString(TableFunction::configuration.structure, context); + return parseColumnsListFromString(TableFunction::configuration->structure, context); } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override { /// Set default format to Parquet if it's not specified in arguments. - TableFunction::configuration.format = "Parquet"; + TableFunction::configuration->format = "Parquet"; TableFunction::parseArguments(ast_function, context); } }; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp deleted file mode 100644 index b098cac5144..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ /dev/null @@ -1,323 +0,0 @@ -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "registerTableFunctions.h" -#include -#include -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; -} - -namespace -{ - -bool isConnectionString(const std::string & candidate) -{ - return !candidate.starts_with("http"); -} - -} - -void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) -{ - /// Supported signatures: - /// - /// AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) - /// - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - { - StorageAzureBlob::processNamedCollectionResult(configuration, *named_collection); - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - } - else - { - if (engine_args.size() < 3 || engine_args.size() > 8) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - 
"AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - std::unordered_map engine_args_to_idx; - - configuration.connection_url = checkAndGetLiteralArgument(engine_args[0], "connection_string/storage_account_url"); - configuration.is_connection_string = isConnectionString(configuration.connection_url); - - configuration.container = checkAndGetLiteralArgument(engine_args[1], "container"); - configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); - - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; - - if (engine_args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name/structure"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - } - else - { - configuration.structure = fourth_arg; - } - } - else if (engine_args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - } - } - else if (engine_args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (is_format_arg(fourth_arg)) - { - configuration.format = fourth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[4], "compression"); - configuration.structure = checkAndGetLiteralArgument(engine_args[5], "structure"); - } - else - { - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name/structure"); - if (is_format_arg(sixth_arg)) - configuration.format = sixth_arg; - else - configuration.structure = sixth_arg; - } - } - else if (engine_args.size() == 7) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - } - else if (engine_args.size() == 8) - { - auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - configuration.account_name = fourth_arg; - configuration.account_key = checkAndGetLiteralArgument(engine_args[4], "account_key"); - auto sixth_arg = checkAndGetLiteralArgument(engine_args[5], "format/account_name"); - if (!is_format_arg(sixth_arg)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format {}", sixth_arg); - configuration.format = sixth_arg; - configuration.compression_method = checkAndGetLiteralArgument(engine_args[6], "compression"); - configuration.structure = 
checkAndGetLiteralArgument(engine_args[7], "structure"); - } - - configuration.blobs_paths = {configuration.blob_path}; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.blob_path, true); - } -} - -void TableFunctionAzureBlobStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - /// Clone ast function, because we can modify its arguments like removing headers. - auto ast_copy = ast_function->clone(); - - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); - - auto & args = args_func.at(0)->children; - - parseArgumentsImpl(args, context); -} - -void TableFunctionAzureBlobStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) -{ - if (tryGetNamedCollectionWithOverrides(args, context)) - { - /// In case of named collection, just add key-value pair "structure='...'" - /// at the end of arguments to override existed structure. - ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); - args.push_back(equal_func); - } - else - { - if (args.size() < 3 || args.size() > 8) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage Azure requires 3 to 7 arguments: " - "AzureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); - - auto structure_literal = std::make_shared(structure); - - auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; - - - if (args.size() == 3) - { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - else if (args.size() == 4) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); - if (is_format_arg(fourth_arg)) - { - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - else - { - args.back() = structure_literal; - } - } - else if (args.size() == 5) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) - { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(std::make_shared("auto")); - } - args.push_back(structure_literal); - } - else if (args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) - { - /// Add compression=auto before structure argument. 
- args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - else - { - args.back() = structure_literal; - } - } - else if (args.size() == 7) - { - args.push_back(structure_literal); - } - else if (args.size() == 8) - { - args.back() = structure_literal; - } - } -} - -ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const -{ - if (configuration.structure == "auto") - { - context->checkAccess(getSourceAccessType()); - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container); - return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context, false); - } - - return parseColumnsListFromString(configuration.structure, context); -} - -bool TableFunctionAzureBlobStorage::supportsReadingSubsetOfColumns(const ContextPtr & context) -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); -} - -std::unordered_set TableFunctionAzureBlobStorage::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageAzureBlob::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - -StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const -{ - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - ColumnsDescription columns; - if (configuration.structure != "auto") - columns = parseColumnsListFromString(configuration.structure, context); - else if (!structure_hint.empty()) - columns = structure_hint; - - StoragePtr storage = std::make_shared( - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - String{}, - /// No format_settings for table function Azure - std::nullopt, - /* distributed_processing */ false, - nullptr); - - storage->startup(); - - return storage; -} - -void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage.)", - .examples{{"azureBlobStorage", "SELECT * FROM azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, - .allow_readonly = false}); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.h b/src/TableFunctions/TableFunctionAzureBlobStorage.h deleted file mode 100644 index 1a221f60c55..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include - - -namespace DB -{ - -class Context; - -/* AzureBlob(source, [access_key_id, secret_access_key,] [format, compression, structure]) - creates a temporary storage for a file in AzureBlob. 
- */ -class TableFunctionAzureBlobStorage : public ITableFunction -{ -public: - static constexpr auto name = "azureBlobStorage"; - - static constexpr auto signature = " - connection_string, container_name, blobpath\n" - " - connection_string, container_name, blobpath, structure \n" - " - connection_string, container_name, blobpath, format \n" - " - connection_string, container_name, blobpath, format, compression \n" - " - connection_string, container_name, blobpath, format, compression, structure \n" - " - storage_account_url, container_name, blobpath, account_name, account_key\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" - " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; - - static size_t getMaxNumberOfArguments() { return 8; } - - String getName() const override - { - return name; - } - - virtual String getSignature() const - { - return signature; - } - - bool hasStaticStructure() const override { return configuration.structure != "auto"; } - - bool needStructureHint() const override { return configuration.structure == "auto"; } - - void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - - bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - - virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); - -protected: - - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "Azure"; } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - mutable StorageAzureBlob::Configuration configuration; - ColumnsDescription structure_hint; -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp deleted file mode 100644 index 1c3b302a186..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include - -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( - const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const -{ - StoragePtr storage; - ColumnsDescription columns; - bool structure_argument_was_provided = configuration.structure != "auto"; - - if (structure_argument_was_provided) - { - columns = parseColumnsListFromString(configuration.structure, context); - } - else if (!structure_hint.empty()) - { - columns = structure_hint; - } - - auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); - auto settings = StorageAzureBlob::createSettings(context); - - if 
(context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this filename won't contains globs - storage = std::make_shared( - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings - /* distributed_processing */ true, - /*partition_by_=*/nullptr); - } - else - { - storage = std::make_shared( - cluster_name, - configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - context, - structure_argument_was_provided); - } - - storage->startup(); - - return storage; -} - - -void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", - .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, - .allow_readonly = false} - ); -} - - -} - -#endif diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h deleted file mode 100644 index 58f79328f63..00000000000 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AZURE_BLOB_STORAGE - -#include -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure) - * A table function, which allows to process many files from Azure Blob Storage on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in Azure Blob Storage file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. 
- */ -class TableFunctionAzureBlobStorageCluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "azureBlobStorageCluster"; - static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "AzureBlobStorageCluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionDeltaLake.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp index b8bf810f6fa..08b62ed2612 100644 --- a/src/TableFunctions/TableFunctionDeltaLake.cpp +++ b/src/TableFunctions/TableFunctionDeltaLake.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "registerTableFunctions.h" namespace DB @@ -16,17 +16,17 @@ struct TableFunctionDeltaLakeName static constexpr auto name = "deltaLake"; }; -using TableFunctionDeltaLake = ITableFunctionDataLake; - -void registerTableFunctionDeltaLake(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation = { - .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", - .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} +// using TableFunctionDeltaLake = ITableFunctionDataLake; +// +// void registerTableFunctionDeltaLake(TableFunctionFactory & factory) +// { +// factory.registerFunction( +// {.documentation = { +// .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", +// .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, +// .categories{"DataLake"}}, +// .allow_readonly = false}); +// } } diff --git a/src/TableFunctions/TableFunctionHDFS.cpp b/src/TableFunctions/TableFunctionHDFS.cpp deleted file mode 100644 index 8d48a7ba30e..00000000000 --- a/src/TableFunctions/TableFunctionHDFS.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include "config.h" -#include "registerTableFunctions.h" - -#if USE_HDFS -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -StoragePtr TableFunctionHDFS::getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const -{ - return std::make_shared( - source, - StorageID(getDatabaseName(), table_name), - format_, - columns, - ConstraintsDescription{}, - String{}, - global_context, - compression_method_); -} - -ColumnsDescription TableFunctionHDFS::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const -{ - if (structure == "auto") - { - context->checkAccess(getSourceAccessType()); - return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context); - } - - return parseColumnsListFromString(structure, context); -} - -std::unordered_set TableFunctionHDFS::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageHDFS::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - 
-void registerTableFunctionHDFS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} -#endif diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h deleted file mode 100644 index 3a719496b26..00000000000 --- a/src/TableFunctions/TableFunctionHDFS.h +++ /dev/null @@ -1,50 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include - - -namespace DB -{ - -class Context; - -/* hdfs(URI, [format, structure, compression]) - creates a temporary storage from hdfs files - * - */ -class TableFunctionHDFS : public ITableFunctionFileLike -{ -public: - static constexpr auto name = "hdfs"; - static constexpr auto signature = " - uri\n" - " - uri, format\n" - " - uri, format, structure\n" - " - uri, format, structure, compression_method\n"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - -private: - StoragePtr getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const override; - const char * getStorageTypeName() const override { return "HDFS"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp deleted file mode 100644 index 6fb7ed0fce5..00000000000 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include - -#include -#include -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionHDFSCluster::getStorage( - const String & /*source*/, const String & /*format_*/, const ColumnsDescription & columns, ContextPtr context, - const std::string & table_name, const String & /*compression_method_*/) const -{ - StoragePtr storage; - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this uri won't contains globs - storage = std::make_shared( - filename, - StorageID(getDatabaseName(), table_name), - format, - columns, - ConstraintsDescription{}, - String{}, - context, - compression_method, - /*distributed_processing=*/true, - nullptr); - } - else - { - storage = std::make_shared( - context, - cluster_name, - filename, - StorageID(getDatabaseName(), table_name), - format, - columns, - ConstraintsDescription{}, - compression_method, - structure != "auto"); - } - return storage; -} - -void registerTableFunctionHDFSCluster(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHDFSCluster.h b/src/TableFunctions/TableFunctionHDFSCluster.h deleted file mode 100644 index 0253217feb7..00000000000 --- a/src/TableFunctions/TableFunctionHDFSCluster.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * hdfsCluster(cluster, URI, format, structure, compression_method) - * A table function, which allows to process many files from HDFS on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in HDFS file path and dispatch each file dynamically. 
- * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. - */ -class TableFunctionHDFSCluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "hdfsCluster"; - static constexpr auto signature = " - cluster_name, uri\n" - " - cluster_name, uri, format\n" - " - cluster_name, uri, format, structure\n" - " - cluster_name, uri, format, structure, compression_method\n"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr getStorage( - const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, - const std::string & table_name, const String & compression_method_) const override; - - const char * getStorageTypeName() const override { return "HDFSCluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp index 436e708b72d..c6d84504c40 100644 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ b/src/TableFunctions/TableFunctionHudi.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "registerTableFunctions.h" namespace DB @@ -15,17 +15,17 @@ struct TableFunctionHudiName { static constexpr auto name = "hudi"; }; -using TableFunctionHudi = ITableFunctionDataLake; - -void registerTableFunctionHudi(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the Hudi table stored on object store.)", - .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} +// using TableFunctionHudi = ITableFunctionDataLake; +// +// void registerTableFunctionHudi(TableFunctionFactory & factory) +// { +// factory.registerFunction( +// {.documentation +// = {.description=R"(The table function can be used to read the Hudi table stored on object store.)", +// .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, +// .categories{"DataLake"}}, +// .allow_readonly = false}); +// } } #endif diff --git a/src/TableFunctions/TableFunctionIceberg.cpp b/src/TableFunctions/TableFunctionIceberg.cpp index d37aace01c6..1a28f9292d1 100644 --- a/src/TableFunctions/TableFunctionIceberg.cpp +++ b/src/TableFunctions/TableFunctionIceberg.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "registerTableFunctions.h" @@ -17,7 +17,10 @@ struct TableFunctionIcebergName static constexpr auto name = "iceberg"; }; -using TableFunctionIceberg = ITableFunctionDataLake; +using TableFunctionIceberg = ITableFunctionDataLake< + TableFunctionIcebergName, + StorageIceberg, + TableFunctionS3>; void registerTableFunctionIceberg(TableFunctionFactory & factory) { diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp new file mode 100644 index 00000000000..d009a9347f3 --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -0,0 +1,224 @@ +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "registerTableFunctions.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +static void initializeConfiguration( + 
StorageObjectStorageConfiguration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + configuration.fromNamedCollection(*named_collection); + else + configuration.fromAST(engine_args, local_context, with_table_structure); +} + +template +ObjectStoragePtr TableFunctionObjectStorage::getObjectStorage(const ContextPtr & context, bool create_readonly) const +{ + if (!object_storage) + object_storage = configuration->createOrUpdateObjectStorage(context, create_readonly); + return object_storage; +} + +template +std::vector TableFunctionObjectStorage::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +{ + auto & table_function_node = query_node_table_function->as(); + auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); + size_t table_function_arguments_size = table_function_arguments_nodes.size(); + + std::vector result; + for (size_t i = 0; i < table_function_arguments_size; ++i) + { + auto * function_node = table_function_arguments_nodes[i]->as(); + if (function_node && function_node->getFunctionName() == "headers") + result.push_back(i); + } + return result; +} + +template +void TableFunctionObjectStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +{ + Configuration::addStructureToArgs(args, structure, context); +} + +template +void TableFunctionObjectStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) +{ + configuration = std::make_shared(); + initializeConfiguration(*configuration, engine_args, local_context, true); +} + +template +void TableFunctionObjectStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + /// Clone ast function, because we can modify its arguments like removing headers. 
+ auto ast_copy = ast_function->clone(); + ASTs & args_func = ast_copy->children; + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); + + auto & args = args_func.at(0)->children; + parseArgumentsImpl(args, context); +} + +template +ColumnsDescription TableFunctionObjectStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const +{ + if (configuration->structure == "auto") + { + context->checkAccess(getSourceAccessType()); + auto storage = getObjectStorage(context, !is_insert_query); + return StorageObjectStorage::getTableStructureFromData(storage, configuration, std::nullopt, context); + } + + return parseColumnsListFromString(configuration->structure, context); +} + +template +bool TableFunctionObjectStorage::supportsReadingSubsetOfColumns(const ContextPtr & context) +{ + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); +} + +template +std::unordered_set TableFunctionObjectStorage::getVirtualsToCheckBeforeUsingStructureHint() const +{ + auto virtual_column_names = StorageObjectStorage::getVirtualColumnNames(); + return {virtual_column_names.begin(), virtual_column_names.end()}; +} + +template +StoragePtr TableFunctionObjectStorage::executeImpl( + const ASTPtr & /* ast_function */, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const +{ + ColumnsDescription columns; + if (configuration->structure != "auto") + columns = parseColumnsListFromString(configuration->structure, context); + else if (!structure_hint.empty()) + columns = structure_hint; + else if (!cached_columns.empty()) + columns = cached_columns; + + StoragePtr storage = std::make_shared>( + configuration, + getObjectStorage(context, !is_insert_query), + Definition::storage_type_name, + context, + StorageID(getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + String{}, + /// No format_settings for table function Azure + std::nullopt, + /* distributed_processing */ false, + nullptr); + + storage->startup(); + return storage; +} + +void registerTableFunctionObjectStorage(TableFunctionFactory & factory) +{ +#if USE_AWS_S3 + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on AWS S3.)", + .examples{{"s3", "SELECT * FROM s3(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, + .allow_readonly = false + }); + + factory.registerFunction>( + { + .allow_readonly = false + }); + + factory.registerFunction>( + { + .allow_readonly = false + }); + factory.registerFunction>( + { + .allow_readonly = false + }); +#endif + +#if USE_AZURE_BLOB_STORAGE + factory.registerFunction>( + { + .documentation = + { + .description=R"(The table function can be used to read the data stored on Azure Blob Storage.)", + .examples{ + { + "azureBlobStorage", + "SELECT * FROM azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure])", "" + }} + }, + .allow_readonly = false + }); +#endif +#if USE_HDFS + factory.registerFunction>( + { + .allow_readonly = false + }); +#endif +} + +#if USE_AZURE_BLOB_STORAGE +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +#if USE_AWS_S3 +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class 
TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +#if USE_HDFS +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +#endif + +} diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h new file mode 100644 index 00000000000..1df0ba2f843 --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -0,0 +1,150 @@ +#pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include +#include + + +namespace DB +{ + +class Context; +class StorageS3Configuration; +class StorageAzureBlobConfiguration; +class StorageHDFSConfiguration; +struct S3StorageSettings; +struct AzureStorageSettings; +struct HDFSStorageSettings; + +struct AzureDefinition +{ + static constexpr auto name = "azureBlobStorage"; + static constexpr auto storage_type_name = "Azure"; + static constexpr auto signature = " - connection_string, container_name, blobpath\n" + " - connection_string, container_name, blobpath, structure \n" + " - connection_string, container_name, blobpath, format \n" + " - connection_string, container_name, blobpath, format, compression \n" + " - connection_string, container_name, blobpath, format, compression, structure \n" + " - storage_account_url, container_name, blobpath, account_name, account_key\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, structure\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" + " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; +}; + +struct S3Definition +{ + static constexpr auto name = "s3"; + static constexpr auto storage_type_name = "S3"; + static constexpr auto signature = " - url\n" + " - url, format\n" + " - url, format, structure\n" + " - url, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key\n" + " - url, access_key_id, secret_access_key, session_token\n" + " - url, access_key_id, secret_access_key, format\n" + " - url, access_key_id, secret_access_key, session_token, format\n" + " - url, access_key_id, secret_access_key, format, structure\n" + " - url, access_key_id, secret_access_key, session_token, format, structure\n" + " - url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; +}; + +struct GCSDefinition +{ + static constexpr auto name = "gcs"; + static constexpr auto storage_type_name = "GCS"; + static constexpr auto signature = S3Definition::signature; +}; + +struct COSNDefinition +{ + static constexpr auto name = "cosn"; + static constexpr auto storage_type_name = "COSN"; + static constexpr auto signature = S3Definition::signature; +}; + +struct OSSDefinition +{ + static constexpr auto name = "oss"; + static constexpr auto storage_type_name = "OSS"; + static constexpr auto signature = S3Definition::signature; +}; + +struct HDFSDefinition +{ + static constexpr auto name = "hdfs"; + static constexpr auto storage_type_name = "HDFS"; + static constexpr auto signature = " - uri\n" + " - uri, format\n" + " - uri, format, structure\n" + " - uri, format, structure, 
compression_method\n"; +}; + +template +class TableFunctionObjectStorage : public ITableFunction +{ +public: + static constexpr auto name = Definition::name; + static constexpr auto signature = Definition::signature; + + static size_t getMaxNumberOfArguments() { return 8; } + + String getName() const override { return name; } + + virtual String getSignature() const { return signature; } + + bool hasStaticStructure() const override { return configuration->structure != "auto"; } + + bool needStructureHint() const override { return configuration->structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } + + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; + + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; + + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); + + static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); + +protected: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return Definition::storage_type_name; } + + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + ObjectStoragePtr getObjectStorage(const ContextPtr & context, bool create_readonly) const; + + mutable typename StorageObjectStorage::ConfigurationPtr configuration; + mutable ObjectStoragePtr object_storage; + ColumnsDescription structure_hint; + + std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; +}; + +#if USE_AWS_S3 +using TableFunctionS3 = TableFunctionObjectStorage; +#endif + +#if USE_AZURE_BLOB_STORAGE +using TableFunctionAzureBlob = TableFunctionObjectStorage; +#endif + +#if USE_HDFS +using TableFunctionHDFS = TableFunctionObjectStorage; +#endif +} + +#endif diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp new file mode 100644 index 00000000000..1d27a857cea --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -0,0 +1,113 @@ +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +template +StoragePtr TableFunctionObjectStorageCluster::executeImpl( + const ASTPtr & /*function*/, ContextPtr context, + const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const +{ + using Base = TableFunctionObjectStorage; + + StoragePtr storage; + ColumnsDescription columns; + bool structure_argument_was_provided = Base::configuration->structure != "auto"; + + if (structure_argument_was_provided) + { + columns = parseColumnsListFromString(Base::configuration->structure, context); + } + else if (!Base::structure_hint.empty()) + { + columns = Base::structure_hint; + } + + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + { + /// On worker node this filename won't contains globs + storage = std::make_shared>( + Base::configuration, + Base::configuration->createOrUpdateObjectStorage(context, !is_insert_query), + Definition::storage_type_name, + context, + 
StorageID(Base::getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + /* comment */String{}, + /* format_settings */std::nullopt, /// No format_settings + /* distributed_processing */ true, + /*partition_by_=*/nullptr); + } + else + { + storage = std::make_shared>( + ITableFunctionCluster::cluster_name, + Base::configuration, + Base::configuration->createOrUpdateObjectStorage(context, !is_insert_query), + Definition::storage_type_name, + StorageID(Base::getDatabaseName(), table_name), + columns, + ConstraintsDescription{}, + context, + structure_argument_was_provided); + } + + storage->startup(); + return storage; +} + + +void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) +{ +#if USE_AWS_S3 + factory.registerFunction( + { + .documentation = { + .description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", + .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, + .allow_readonly = false + } + ); +#endif + +#if USE_AZURE_BLOB_STORAGE + factory.registerFunction( + { + .documentation = { + .description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", + .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, + .allow_readonly = false + } + ); +#endif + +#if USE_HDFS + factory.registerFunction(); +#endif +} + +#if USE_AWS_S3 +template class TableFunctionObjectStorageCluster; +#endif + +#if USE_AZURE_BLOB_STORAGE +template class TableFunctionObjectStorageCluster; +#endif + +#if USE_HDFS +template class TableFunctionObjectStorageCluster; +#endif +} diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h new file mode 100644 index 00000000000..461456e37df --- /dev/null +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h @@ -0,0 +1,91 @@ +#pragma once +#include "config.h" +#include +#include +#include + + +namespace DB +{ + +class Context; + +class StorageS3Settings; +class StorageAzureBlobSettings; +class StorageS3Configuration; +class StorageAzureBlobConfiguration; + +struct AzureClusterDefinition +{ + /** + * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure) + * A table function, which allows to process many files from Azure Blob Storage on a specific cluster + * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks + * in Azure Blob Storage file path and dispatch each file dynamically. + * On worker node it asks initiator about next task to process, processes it. + * This is repeated until the tasks are finished. 
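+     *
+     * Hedged usage sketch (not part of this patch): 'my_cluster' and the connection values below are
+     * hypothetical placeholders that merely follow the signature documented above.
+     *
+     *   SELECT count()
+     *   FROM azureBlobStorageCluster('my_cluster',
+     *       'https://myaccount.blob.core.windows.net', 'mycontainer', 'data/*.parquet',
+     *       'myaccount', 'mykey', 'Parquet');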
+ */ + static constexpr auto name = "azureBlobStorageCluster"; + static constexpr auto storage_type_name = "AzureBlobStorageCluster"; + static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; +}; + +struct S3ClusterDefinition +{ + static constexpr auto name = "s3Cluster"; + static constexpr auto storage_type_name = "S3Cluster"; + static constexpr auto signature = " - cluster, url\n" + " - cluster, url, format\n" + " - cluster, url, format, structure\n" + " - cluster, url, access_key_id, secret_access_key\n" + " - cluster, url, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, format\n" + " - cluster, url, access_key_id, secret_access_key, format, structure\n" + " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; +}; + +struct HDFSClusterDefinition +{ + static constexpr auto name = "hdfsCluster"; + static constexpr auto storage_type_name = "HDFSCluster"; + static constexpr auto signature = " - cluster_name, uri\n" + " - cluster_name, uri, format\n" + " - cluster_name, uri, format, structure\n" + " - cluster_name, uri, format, structure, compression_method\n"; +}; + +template +class TableFunctionObjectStorageCluster : public ITableFunctionCluster> +{ +public: + static constexpr auto name = Definition::name; + static constexpr auto signature = Definition::signature; + + String getName() const override { return name; } + String getSignature() const override { return signature; } + +protected: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; + + const char * getStorageTypeName() const override { return Definition::storage_type_name; } +}; + +#if USE_AWS_S3 +using TableFunctionS3Cluster = TableFunctionObjectStorageCluster; +#endif + +#if USE_AZURE_BLOB_STORAGE +using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster; +#endif + +#if USE_HDFS +using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster; +#endif +} diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp deleted file mode 100644 index a9c5a5c99f0..00000000000 --- a/src/TableFunctions/TableFunctionS3.cpp +++ /dev/null @@ -1,464 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "registerTableFunctions.h" -#include -#include - -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; -} - - -std::vector TableFunctionS3::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const -{ - auto & table_function_node = query_node_table_function->as(); - auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); - size_t table_function_arguments_size = table_function_arguments_nodes.size(); - - std::vector result; - - for (size_t i = 0; i < table_function_arguments_size; ++i) - { - auto * function_node = 
table_function_arguments_nodes[i]->as(); - if (function_node && function_node->getFunctionName() == "headers") - result.push_back(i); - } - - return result; -} - -/// This is needed to avoid copy-paste. Because s3Cluster arguments only differ in additional argument (first) - cluster name -void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(args, context)) - { - StorageS3::processNamedCollectionResult(configuration, *named_collection); - if (configuration.format == "auto") - { - String file_path = named_collection->getOrDefault("filename", Poco::URI(named_collection->get("url")).getPath()); - configuration.format = FormatFactory::instance().getFormatFromFileName(file_path, true); - } - } - else - { - - size_t count = StorageURL::evalArgsAndCollectHeaders(args, configuration.headers_from_ast, context); - - if (count == 0 || count > 7) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); - - std::unordered_map args_to_idx; - - bool no_sign_request = false; - - /// For 2 arguments we support 2 possible variants: - /// - s3(source, format) - /// - s3(source, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - no_sign_request = true; - else - args_to_idx = {{"format", 1}}; - } - /// For 3 arguments we support 3 possible variants: - /// - s3(source, format, structure) - /// - s3(source, access_key_id, secret_access_key) - /// - s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's a format name or not. - else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}}; - } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) - args_to_idx = {{"format", 1}, {"structure", 2}}; - else - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; - } - /// For 4 arguments we support 4 possible variants: - /// - s3(source, format, structure, compression_method), - /// - s3(source, access_key_id, secret_access_key, format), - /// - s3(source, access_key_id, secret_access_key, session_token) - /// - s3(source, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd and 4-th argument: check if it's a format name or not. 
- else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/access_key_id/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}, {"structure", 3}}; - } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) - { - args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}}; - } - } - } - /// For 5 arguments we support 3 possible variants: - /// - s3(source, access_key_id, secret_access_key, format, structure) - /// - s3(source, access_key_id, secret_access_key, session_token, format) - /// - s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or no, - /// and by the 4-th argument, check if it's a format name or not - else if (count == 5) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "NOSIGN/access_key_id"); - if (boost::iequals(second_arg, "NOSIGN")) - { - no_sign_request = true; - args_to_idx = {{"format", 2}, {"structure", 3}, {"compression_method", 4}}; - } - else - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}}; - } - } - } - // For 6 arguments we support 2 possible variants: - /// - s3(source, access_key_id, secret_access_key, format, structure, compression_method) - /// - s3(source, access_key_id, secret_access_key, session_token, format, structure) - /// We can distinguish them by looking at the 4-th argument: check if it's a format name or not - else if (count == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().getAllFormats().contains(fourth_arg)) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; - } - else - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}}; - } - } - else if (count == 7) - { - args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"session_token", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}; - } - - /// This argument is always the first - String url = checkAndGetLiteralArgument(args[0], "url"); - configuration.url = S3::URI(url); - - if (args_to_idx.contains("format")) - { - auto format = checkAndGetLiteralArgument(args[args_to_idx["format"]], "format"); - /// Set format to configuration only of it's not 'auto', - /// because we can have default format set in configuration. 
- if (format != "auto") - configuration.format = format; - } - - if (args_to_idx.contains("structure")) - configuration.structure = checkAndGetLiteralArgument(args[args_to_idx["structure"]], "structure"); - - if (args_to_idx.contains("compression_method")) - configuration.compression_method = checkAndGetLiteralArgument(args[args_to_idx["compression_method"]], "compression_method"); - - if (args_to_idx.contains("access_key_id")) - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(args[args_to_idx["access_key_id"]], "access_key_id"); - - if (args_to_idx.contains("secret_access_key")) - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); - - if (args_to_idx.contains("session_token")) - configuration.auth_settings.session_token = checkAndGetLiteralArgument(args[args_to_idx["session_token"]], "session_token"); - - configuration.auth_settings.no_sign_request = no_sign_request; - - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(url).getPath(), true); - } - - configuration.keys = {configuration.url.key}; -} - -void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - /// Clone ast function, because we can modify its arguments like removing headers. - auto ast_copy = ast_function->clone(); - - /// Parse args - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); - - auto & args = args_func.at(0)->children; - - parseArgumentsImpl(args, context); -} - -void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) -{ - if (tryGetNamedCollectionWithOverrides(args, context)) - { - /// In case of named collection, just add key-value pair "structure='...'" - /// at the end of arguments to override existed structure. - ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; - auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); - args.push_back(equal_func); - } - else - { - HTTPHeaderEntries tmp_headers; - size_t count = StorageURL::evalArgsAndCollectHeaders(args, tmp_headers, context); - - if (count == 0 || count > getMaxNumberOfArguments()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), count); - - auto structure_literal = std::make_shared(structure); - - /// s3(s3_url) - if (count == 1) - { - /// Add format=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - /// s3(s3_url, format) or s3(s3_url, NOSIGN) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. - else if (count == 2) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - /// If there is NOSIGN, add format=auto before structure. - if (boost::iequals(second_arg, "NOSIGN")) - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - /// s3(source, format, structure) or - /// s3(source, access_key_id, secret_access_key) or - /// s3(source, NOSIGN, format) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. 
- else if (count == 3) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - args.push_back(structure_literal); - } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) - { - args[count - 1] = structure_literal; - } - else - { - /// Add format=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); - } - } - /// s3(source, format, structure, compression_method) or - /// s3(source, access_key_id, secret_access_key, format) or - /// s3(source, NOSIGN, format, structure) - /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. - else if (count == 4) - { - auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(second_arg, "NOSIGN")) - { - args[count - 1] = structure_literal; - } - else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) - { - args[count - 2] = structure_literal; - } - else - { - args.push_back(structure_literal); - } - } - /// s3(source, access_key_id, secret_access_key, format, structure) or - /// s3(source, NOSIGN, format, structure, compression_method) - /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. - else if (count == 5) - { - auto sedond_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); - if (boost::iequals(sedond_arg, "NOSIGN")) - { - args[count - 2] = structure_literal; - } - else - { - args[count - 1] = structure_literal; - } - } - /// s3(source, access_key_id, secret_access_key, format, structure, compression) - else if (count == 6) - { - args[count - 2] = structure_literal; - } - } -} - -ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const -{ - if (configuration.structure == "auto") - { - context->checkAccess(getSourceAccessType()); - configuration.update(context); - return StorageS3::getTableStructureFromData(configuration, std::nullopt, context); - } - - return parseColumnsListFromString(configuration.structure, context); -} - -bool TableFunctionS3::supportsReadingSubsetOfColumns(const ContextPtr & context) -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration.format, context); -} - -std::unordered_set TableFunctionS3::getVirtualsToCheckBeforeUsingStructureHint() const -{ - auto virtual_column_names = StorageS3::getVirtualColumnNames(); - return {virtual_column_names.begin(), virtual_column_names.end()}; -} - -StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool /*is_insert_query*/) const -{ - S3::URI s3_uri (configuration.url); - - ColumnsDescription columns; - if (configuration.structure != "auto") - columns = parseColumnsListFromString(configuration.structure, context); - else if (!structure_hint.empty()) - columns = structure_hint; - else if (!cached_columns.empty()) - columns = cached_columns; - - StoragePtr storage = std::make_shared( - configuration, - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - String{}, - /// No format_settings for table function S3 - std::nullopt); - - storage->startup(); - - return storage; -} - - -class TableFunctionGCS : public TableFunctionS3 -{ -public: - static constexpr auto name = "gcs"; - std::string getName() 
const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "GCS"; } -}; - -class TableFunctionCOS : public TableFunctionS3 -{ -public: - static constexpr auto name = "cosn"; - std::string getName() const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "COSN"; } -}; - -class TableFunctionOSS : public TableFunctionS3 -{ -public: - static constexpr auto name = "oss"; - std::string getName() const override - { - return name; - } -private: - const char * getStorageTypeName() const override { return "OSS"; } -}; - - -void registerTableFunctionGCS(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on Google Cloud Storage.)", - .examples{{"gcs", "SELECT * FROM gcs(url, hmac_key, hmac_secret)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - -void registerTableFunctionS3(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the data stored on AWS S3.)", - .examples{{"s3", "SELECT * FROM s3(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - - -void registerTableFunctionCOS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -void registerTableFunctionOSS(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h deleted file mode 100644 index fa73c1d313e..00000000000 --- a/src/TableFunctions/TableFunctionS3.h +++ /dev/null @@ -1,86 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include - - -namespace DB -{ - -class Context; - -/* s3(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in S3. 
- */ -class TableFunctionS3 : public ITableFunction -{ -public: - static constexpr auto name = "s3"; - static constexpr auto signature = " - url\n" - " - url, format\n" - " - url, format, structure\n" - " - url, format, structure, compression_method\n" - " - url, access_key_id, secret_access_key\n" - " - url, access_key_id, secret_access_key, session_token\n" - " - url, access_key_id, secret_access_key, format\n" - " - url, access_key_id, secret_access_key, session_token, format\n" - " - url, access_key_id, secret_access_key, format, structure\n" - " - url, access_key_id, secret_access_key, session_token, format, structure\n" - " - url, access_key_id, secret_access_key, format, structure, compression_method\n" - " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" - "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; - - static size_t getMaxNumberOfArguments() { return 6; } - - String getName() const override - { - return name; - } - - virtual String getSignature() const - { - return signature; - } - - bool hasStaticStructure() const override { return configuration.structure != "auto"; } - - bool needStructureHint() const override { return configuration.structure == "auto"; } - - void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - - bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; - - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; - - virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); - -protected: - - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "S3"; } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - mutable StorageS3::Configuration configuration; - ColumnsDescription structure_hint; - -private: - - std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; -}; - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp deleted file mode 100644 index ce96f7f580b..00000000000 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - -#include "registerTableFunctions.h" - -#include - - -namespace DB -{ - -StoragePtr TableFunctionS3Cluster::executeImpl( - const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const -{ - StoragePtr storage; - ColumnsDescription columns; - bool structure_argument_was_provided = configuration.structure != "auto"; - - if (structure_argument_was_provided) - { - columns = parseColumnsListFromString(configuration.structure, context); - } - else if (!structure_hint.empty()) - { - columns = structure_hint; - } - - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) - { - /// On worker node this filename won't contains globs 
- storage = std::make_shared( - configuration, - context, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - /* comment */String{}, - /* format_settings */std::nullopt, /// No format_settings for S3Cluster - /*distributed_processing=*/true); - } - else - { - storage = std::make_shared( - cluster_name, - configuration, - StorageID(getDatabaseName(), table_name), - columns, - ConstraintsDescription{}, - context, - structure_argument_was_provided); - } - - storage->startup(); - - return storage; -} - - -void registerTableFunctionS3Cluster(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - - -} - -#endif diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h deleted file mode 100644 index 718b0d90de8..00000000000 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ /dev/null @@ -1,64 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - - -namespace DB -{ - -class Context; - -/** - * s3cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure, compression_method) - * A table function, which allows to process many files from S3 on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in S3 file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. - */ -class TableFunctionS3Cluster : public ITableFunctionCluster -{ -public: - static constexpr auto name = "s3Cluster"; - static constexpr auto signature = " - cluster, url\n" - " - cluster, url, format\n" - " - cluster, url, format, structure\n" - " - cluster, url, access_key_id, secret_access_key\n" - " - cluster, url, format, structure, compression_method\n" - " - cluster, url, access_key_id, secret_access_key, format\n" - " - cluster, url, access_key_id, secret_access_key, format, structure\n" - " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" - " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" - "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; - - String getName() const override - { - return name; - } - - String getSignature() const override - { - return signature; - } - -protected: - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "S3Cluster"; } -}; - -} - -#endif diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 8c18c298f45..627d945fbf3 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -28,26 +28,17 @@ void registerTableFunctions() #endif #if USE_AWS_S3 - registerTableFunctionS3(factory); - registerTableFunctionS3Cluster(factory); - registerTableFunctionCOS(factory); - registerTableFunctionOSS(factory); - registerTableFunctionGCS(factory); - registerTableFunctionHudi(factory); + // registerTableFunctionS3Cluster(factory); + // registerTableFunctionHudi(factory); #if USE_PARQUET - registerTableFunctionDeltaLake(factory); + // registerTableFunctionDeltaLake(factory); #endif #if USE_AVRO - 
registerTableFunctionIceberg(factory); + // registerTableFunctionIceberg(factory); #endif #endif -#if USE_HDFS - registerTableFunctionHDFS(factory); - registerTableFunctionHDFSCluster(factory); -#endif - #if USE_HIVE registerTableFunctionHive(factory); #endif @@ -75,10 +66,8 @@ void registerTableFunctions() registerTableFunctionFormat(factory); registerTableFunctionExplain(factory); -#if USE_AZURE_BLOB_STORAGE - registerTableFunctionAzureBlobStorage(factory); - registerTableFunctionAzureBlobStorageCluster(factory); -#endif + registerTableFunctionObjectStorage(factory); + registerTableFunctionObjectStorageCluster(factory); } diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index fae763e7dc8..cefb198273e 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -39,11 +39,6 @@ void registerTableFunctionIceberg(TableFunctionFactory & factory); #endif #endif -#if USE_HDFS -void registerTableFunctionHDFS(TableFunctionFactory & factory); -void registerTableFunctionHDFSCluster(TableFunctionFactory & factory); -#endif - #if USE_HIVE void registerTableFunctionHive(TableFunctionFactory & factory); #endif @@ -73,8 +68,8 @@ void registerTableFunctionFormat(TableFunctionFactory & factory); void registerTableFunctionExplain(TableFunctionFactory & factory); #if USE_AZURE_BLOB_STORAGE -void registerTableFunctionAzureBlobStorage(TableFunctionFactory & factory); -void registerTableFunctionAzureBlobStorageCluster(TableFunctionFactory & factory); +void registerTableFunctionObjectStorage(TableFunctionFactory & factory); +void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory); #endif void registerTableFunctions(); diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 3cccd07c134..41218e41069 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -29,6 +29,8 @@ def cluster(): with_azurite=True, ) cluster.start() + container_client = cluster.blob_service_client.get_container_client("cont") + container_client.create_container() yield cluster finally: cluster.shutdown() @@ -129,8 +131,10 @@ def test_create_table_connection_string(cluster): node = cluster.instances["node"] azure_query( node, - f"CREATE TABLE test_create_table_conn_string (key UInt64, data String) Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}'," - f"'cont', 'test_create_connection_string', 'CSV')", + f""" + CREATE TABLE test_create_table_conn_string (key UInt64, data String) + Engine = AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_create_connection_string', 'CSV') + """, ) From 6d91d92601c04f160ba95a743fca270371b65eb8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 12 Feb 2024 18:17:22 +0100 Subject: [PATCH 011/392] Better --- src/Backups/BackupIO_AzureBlobStorage.cpp | 13 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 8 +- .../AzureBlobStorage/AzureObjectStorage.h | 4 +- .../Cached/CachedObjectStorage.cpp | 2 +- .../Cached/CachedObjectStorage.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.cpp | 6 +- src/Disks/ObjectStorages/IObjectStorage.h | 8 +- .../ObjectStorageIteratorAsync.cpp | 63 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 19 +- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 4 +- .../DataLakes/DeltaLakeMetadataParser.h | 2 +- src/Storages/DataLakes/HudiMetadataParser.h 
| 3 +- .../DataLakes/Iceberg/IcebergMetadata.cpp | 1 - .../DataLakes/Iceberg/IcebergMetadata.h | 2 +- .../ObjectStorage/AzureConfiguration.cpp | 11 + .../ObjectStorage/AzureConfiguration.h | 2 +- .../ObjectStorage/HDFSConfiguration.h | 2 +- .../ObjectStorage/ReadBufferIterator.cpp | 179 ++++++ .../ObjectStorage/ReadBufferIterator.h | 179 +----- .../ObjectStorage/ReadFromObjectStorage.h | 105 ---- .../ReadFromStorageObjectStorage.cpp | 94 +++ .../ReadFromStorageObjectStorage.h | 60 ++ src/Storages/ObjectStorage/S3Configuration.h | 2 +- ....h => StorageObejctStorageConfiguration.h} | 28 +- .../ObjectStorage/StorageObjectStorage.cpp | 91 +-- .../StorageObjectStorageCluster.cpp | 9 +- .../StorageObjectStorageCluster.h | 1 - .../StorageObjectStorageConfiguration.cpp | 40 ++ ....h => StorageObjectStorageQuerySettings.h} | 8 + .../ObjectStorage/StorageObjectStorageSink.h | 2 +- .../StorageObjectStorageSource.cpp | 539 +++++++++--------- .../StorageObjectStorageSource.h | 98 ++-- .../StorageObjectStorage_fwd_internal.h | 11 + .../registerStorageObjectStorage.cpp | 18 +- src/Storages/S3Queue/S3QueueSource.cpp | 17 +- src/Storages/S3Queue/S3QueueSource.h | 25 +- src/Storages/S3Queue/S3QueueTableMetadata.h | 2 +- src/Storages/S3Queue/StorageS3Queue.cpp | 32 +- src/Storages/S3Queue/StorageS3Queue.h | 1 - src/TableFunctions/ITableFunctionDataLake.h | 2 +- .../TableFunctionObjectStorage.cpp | 55 +- .../TableFunctionObjectStorageCluster.cpp | 14 +- 42 files changed, 973 insertions(+), 791 deletions(-) create mode 100644 src/Storages/ObjectStorage/ReadBufferIterator.cpp delete mode 100644 src/Storages/ObjectStorage/ReadFromObjectStorage.h create mode 100644 src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp create mode 100644 src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h rename src/Storages/ObjectStorage/{Configuration.h => StorageObejctStorageConfiguration.h} (73%) create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp rename src/Storages/ObjectStorage/{Settings.h => StorageObjectStorageQuerySettings.h} (86%) create mode 100644 src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index dc636f90be7..f12cc4c1d58 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -208,10 +208,15 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St /* for_disk_azure_blob_storage= */ true); } -void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) +void BackupWriterAzureBlobStorage::copyDataToFile( + const String & path_in_backup, + const CreateReadBufferFunction & create_read_buffer, + UInt64 start_pos, + UInt64 length) { - copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, path_in_backup, settings, - threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); + copyDataToAzureBlobStorageFile( + create_read_buffer, start_pos, length, client, configuration.container, + path_in_backup, settings, threadPoolCallbackRunner(getBackupsIOThreadPool().get(), "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; @@ -245,7 +250,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name) object_storage->listObjects(key,children,/*max_keys*/0); if 
(children.empty()) throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist"); - return children[0]->metadata.size_bytes; + return children[0]->metadata->size_bytes; } std::unique_ptr BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 2ca44137442..bbbb5357505 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -128,15 +128,15 @@ bool AzureObjectStorage::exists(const StoredObject & object) const return false; } -ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix) const +ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const { auto settings_ptr = settings.get(); auto client_ptr = client.get(); - return std::make_shared(path_prefix, client_ptr, settings_ptr->list_object_keys_size); + return std::make_shared(path_prefix, client_ptr, max_keys); } -void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const +void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { auto client_ptr = client.get(); @@ -168,7 +168,7 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith if (max_keys) { - int keys_left = max_keys - static_cast(children.size()); + size_t keys_left = max_keys - children.size(); if (keys_left <= 0) break; options.PageSizeHint = keys_left; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index f16c35fb52c..31eb78924f9 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -69,9 +69,9 @@ public: SettingsPtr && settings_, const String & container_); - void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; - ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override; + ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override; std::string getName() const override { return "AzureObjectStorage"; } diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index 1444f4c9c76..9f195b787a8 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -180,7 +180,7 @@ std::unique_ptr CachedObjectStorage::cloneObjectStorage( return object_storage->cloneObjectStorage(new_namespace, config, config_prefix, context); } -void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const +void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { object_storage->listObjects(path, children, max_keys); } diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 437baead7be..ec116b63d01 100644 --- 
a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -80,7 +80,7 @@ public: const std::string & config_prefix, ContextPtr context) override; - void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; ObjectMetadata getObjectMetadata(const std::string & path) const override; diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index 78fbdcaddfa..d36ef4f414a 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -24,16 +24,16 @@ bool IObjectStorage::existsOrHasAnyChild(const std::string & path) const return !files.empty(); } -void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, int) const +void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, size_t) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "listObjects() is not supported"); } -ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix) const +ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const { RelativePathsWithMetadata files; - listObjects(path_prefix, files, 0); + listObjects(path_prefix, files, max_keys); return std::make_shared(std::move(files)); } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 7d354e6383d..4955b0e6924 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -54,11 +54,11 @@ struct ObjectMetadata struct RelativePathWithMetadata { String relative_path; - ObjectMetadata metadata; + std::optional metadata; RelativePathWithMetadata() = default; - RelativePathWithMetadata(String relative_path_, ObjectMetadata metadata_) + explicit RelativePathWithMetadata(String relative_path_, std::optional metadata_ = std::nullopt) : relative_path(std::move(relative_path_)) , metadata(std::move(metadata_)) {} @@ -111,9 +111,9 @@ public: /// /, /a, /a/b, /a/b/c, /a/b/c/d while exists will return true only for /a/b/c/d virtual bool existsOrHasAnyChild(const std::string & path) const; - virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const; + virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const; - virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const; + virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const; /// Get object metadata if supported. 
It should be possible to receive /// at least size of object diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index b7729623a64..62bdd0ed0c8 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -14,27 +14,32 @@ namespace ErrorCodes void IObjectStorageIteratorAsync::nextBatch() { std::lock_guard lock(mutex); - if (!is_finished) + if (is_finished) { + LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 3"); + current_batch.clear(); + current_batch_iterator = current_batch.begin(); + } + else + { + LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 4"); if (!is_initialized) { outcome_future = scheduleBatch(); is_initialized = true; } - BatchAndHasNext next_batch = outcome_future.get(); - current_batch = std::move(next_batch.batch); - accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); - current_batch_iterator = current_batch.begin(); - if (next_batch.has_next) - outcome_future = scheduleBatch(); - else - is_finished = true; - } - else - { - current_batch.clear(); + chassert(outcome_future.valid()); + auto [batch, has_next] = outcome_future.get(); + current_batch = std::move(batch); current_batch_iterator = current_batch.begin(); + + accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); + + if (has_next) + outcome_future = scheduleBatch(); + else + is_finished = true; } } @@ -42,24 +47,10 @@ void IObjectStorageIteratorAsync::next() { std::lock_guard lock(mutex); - if (current_batch_iterator != current_batch.end()) - { + if (current_batch_iterator == current_batch.end()) + nextBatch(); + else ++current_batch_iterator; - } - else if (!is_finished) - { - if (outcome_future.valid()) - { - BatchAndHasNext next_batch = outcome_future.get(); - current_batch = std::move(next_batch.batch); - accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); - current_batch_iterator = current_batch.begin(); - if (next_batch.has_next) - outcome_future = scheduleBatch(); - else - is_finished = true; - } - } } std::future IObjectStorageIteratorAsync::scheduleBatch() @@ -107,14 +98,16 @@ std::optional IObjectStorageIteratorAsync::getCurrent if (!is_initialized) nextBatch(); - if (current_batch_iterator != current_batch.end()) + if (current_batch_iterator == current_batch.end()) { - auto temp_current_batch = current_batch; - nextBatch(); - return temp_current_batch; + LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 2"); + return std::nullopt; } - return std::nullopt; + auto temp_current_batch = std::move(current_batch); + LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 1: {}", temp_current_batch.size()); + nextBatch(); + return temp_current_batch; } size_t IObjectStorageIteratorAsync::getAccumulatedSize() const diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index cc138c43c71..a9bd520e6e9 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -138,9 +138,10 @@ private: return outcome.GetResult().GetIsTruncated(); } - throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); + throw 
S3Exception(outcome.GetError().GetErrorType(), + "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + quoteString(request.GetBucket()), quoteString(request.GetPrefix()), + backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); } std::shared_ptr client; @@ -263,13 +264,13 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN } -ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix) const +ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const { auto settings_ptr = s3_settings.get(); - return std::make_shared(uri.bucket, path_prefix, client.get(), settings_ptr->list_object_keys_size); + return std::make_shared(uri.bucket, path_prefix, client.get(), max_keys); } -void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const +void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { auto settings_ptr = s3_settings.get(); @@ -277,7 +278,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet request.SetBucket(uri.bucket); request.SetPrefix(path); if (max_keys) - request.SetMaxKeys(max_keys); + request.SetMaxKeys(static_cast(max_keys)); else request.SetMaxKeys(settings_ptr->list_object_keys_size); @@ -305,10 +306,10 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet if (max_keys) { - int keys_left = max_keys - static_cast(children.size()); + size_t keys_left = max_keys - children.size(); if (keys_left <= 0) break; - request.SetMaxKeys(keys_left); + request.SetMaxKeys(static_cast(keys_left)); } request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index ab0fa5bed68..a6843a383e5 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -100,9 +100,9 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) override; - void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; - ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override; + ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override; /// Uses `DeleteObjectRequest`. 
void removeObject(const StoredObject & object) override; diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h index f94024597d6..251ea3e3f15 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h index 2fc004595ca..72766a95876 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.h +++ b/src/Storages/DataLakes/HudiMetadataParser.h @@ -2,7 +2,8 @@ #include #include -#include +#include +#include namespace DB { diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp index 08cebb3f396..5543e60e7a7 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h index 92946e4192b..a289715848f 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/DataLakes/Iceberg/IcebergMetadata.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/ObjectStorage/AzureConfiguration.cpp b/src/Storages/ObjectStorage/AzureConfiguration.cpp index ba3e796223a..04f6f26111b 100644 --- a/src/Storages/ObjectStorage/AzureConfiguration.cpp +++ b/src/Storages/ObjectStorage/AzureConfiguration.cpp @@ -89,6 +89,17 @@ StorageObjectStorageConfigurationPtr StorageAzureBlobConfiguration::clone() return configuration; } +StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other) +{ + connection_url = other.connection_url; + is_connection_string = other.is_connection_string; + account_name = other.account_name; + account_key = other.account_key; + container = other.container; + blob_path = other.blob_path; + blobs_paths = other.blobs_paths; +} + AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(ContextPtr context) { const auto & context_settings = context->getSettingsRef(); diff --git a/src/Storages/ObjectStorage/AzureConfiguration.h b/src/Storages/ObjectStorage/AzureConfiguration.h index 40d718d7690..4f285128241 100644 --- a/src/Storages/ObjectStorage/AzureConfiguration.h +++ b/src/Storages/ObjectStorage/AzureConfiguration.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Storages/ObjectStorage/HDFSConfiguration.h b/src/Storages/ObjectStorage/HDFSConfiguration.h index f42cedf459d..aa45c634042 100644 --- a/src/Storages/ObjectStorage/HDFSConfiguration.h +++ b/src/Storages/ObjectStorage/HDFSConfiguration.h @@ -3,7 +3,7 @@ #if USE_HDFS -#include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp new file mode 100644 index 00000000000..dcdf36dbcf5 --- /dev/null +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -0,0 +1,179 @@ +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + +} + +ReadBufferIterator::ReadBufferIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const FileIterator & file_iterator_, + const std::optional & 
format_settings_, + const StorageObjectStorageSettings & query_settings_, + SchemaCache & schema_cache_, + ObjectInfos & read_keys_, + const ContextPtr & context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + , file_iterator(file_iterator_) + , format_settings(format_settings_) + , query_settings(query_settings_) + , schema_cache(schema_cache_) + , read_keys(read_keys_) + , prev_read_keys_size(read_keys_.size()) +{ +} + +SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const String & path) const +{ + auto source = fs::path(configuration->getDataSourceDescription()) / path; + return DB::getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); +} + +SchemaCache::Keys ReadBufferIterator::getPathsForSchemaCache() const +{ + Strings sources; + sources.reserve(read_keys.size()); + std::transform( + read_keys.begin(), read_keys.end(), + std::back_inserter(sources), + [&](const auto & elem) + { + return fs::path(configuration->getDataSourceDescription()) / elem->relative_path; + }); + return DB::getKeysForSchemaCache(sources, configuration->format, format_settings, getContext()); +} + +std::optional ReadBufferIterator::tryGetColumnsFromCache( + const ObjectInfos::iterator & begin, + const ObjectInfos::iterator & end) +{ + if (!query_settings.schema_inference_use_cache) + return std::nullopt; + + for (auto it = begin; it < end; ++it) + { + const auto & object_info = (*it); + auto get_last_mod_time = [&] -> std::optional + { + if (object_info->metadata) + return object_info->metadata->last_modified->epochMicroseconds(); + else + { + object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); + return object_info->metadata->last_modified->epochMicroseconds(); + } + }; + + auto cache_key = getKeyForSchemaCache(object_info->relative_path); + auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); + if (columns) + return columns; + } + + return std::nullopt; +} + +void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) +{ + if (query_settings.schema_inference_use_cache) + schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->relative_path), num_rows); +} + +void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) +{ + if (query_settings.schema_inference_use_cache + && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) + { + schema_cache.addColumns(getKeyForSchemaCache(current_object_info->relative_path), columns); + } +} + +void ReadBufferIterator::setResultingSchema(const ColumnsDescription & columns) +{ + if (query_settings.schema_inference_use_cache + && query_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + schema_cache.addManyColumns(getPathsForSchemaCache(), columns); + } +} + +String ReadBufferIterator::getLastFileName() const +{ + if (current_object_info) + return current_object_info->relative_path; + else + return ""; +} + +std::pair, std::optional> ReadBufferIterator::next() +{ + /// For default mode check cached columns for currently read keys on first iteration. 
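+    /// Control flow note: in DEFAULT schema inference mode the cache is consulted once for all
+    /// currently known keys (and again below whenever the file iterator discovers new keys);
+    /// in UNION mode the cache is checked per file. Only when no cached columns are found is the
+    /// next object actually opened and its contents handed to the format reader.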
+ if (first && query_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns}; + } + + current_object_info = file_iterator->next(0); + if (!current_object_info || current_object_info->relative_path.empty()) + { + if (first) + { + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "Cannot extract table structure from {} format file, " + "because there are no files with provided path. " + "You must specify table structure manually", + configuration->format); + } + return {nullptr, std::nullopt}; + } + + first = false; + + /// File iterator could get new keys after new iteration, + /// check them in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT + && read_keys.size() > prev_read_keys_size) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + prev_read_keys_size = read_keys.size(); + if (columns_from_cache) + return {nullptr, columns_from_cache}; + } + else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) + { + ObjectInfos paths = {current_object_info}; + if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) + return {nullptr, columns_from_cache}; + } + + first = false; + + chassert(current_object_info->metadata); + std::unique_ptr read_buffer = object_storage->readObject( + StoredObject(current_object_info->relative_path), + getContext()->getReadSettings(), + {}, + current_object_info->metadata->size_bytes); + + read_buffer = wrapReadBufferWithCompressionMethod( + std::move(read_buffer), + chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), + static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + + return {std::move(read_buffer), std::nullopt}; +} + +} diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 248700e2edf..4e9b8cfcfca 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -1,197 +1,54 @@ #pragma once #include -#include +#include #include -#include -#include #include namespace DB { -namespace ErrorCodes -{ - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; -} - -template class ReadBufferIterator : public IReadBufferIterator, WithContext { public: - using Storage = StorageObjectStorage; - using Source = StorageObjectStorageSource; - using FileIterator = std::shared_ptr; - using ObjectInfos = typename Storage::ObjectInfos; + using FileIterator = std::shared_ptr; ReadBufferIterator( ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, + ConfigurationPtr configuration_, const FileIterator & file_iterator_, const std::optional & format_settings_, + const StorageObjectStorageSettings & query_settings_, + SchemaCache & schema_cache_, ObjectInfos & read_keys_, - const ContextPtr & context_) - : WithContext(context_) - , object_storage(object_storage_) - , configuration(configuration_) - , file_iterator(file_iterator_) - , format_settings(format_settings_) - , storage_settings(StorageSettings::create(context_->getSettingsRef())) - , read_keys(read_keys_) - , prev_read_keys_size(read_keys_.size()) - { - } + const ContextPtr & context_); - std::pair, std::optional> next() override - { - /// For default mode check cached 
columns for currently read keys on first iteration. - if (first && storage_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; - } + std::pair, std::optional> next() override; - current_object_info = file_iterator->next(0); - if (current_object_info->relative_path.empty()) - { - if (first) - { - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, " - "because there are no files with provided path. " - "You must specify table structure manually", - configuration->format); - } - return {nullptr, std::nullopt}; - } + void setNumRowsToLastFile(size_t num_rows) override; - first = false; + void setSchemaToLastFile(const ColumnsDescription & columns) override; - /// File iterator could get new keys after new iteration, - /// check them in schema cache if schema inference mode is default. - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT - && read_keys.size() > prev_read_keys_size) - { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache}; - } - else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - ObjectInfos paths = {current_object_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache}; - } + void setResultingSchema(const ColumnsDescription & columns) override; - first = false; - - std::unique_ptr read_buffer = object_storage->readObject( - StoredObject(current_object_info->relative_path), - getContext()->getReadSettings(), - {}, - current_object_info->metadata.size_bytes); - - read_buffer = wrapReadBufferWithCompressionMethod( - std::move(read_buffer), - chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), - static_cast(getContext()->getSettingsRef().zstd_window_log_max)); - - return {std::move(read_buffer), std::nullopt}; - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (storage_settings.schema_inference_use_cache) - { - Storage::getSchemaCache(getContext()).addNumRows( - getKeyForSchemaCache(current_object_info->relative_path), num_rows); - } - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (storage_settings.schema_inference_use_cache - && storage_settings.schema_inference_mode == SchemaInferenceMode::UNION) - { - Storage::getSchemaCache(getContext()).addColumns( - getKeyForSchemaCache(current_object_info->relative_path), columns); - } - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (storage_settings.schema_inference_use_cache - && storage_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - Storage::getSchemaCache(getContext()).addManyColumns(getPathsForSchemaCache(), columns); - } - } - - String getLastFileName() const override { return current_object_info->relative_path; } + String getLastFileName() const override; private: - SchemaCache::Key getKeyForSchemaCache(const String & path) const - { - auto source = fs::path(configuration->getDataSourceDescription()) / path; - return DB::getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); - } - - SchemaCache::Keys getPathsForSchemaCache() 
const - { - Strings sources; - sources.reserve(read_keys.size()); - std::transform( - read_keys.begin(), read_keys.end(), - std::back_inserter(sources), - [&](const auto & elem) - { - return fs::path(configuration->getDataSourceDescription()) / elem->relative_path; - }); - return DB::getKeysForSchemaCache(sources, configuration->format, format_settings, getContext()); - } - + SchemaCache::Key getKeyForSchemaCache(const String & path) const; + SchemaCache::Keys getPathsForSchemaCache() const; std::optional tryGetColumnsFromCache( - const ObjectInfos::iterator & begin, - const ObjectInfos::iterator & end) - { - if (!storage_settings.schema_inference_use_cache) - return std::nullopt; - - auto & schema_cache = Storage::getSchemaCache(getContext()); - for (auto it = begin; it < end; ++it) - { - const auto & object_info = (*it); - auto get_last_mod_time = [&] -> std::optional - { - if (object_info->metadata.last_modified) - return object_info->metadata.last_modified->epochMicroseconds(); - else - { - object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - return object_info->metadata.last_modified->epochMicroseconds(); - } - }; - - auto cache_key = getKeyForSchemaCache(object_info->relative_path); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; - } - - return std::nullopt; - } + const ObjectInfos::iterator & begin, const ObjectInfos::iterator & end); ObjectStoragePtr object_storage; - const Storage::ConfigurationPtr configuration; + const ConfigurationPtr configuration; const FileIterator file_iterator; const std::optional & format_settings; - const StorageObjectStorageSettings storage_settings; + const StorageObjectStorageSettings query_settings; + SchemaCache & schema_cache; ObjectInfos & read_keys; size_t prev_read_keys_size; - Storage::ObjectInfoPtr current_object_info; + ObjectInfoPtr current_object_info; bool first = true; }; } diff --git a/src/Storages/ObjectStorage/ReadFromObjectStorage.h b/src/Storages/ObjectStorage/ReadFromObjectStorage.h deleted file mode 100644 index 9cb77dcc25e..00000000000 --- a/src/Storages/ObjectStorage/ReadFromObjectStorage.h +++ /dev/null @@ -1,105 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace DB -{ - -template -class ReadFromStorageObejctStorage : public SourceStepWithFilter -{ -public: - using Storage = StorageObjectStorage; - using Source = StorageObjectStorageSource; - - ReadFromStorageObejctStorage( - ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, - const String & name_, - const NamesAndTypesList & virtual_columns_, - const std::optional & format_settings_, - bool distributed_processing_, - ReadFromFormatInfo info_, - const bool need_only_count_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = info_.source_header}) - , object_storage(object_storage_) - , configuration(configuration_) - , context(std::move(context_)) - , info(std::move(info_)) - , virtual_columns(virtual_columns_) - , format_settings(format_settings_) - , name(name_ + "Source") - , need_only_count(need_only_count_) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - , distributed_processing(distributed_processing_) - { - } - - std::string getName() const override { return name; } - - void applyFilters() override - { - auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if 
(filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); - } - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override - { - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - getName(), object_storage, configuration, info, format_settings, - context, max_block_size, iterator_wrapper, need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); - } - -private: - ObjectStoragePtr object_storage; - Storage::ConfigurationPtr configuration; - ContextPtr context; - - const ReadFromFormatInfo info; - const NamesAndTypesList virtual_columns; - const std::optional format_settings; - const String name; - const bool need_only_count; - const size_t max_block_size; - const size_t num_streams; - const bool distributed_processing; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate) - { - if (!iterator_wrapper) - { - iterator_wrapper = Source::createFileIterator( - configuration, object_storage, distributed_processing, context, - predicate, virtual_columns, nullptr, context->getFileProgressCallback()); - } - } -}; - -} diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp new file mode 100644 index 00000000000..2c27c816078 --- /dev/null +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp @@ -0,0 +1,94 @@ +#include +#include +#include + +namespace DB +{ + +ReadFromStorageObejctStorage::ReadFromStorageObejctStorage( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const String & name_, + const NamesAndTypesList & virtual_columns_, + const std::optional & format_settings_, + const StorageObjectStorageSettings & query_settings_, + bool distributed_processing_, + ReadFromFormatInfo info_, + SchemaCache & schema_cache_, + const bool need_only_count_, + ContextPtr context_, + size_t max_block_size_, + size_t num_streams_, + CurrentMetrics::Metric metric_threads_count_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_) + : SourceStepWithFilter(DataStream{.header = info_.source_header}) + , WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + , info(std::move(info_)) + , virtual_columns(virtual_columns_) + , format_settings(format_settings_) + , query_settings(query_settings_) + , schema_cache(schema_cache_) + , name(name_ + "Source") + , need_only_count(need_only_count_) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + , distributed_processing(distributed_processing_) + , metric_threads_count(metric_threads_count_) + , metric_threads_active(metric_threads_active_) + , metric_threads_scheduled(metric_threads_scheduled_) +{ +} + +void ReadFromStorageObejctStorage::createIterator(const ActionsDAG::Node * predicate) +{ + if (!iterator_wrapper) + { + auto context = getContext(); + iterator_wrapper = StorageObjectStorageSource::createFileIterator( + configuration, object_storage, distributed_processing, context, predicate, + virtual_columns, nullptr, query_settings.list_object_keys_size, context->getFileProgressCallback()); + } +} + +void 
ReadFromStorageObejctStorage::applyFilters() +{ + auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG(filter_nodes.nodes); + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + createIterator(predicate); +} + +void ReadFromStorageObejctStorage::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + createIterator(nullptr); + auto context = getContext(); + + Pipes pipes; + for (size_t i = 0; i < num_streams; ++i) + { + auto threadpool = std::make_shared( + metric_threads_count, metric_threads_active, metric_threads_scheduled, /* max_threads */1); + + auto source = std::make_shared( + getName(), object_storage, configuration, info, format_settings, query_settings, + context, max_block_size, iterator_wrapper, need_only_count, schema_cache, std::move(threadpool)); + + pipes.emplace_back(std::move(source)); + } + + auto pipe = Pipe::unitePipes(std::move(pipes)); + if (pipe.empty()) + pipe = Pipe(std::make_shared(info.source_header)); + + for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + pipeline.init(std::move(pipe)); +} + +} diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h new file mode 100644 index 00000000000..f5e057d297f --- /dev/null +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h @@ -0,0 +1,60 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class ReadFromStorageObejctStorage : public SourceStepWithFilter, WithContext +{ +public: + using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + + ReadFromStorageObejctStorage( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const String & name_, + const NamesAndTypesList & virtual_columns_, + const std::optional & format_settings_, + const StorageObjectStorageSettings & query_settings_, + bool distributed_processing_, + ReadFromFormatInfo info_, + SchemaCache & schema_cache_, + bool need_only_count_, + ContextPtr context_, + size_t max_block_size_, + size_t num_streams_, + CurrentMetrics::Metric metric_threads_count_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_); + + std::string getName() const override { return name; } + + void applyFilters() override; + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + +private: + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + std::shared_ptr iterator_wrapper; + + const ReadFromFormatInfo info; + const NamesAndTypesList virtual_columns; + const std::optional format_settings; + const StorageObjectStorageSettings query_settings; + SchemaCache & schema_cache; + const String name; + const bool need_only_count; + const size_t max_block_size; + const size_t num_streams; + const bool distributed_processing; + const CurrentMetrics::Metric metric_threads_count; + const CurrentMetrics::Metric metric_threads_active; + const CurrentMetrics::Metric metric_threads_scheduled; + + void createIterator(const ActionsDAG::Node * predicate); +}; + +} diff --git a/src/Storages/ObjectStorage/S3Configuration.h b/src/Storages/ObjectStorage/S3Configuration.h index 34f5735e02a..c953bc25c4e 100644 --- a/src/Storages/ObjectStorage/S3Configuration.h +++ b/src/Storages/ObjectStorage/S3Configuration.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include namespace DB { diff --git 
a/src/Storages/ObjectStorage/Configuration.h b/src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h similarity index 73% rename from src/Storages/ObjectStorage/Configuration.h rename to src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h index 708041980e3..427d6a8d453 100644 --- a/src/Storages/ObjectStorage/Configuration.h +++ b/src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h @@ -17,6 +17,12 @@ public: using Path = std::string; using Paths = std::vector; + static void initialize( + StorageObjectStorageConfiguration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure); + virtual Path getPath() const = 0; virtual void setPath(const Path & path) = 0; @@ -26,28 +32,24 @@ public: virtual String getDataSourceDescription() = 0; virtual String getNamespace() const = 0; - bool isPathWithGlobs() const { return getPath().find_first_of("*?{") != std::string::npos; } - bool isNamespaceWithGlobs() const { return getNamespace().find_first_of("*?{") != std::string::npos; } - - std::string getPathWithoutGlob() const { return getPath().substr(0, getPath().find_first_of("*?{")); } - - virtual bool withWildcard() const - { - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return getPath().find(PARTITION_ID_WILDCARD) != String::npos; - } + bool withWildcard() const; + bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } + bool isPathWithGlobs() const; + bool isNamespaceWithGlobs() const; + std::string getPathWithoutGlob() const; virtual void check(ContextPtr context) const = 0; virtual StorageObjectStorageConfigurationPtr clone() = 0; virtual ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT - virtual void fromNamedCollection(const NamedCollection & collection) = 0; - virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; - String format = "auto"; String compression_method = "auto"; String structure = "auto"; + +protected: + virtual void fromNamedCollection(const NamedCollection & collection) = 0; + virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; }; using StorageObjectStorageConfigurationPtr = std::shared_ptr; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 9250ab8ecbe..9a7260ea47c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -9,12 +9,12 @@ #include #include #include -#include -#include +#include +#include #include #include #include -#include +#include namespace DB @@ -154,34 +154,38 @@ void StorageObjectStorage::read( size_t max_block_size, size_t num_streams) { - if (partition_by && configuration->withWildcard()) + auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); + if (partition_by && query_configuration->withWildcard()) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned {} storage is not implemented yet", getName()); } - auto this_ptr = std::static_pointer_cast(shared_from_this()); - auto read_from_format_info = prepareReadingFromFormat( + const auto read_from_format_info = prepareReadingFromFormat( column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals()); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) + const bool need_only_count = 
(query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; - auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); - auto reading = std::make_unique>( + auto read_step = std::make_unique( query_object_storage, query_configuration, getName(), virtual_columns, format_settings, + StorageSettings::create(local_context->getSettingsRef()), distributed_processing, std::move(read_from_format_info), + getSchemaCache(local_context), need_only_count, local_context, max_block_size, - num_streams); + num_streams, + StorageSettings::ObjectStorageThreads(), + StorageSettings::ObjectStorageThreadsActive(), + StorageSettings::ObjectStorageThreadsScheduled()); - query_plan.addStep(std::move(reading)); + query_plan.addStep(std::move(read_step)); } template @@ -191,35 +195,43 @@ SinkToStoragePtr StorageObjectStorage::write( ContextPtr local_context, bool /* async_insert */) { - auto insert_query = std::dynamic_pointer_cast(query); - auto partition_by_ast = insert_query - ? (insert_query->partition_by ? insert_query->partition_by : partition_by) - : nullptr; - bool is_partitioned_implementation = partition_by_ast && configuration->withWildcard(); + auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); + const auto sample_block = metadata_snapshot->getSampleBlock(); - auto sample_block = metadata_snapshot->getSampleBlock(); - auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); - - if (is_partitioned_implementation) + if (query_configuration->withWildcard()) { - return std::make_shared( - object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); + ASTPtr partition_by_ast = nullptr; + if (auto insert_query = std::dynamic_pointer_cast(query)) + { + if (insert_query->partition_by) + partition_by_ast = insert_query->partition_by; + else + partition_by_ast = partition_by; + } + + if (partition_by_ast) + { + return std::make_shared( + object_storage, query_configuration, format_settings, sample_block, local_context, partition_by_ast); + } } - if (configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs()) + if (query_configuration->withGlobs()) { throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "{} key '{}' contains globs, so the table is in readonly mode", - getName(), configuration->getPath()); + getName(), query_configuration->getPath()); } + const auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); if (!storage_settings.truncate_on_insert - && object_storage->exists(StoredObject(configuration->getPath()))) + && object_storage->exists(StoredObject(query_configuration->getPath()))) { if (storage_settings.create_new_file_on_insert) { - size_t index = configuration->getPaths().size(); - const auto & first_key = configuration->getPaths()[0]; + auto & paths = query_configuration->getPaths(); + size_t index = paths.size(); + const auto & first_key = paths[0]; auto pos = first_key.find_first_of('.'); String new_key; @@ -233,7 +245,7 @@ SinkToStoragePtr StorageObjectStorage::write( } while (object_storage->exists(StoredObject(new_key))); - configuration->getPaths().push_back(new_key); + paths.push_back(new_key); } else { @@ -242,12 +254,12 @@ SinkToStoragePtr StorageObjectStorage::write( "Object in bucket {} with key {} already exists. 
" "If you want to overwrite it, enable setting [engine_name]_truncate_on_insert, if you " "want to create a new file on each insert, enable setting [engine_name]_create_new_file_on_insert", - configuration->getNamespace(), configuration->getPaths().back()); + query_configuration->getNamespace(), query_configuration->getPaths().back()); } } return std::make_shared( - object_storage, configuration, format_settings, sample_block, local_context); + object_storage, query_configuration, format_settings, sample_block, local_context); } template @@ -257,7 +269,7 @@ void StorageObjectStorage::truncate( ContextPtr, TableExclusiveLockHolder &) { - if (configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs()) + if (configuration->withGlobs()) { throw Exception( ErrorCodes::DATABASE_ACCESS_DENIED, @@ -279,21 +291,18 @@ ColumnsDescription StorageObjectStorage::getTableStructureFromD const std::optional & format_settings, ContextPtr context) { - using Source = StorageObjectStorageSource; - ObjectInfos read_keys; - auto file_iterator = Source::createFileIterator( + const auto settings = StorageSettings::create(context->getSettingsRef()); + auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, object_storage, /* distributed_processing */false, - context, /* predicate */{}, /* virtual_columns */{}, &read_keys); + context, /* predicate */{}, /* virtual_columns */{}, &read_keys, settings.list_object_keys_size); - ReadBufferIterator read_buffer_iterator( + ReadBufferIterator read_buffer_iterator( object_storage, configuration, file_iterator, - format_settings, read_keys, context); + format_settings, StorageSettings::create(context->getSettingsRef()), getSchemaCache(context), read_keys, context); - const bool retry = configuration->isPathWithGlobs() || configuration->isNamespaceWithGlobs(); - return readSchemaFromFormat( - configuration->format, format_settings, - read_buffer_iterator, retry, context); + const bool retry = configuration->withGlobs(); + return readSchemaFromFormat(configuration->format, format_settings, read_buffer_iterator, retry, context); } template class StorageObjectStorage; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 414932016f4..39cd5d8eca6 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -11,8 +11,8 @@ #include #include #include -#include #include +#include #include #include @@ -82,10 +82,11 @@ void StorageObjectStorageCluster::ad template RemoteQueryExecutor::Extension -StorageObjectStorageCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr &) const +StorageObjectStorageCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & local_context) const { - auto iterator = std::make_shared( - object_storage, configuration, predicate, virtual_columns, nullptr); + const auto settings = StorageSettings::create(local_context->getSettingsRef()); + auto iterator = std::make_shared( + object_storage, configuration, predicate, virtual_columns, local_context, nullptr, settings.list_object_keys_size); auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next(0)->relative_path; }); return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h 
b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index b1f9af14e03..aae8f704a73 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -21,7 +21,6 @@ class StorageObjectStorageCluster : public IStorageCluster { public: using Storage = StorageObjectStorage; - using Source = StorageObjectStorageSource; StorageObjectStorageCluster( const String & cluster_name_, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp new file mode 100644 index 00000000000..2d5760ed9d8 --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -0,0 +1,40 @@ +#include + + +namespace DB +{ + +void StorageObjectStorageConfiguration::initialize( + StorageObjectStorageConfiguration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + configuration.fromNamedCollection(*named_collection); + else + configuration.fromAST(engine_args, local_context, with_table_structure); +} + +bool StorageObjectStorageConfiguration::withWildcard() const +{ + static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + return getPath().find(PARTITION_ID_WILDCARD) != String::npos; +} + +bool StorageObjectStorageConfiguration::isPathWithGlobs() const +{ + return getPath().find_first_of("*?{") != std::string::npos; +} + +bool StorageObjectStorageConfiguration::isNamespaceWithGlobs() const +{ + return getNamespace().find_first_of("*?{") != std::string::npos; +} + +std::string StorageObjectStorageConfiguration::getPathWithoutGlob() const +{ + return getPath().substr(0, getPath().find_first_of("*?{")); +} + +} diff --git a/src/Storages/ObjectStorage/Settings.h b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h similarity index 86% rename from src/Storages/ObjectStorage/Settings.h rename to src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h index 015cf9bc01d..454da7c355f 100644 --- a/src/Storages/ObjectStorage/Settings.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h @@ -23,6 +23,8 @@ struct StorageObjectStorageSettings bool create_new_file_on_insert; bool schema_inference_use_cache; SchemaInferenceMode schema_inference_mode; + bool skip_empty_files; + size_t list_object_keys_size; }; struct S3StorageSettings @@ -34,6 +36,8 @@ struct S3StorageSettings .create_new_file_on_insert = settings.s3_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_s3, .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.s3_skip_empty_files, + .list_object_keys_size = settings.s3_list_object_keys_size, }; } @@ -53,6 +57,8 @@ struct AzureStorageSettings .create_new_file_on_insert = settings.azure_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure + .list_object_keys_size = settings.azure_list_object_keys_size, }; } @@ -72,6 +78,8 @@ struct HDFSStorageSettings .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = 
settings.s3_skip_empty_files, /// TODO: add setting for hdfs + .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 34ab8ebec66..a2d42d7fa9f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 9fc7925a6d1..f170a46112f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -28,20 +28,55 @@ namespace ErrorCodes extern const int CANNOT_COMPILE_REGEXP; } -template -std::shared_ptr::IIterator> -StorageObjectStorageSource::createFileIterator( - Storage::ConfigurationPtr configuration, +StorageObjectStorageSource::StorageObjectStorageSource( + String name_, + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const ReadFromFormatInfo & info, + std::optional format_settings_, + const StorageObjectStorageSettings & query_settings_, + ContextPtr context_, + UInt64 max_block_size_, + std::shared_ptr file_iterator_, + bool need_only_count_, + SchemaCache & schema_cache_, + std::shared_ptr reader_pool_) + : SourceWithKeyCondition(info.source_header, false) + , WithContext(context_) + , name(std::move(name_)) + , object_storage(object_storage_) + , configuration(configuration_) + , format_settings(format_settings_) + , query_settings(query_settings_) + , max_block_size(max_block_size_) + , need_only_count(need_only_count_) + , read_from_format_info(info) + , create_reader_pool(reader_pool_) + , columns_desc(info.columns_description) + , file_iterator(file_iterator_) + , schema_cache(schema_cache_) + , create_reader_scheduler(threadPoolCallbackRunner(*create_reader_pool, "Reader")) +{ +} + +StorageObjectStorageSource::~StorageObjectStorageSource() +{ + create_reader_pool->wait(); +} + +std::shared_ptr StorageObjectStorageSource::createFileIterator( + ConfigurationPtr configuration, ObjectStoragePtr object_storage, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, + size_t list_object_keys_size, std::function file_progress_callback) { if (distributed_processing) - return std::make_shared(local_context->getReadTaskCallback()); + return std::make_shared(local_context->getReadTaskCallback()); if (configuration->isNamespaceWithGlobs()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); @@ -49,25 +84,240 @@ StorageObjectStorageSource::createFileIterator( if (configuration->isPathWithGlobs()) { /// Iterate through disclosed globs and make a source for each file - return std::make_shared( - object_storage, configuration, predicate, virtual_columns, read_keys, file_progress_callback); + return std::make_shared( + object_storage, configuration, predicate, virtual_columns, local_context, read_keys, list_object_keys_size, file_progress_callback); } else { - return std::make_shared( + return std::make_shared( object_storage, configuration, virtual_columns, read_keys, 
file_progress_callback); } } -template -StorageObjectStorageSource::GlobIterator::GlobIterator( +void StorageObjectStorageSource::lazyInitialize(size_t processor) +{ + if (initialized) + return; + + reader = createReader(processor); + if (reader) + reader_future = createReaderAsync(processor); + initialized = true; +} + +Chunk StorageObjectStorageSource::generate() +{ + lazyInitialize(0); + + while (true) + { + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); + break; + } + + Chunk chunk; + if (reader->pull(chunk)) + { + UInt64 num_rows = chunk.getNumRows(); + total_rows_in_file += num_rows; + + size_t chunk_size = 0; + if (const auto * input_format = reader.getInputFormat()) + chunk_size = input_format->getApproxBytesReadForChunk(); + + progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); + + const auto & object_info = reader.getObjectInfo(); + chassert(object_info.metadata); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + chunk, + read_from_format_info.requested_virtual_columns, + fs::path(configuration->getNamespace()) / reader.getRelativePath(), + object_info.metadata->size_bytes); + + return chunk; + } + + if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) + addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); + + total_rows_in_file = 0; + + assert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) + break; + + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + create_reader_pool->wait(); + reader_future = createReaderAsync(); + } + + return {}; +} + +void StorageObjectStorageSource::addNumRowsToCache(const String & path, size_t num_rows) +{ + const auto cache_key = getKeyForSchemaCache( + fs::path(configuration->getDataSourceDescription()) / path, + configuration->format, + format_settings, + getContext()); + + schema_cache.addNumRows(cache_key, num_rows); +} + +std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfoPtr & object_info) +{ + const auto cache_key = getKeyForSchemaCache( + fs::path(configuration->getDataSourceDescription()) / object_info->relative_path, + configuration->format, + format_settings, + getContext()); + + auto get_last_mod_time = [&]() -> std::optional + { + return object_info->metadata && object_info->metadata->last_modified + ? object_info->metadata->last_modified->epochMicroseconds() + : 0; + }; + return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); +} + +StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) +{ + ObjectInfoPtr object_info; + do + { + object_info = file_iterator->next(processor); + if (!object_info || object_info->relative_path.empty()) + return {}; + + if (!object_info->metadata) + object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); + } + while (query_settings.skip_empty_files && object_info->metadata->size_bytes == 0); + + QueryPipelineBuilder builder; + std::shared_ptr source; + std::unique_ptr read_buf; + + std::optional num_rows_from_cache = need_only_count + && getContext()->getSettingsRef().use_cache_for_count_from_files + ? 
tryGetNumRowsFromCache(object_info) + : std::nullopt; + + if (num_rows_from_cache) + { + /// We should not return single chunk with all number of rows, + /// because there is a chance that this chunk will be materialized later + /// (it can cause memory problems even with default values in columns or when virtual columns are requested). + /// Instead, we use special ConstChunkGenerator that will generate chunks + /// with max_block_size rows until total number of rows is reached. + builder.init(Pipe(std::make_shared( + read_from_format_info.format_header, *num_rows_from_cache, max_block_size))); + } + else + { + const auto compression_method = chooseCompressionMethod(object_info->relative_path, configuration->compression_method); + const auto max_parsing_threads = need_only_count ? std::optional(1) : std::nullopt; + read_buf = createReadBuffer(object_info->relative_path, object_info->metadata->size_bytes); + + auto input_format = FormatFactory::instance().getInput( + configuration->format, *read_buf, read_from_format_info.format_header, + getContext(), max_block_size, format_settings, max_parsing_threads, + std::nullopt, /* is_remote_fs */ true, compression_method); + + if (key_condition) + input_format->setKeyCondition(key_condition); + + if (need_only_count) + input_format->needOnlyCount(); + + builder.init(Pipe(input_format)); + + if (columns_desc.hasDefaults()) + { + builder.addSimpleTransform( + [&](const Block & header) + { + return std::make_shared(header, columns_desc, *input_format, getContext()); + }); + } + + source = input_format; + } + + /// Add ExtractColumnsTransform to extract requested columns/subcolumns + /// from chunk read by IInputFormat. + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, read_from_format_info.requested_columns); + }); + + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + auto current_reader = std::make_unique(*pipeline); + + ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); + + return ReaderHolder( + object_info, std::move(read_buf), std::move(source), std::move(pipeline), std::move(current_reader)); +} + +std::future StorageObjectStorageSource::createReaderAsync(size_t processor) +{ + return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); +} + +std::unique_ptr StorageObjectStorageSource::createReadBuffer(const String & key, size_t object_size) +{ + auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + read_settings.enable_filesystem_cache = false; + read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; + + const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size; + const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; + read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; + + // Create a read buffer that will prefetch the first ~1 MB of the file. + // When reading lots of tiny files, this prefetching almost doubles the throughput. + // For bigger files, parallel reading is more useful. 
+ if (use_prefetch) + { + LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); + + auto async_reader = object_storage->readObjects( + StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, read_settings); + + async_reader->setReadUntilEnd(); + if (read_settings.remote_fs_prefetch) + async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); + + return async_reader; + } + else + { + /// FIXME: this is inconsistent that readObject always reads synchronously ignoring read_method setting. + return object_storage->readObject(StoredObject(key), read_settings); + } +} + +StorageObjectStorageSource::GlobIterator::GlobIterator( ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, + ConfigurationPtr configuration_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, + ContextPtr context_, ObjectInfos * read_keys_, + size_t list_object_keys_size, std::function file_progress_callback_) - : object_storage(object_storage_) + : WithContext(context_) + , object_storage(object_storage_) , configuration(configuration_) , virtual_columns(virtual_columns_) , read_keys(read_keys_) @@ -81,7 +331,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( { const auto key_with_globs = configuration_->getPath(); const auto key_prefix = configuration->getPathWithoutGlob(); - object_storage_iterator = object_storage->iterate(key_prefix); + object_storage_iterator = object_storage->iterate(key_prefix, list_object_keys_size); matcher = std::make_unique(makeRegexpPatternFromGlobs(key_with_globs)); if (matcher->ok()) @@ -113,13 +363,11 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } } -template -StorageObjectStorageSource::ObjectInfoPtr -StorageObjectStorageSource::GlobIterator::next(size_t /* processor */) +ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor */) { std::lock_guard lock(next_mutex); - if (is_finished && index >= object_infos.size()) + if (is_finished) return {}; bool need_new_batch = object_infos.empty() || index >= object_infos.size(); @@ -130,9 +378,10 @@ StorageObjectStorageSource::GlobIterator::next(size_t /* proces while (new_batch.empty()) { auto result = object_storage_iterator->getCurrentBatchAndScheduleNext(); + LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {}", result.has_value()); if (result.has_value()) { - new_batch = result.value(); + new_batch = std::move(result.value()); } else { @@ -169,7 +418,8 @@ StorageObjectStorageSource::GlobIterator::next(size_t /* proces { for (const auto & object_info : object_infos) { - file_progress_callback(FileProgress(0, object_info->metadata.size_bytes)); + chassert(object_info->metadata); + file_progress_callback(FileProgress(0, object_info->metadata->size_bytes)); } } } @@ -181,10 +431,9 @@ StorageObjectStorageSource::GlobIterator::next(size_t /* proces return object_infos[current_index]; } -template -StorageObjectStorageSource::KeysIterator::KeysIterator( +StorageObjectStorageSource::KeysIterator::KeysIterator( ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, + ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, std::function file_progress_callback_) @@ -199,15 +448,13 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( /// TODO: should we add metadata if we anyway fetch it if file_progress_callback is passed? 
for (auto && key : keys) { - auto object_info = std::make_shared(key, ObjectMetadata{}); + auto object_info = std::make_shared(key); read_keys_->emplace_back(object_info); } } } -template -StorageObjectStorageSource::ObjectInfoPtr -StorageObjectStorageSource::KeysIterator::next(size_t /* processor */) +ObjectInfoPtr StorageObjectStorageSource::KeysIterator::next(size_t /* processor */) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) @@ -225,240 +472,4 @@ StorageObjectStorageSource::KeysIterator::next(size_t /* proces return std::make_shared(key, metadata); } -template -Chunk StorageObjectStorageSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (const auto * input_format = reader.getInputFormat()) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); - - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - read_from_format_info.requested_virtual_columns, - fs::path(configuration->getNamespace()) / reader.getRelativePath(), - reader.getObjectInfo().metadata.size_bytes); - - return chunk; - } - - if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); - - total_rows_in_file = 0; - - assert(reader_future.valid()); - reader = reader_future.get(); - - if (!reader) - break; - - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. 
- create_reader_pool.wait(); - reader_future = createReaderAsync(); - } - - return {}; -} - -template -void StorageObjectStorageSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - String source = fs::path(configuration->getDataSourceDescription()) / path; - auto cache_key = getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); - Storage::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -template -std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfoPtr & object_info) -{ - String source = fs::path(configuration->getDataSourceDescription()) / object_info->relative_path; - auto cache_key = getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - auto last_mod = object_info->metadata.last_modified; - if (last_mod) - return last_mod->epochTime(); - else - { - object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - return object_info->metadata.last_modified->epochMicroseconds(); - } - }; - return Storage::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -template -StorageObjectStorageSource::StorageObjectStorageSource( - String name_, - ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, - const ReadFromFormatInfo & info, - std::optional format_settings_, - ContextPtr context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - :ISource(info.source_header, false) - , WithContext(context_) - , name(std::move(name_)) - , object_storage(object_storage_) - , configuration(configuration_) - , format_settings(format_settings_) - , max_block_size(max_block_size_) - , need_only_count(need_only_count_) - , read_from_format_info(info) - , columns_desc(info.columns_description) - , file_iterator(file_iterator_) - , create_reader_pool(StorageSettings::ObjectStorageThreads(), - StorageSettings::ObjectStorageThreadsActive(), - StorageSettings::ObjectStorageThreadsScheduled(), 1) - , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "Reader")) -{ - reader = createReader(); - if (reader) - reader_future = createReaderAsync(); -} - -template -StorageObjectStorageSource::~StorageObjectStorageSource() -{ - create_reader_pool.wait(); -} - -template -StorageObjectStorageSource::ReaderHolder -StorageObjectStorageSource::createReader(size_t processor) -{ - auto object_info = file_iterator->next(processor); - if (object_info->relative_path.empty()) - return {}; - - if (object_info->metadata.size_bytes == 0) - object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - - QueryPipelineBuilder builder; - std::shared_ptr source; - std::unique_ptr read_buf; - std::optional num_rows_from_cache = need_only_count - && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(object_info) - : std::nullopt; - - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. 
- source = std::make_shared( - read_from_format_info.format_header, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - auto compression_method = chooseCompressionMethod( - object_info->relative_path, configuration->compression_method); - - read_buf = createReadBuffer(object_info->relative_path, object_info->metadata.size_bytes); - - auto input_format = FormatFactory::instance().getInput( - configuration->format, *read_buf, read_from_format_info.format_header, - getContext(), max_block_size, format_settings, max_parsing_threads, - std::nullopt, /* is_remote_fs */ true, compression_method); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - - if (columns_desc.hasDefaults()) - { - builder.addSimpleTransform( - [&](const Block & header) - { - return std::make_shared(header, columns_desc, *input_format, getContext()); - }); - } - - source = input_format; - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, read_from_format_info.requested_columns); - }); - - auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - auto current_reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - - return ReaderHolder{object_info, std::move(read_buf), - std::move(source), std::move(pipeline), std::move(current_reader)}; -} - -template -std::future::ReaderHolder> -StorageObjectStorageSource::createReaderAsync(size_t processor) -{ - return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); -} - -template -std::unique_ptr StorageObjectStorageSource::createReadBuffer(const String & key, size_t object_size) -{ - auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); - read_settings.enable_filesystem_cache = false; - read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size; - - // auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; - // const bool object_too_small = object_size <= 2 * download_buffer_size; - - // Create a read buffer that will prefetch the first ~1 MB of the file. - // When reading lots of tiny files, this prefetching almost doubles the throughput. - // For bigger files, parallel reading is more useful. 
- // if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) - // { - // LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); - - // auto async_reader = object_storage->readObjects( - // StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, read_settings); - - // async_reader->setReadUntilEnd(); - // if (read_settings.remote_fs_prefetch) - // async_reader->prefetch(DEFAULT_PREFETCH_PRIORITY); - - // return async_reader; - // } - // else - return object_storage->readObject(StoredObject(key), read_settings); -} - -template class StorageObjectStorageSource; -template class StorageObjectStorageSource; -template class StorageObjectStorageSource; - } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index f68a5d47456..0d6a6b71271 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -1,31 +1,19 @@ #pragma once -#include +#include +#include #include #include +#include +#include namespace DB { -template -class StorageObjectStorageSource : public ISource, WithContext +class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { friend class StorageS3QueueSource; public: - using Source = StorageObjectStorageSource; - using Storage = StorageObjectStorage; - using ObjectInfo = Storage::ObjectInfo; - using ObjectInfoPtr = Storage::ObjectInfoPtr; - using ObjectInfos = Storage::ObjectInfos; - - class IIterator : public WithContext - { - public: - virtual ~IIterator() = default; - - virtual size_t estimatedKeysCount() = 0; - virtual ObjectInfoPtr next(size_t processor) = 0; - }; - + class IIterator; class ReadTaskIterator; class GlobIterator; class KeysIterator; @@ -33,13 +21,16 @@ public: StorageObjectStorageSource( String name_, ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration, + ConfigurationPtr configuration, const ReadFromFormatInfo & info, std::optional format_settings_, + const StorageObjectStorageSettings & query_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, - bool need_only_count_); + bool need_only_count_, + SchemaCache & schema_cache_, + std::shared_ptr reader_pool_); ~StorageObjectStorageSource() override; @@ -48,32 +39,35 @@ public: Chunk generate() override; static std::shared_ptr createFileIterator( - Storage::ConfigurationPtr configuration, + ConfigurationPtr configuration, ObjectStoragePtr object_storage, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, + size_t list_object_keys_size, std::function file_progress_callback = {}); protected: - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const ObjectInfoPtr & object_info); - const String name; ObjectStoragePtr object_storage; - const Storage::ConfigurationPtr configuration; + const ConfigurationPtr configuration; const std::optional format_settings; + const StorageObjectStorageSettings query_settings; const UInt64 max_block_size; const bool need_only_count; const ReadFromFormatInfo read_from_format_info; - + const std::shared_ptr create_reader_pool; ColumnsDescription columns_desc; std::shared_ptr file_iterator; - size_t total_rows_in_file = 0; + SchemaCache & schema_cache; + bool initialized = false; - struct ReaderHolder + size_t total_rows_in_file = 
0; + LoggerPtr log = getLogger("StorageObjectStorageSource"); + + struct ReaderHolder : private boost::noncopyable { public: ReaderHolder( @@ -86,15 +80,15 @@ protected: , read_buf(std::move(read_buf_)) , source(std::move(source_)) , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) - { - } + , reader(std::move(reader_)) {} ReaderHolder() = default; - ReaderHolder(const ReaderHolder & other) = delete; - ReaderHolder & operator=(const ReaderHolder & other) = delete; ReaderHolder(ReaderHolder && other) noexcept { *this = std::move(other); } + explicit operator bool() const { return reader != nullptr; } + PullingPipelineExecutor * operator->() { return reader.get(); } + const PullingPipelineExecutor * operator->() const { return reader.get(); } + ReaderHolder & operator=(ReaderHolder && other) noexcept { /// The order of destruction is important. @@ -107,9 +101,6 @@ protected: return *this; } - explicit operator bool() const { return reader != nullptr; } - PullingPipelineExecutor * operator->() { return reader.get(); } - const PullingPipelineExecutor * operator->() const { return reader.get(); } const String & getRelativePath() const { return object_info->relative_path; } const ObjectInfo & getObjectInfo() const { return *object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } @@ -123,20 +114,29 @@ protected: }; ReaderHolder reader; - LoggerPtr log = getLogger("StorageObjectStorageSource"); - ThreadPool create_reader_pool; ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; /// Recreate ReadBuffer and Pipeline for each file. ReaderHolder createReader(size_t processor = 0); std::future createReaderAsync(size_t processor = 0); - std::unique_ptr createReadBuffer(const String & key, size_t object_size); + + void addNumRowsToCache(const String & path, size_t num_rows); + std::optional tryGetNumRowsFromCache(const ObjectInfoPtr & object_info); + void lazyInitialize(size_t processor); }; -template -class StorageObjectStorageSource::ReadTaskIterator : public IIterator +class StorageObjectStorageSource::IIterator +{ +public: + virtual ~IIterator() = default; + + virtual size_t estimatedKeysCount() = 0; + virtual ObjectInfoPtr next(size_t processor) = 0; +}; + +class StorageObjectStorageSource::ReadTaskIterator : public IIterator { public: explicit ReadTaskIterator(const ReadTaskCallback & callback_) : callback(callback_) {} @@ -149,16 +149,17 @@ private: ReadTaskCallback callback; }; -template -class StorageObjectStorageSource::GlobIterator : public IIterator +class StorageObjectStorageSource::GlobIterator : public IIterator, WithContext { public: GlobIterator( ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr configuration_, + ConfigurationPtr configuration_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns_, + ContextPtr context_, ObjectInfos * read_keys_, + size_t list_object_keys_size, std::function file_progress_callback_ = {}); ~GlobIterator() override = default; @@ -169,7 +170,7 @@ public: private: ObjectStoragePtr object_storage; - Storage::ConfigurationPtr configuration; + ConfigurationPtr configuration; ActionsDAGPtr filter_dag; NamesAndTypesList virtual_columns; @@ -189,13 +190,12 @@ private: std::function file_progress_callback; }; -template -class StorageObjectStorageSource::KeysIterator : public IIterator +class StorageObjectStorageSource::KeysIterator : public IIterator { public: KeysIterator( ObjectStoragePtr object_storage_, - Storage::ConfigurationPtr 
configuration_, + ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, std::function file_progress_callback = {}); @@ -208,7 +208,7 @@ public: private: const ObjectStoragePtr object_storage; - const Storage::ConfigurationPtr configuration; + const ConfigurationPtr configuration; const NamesAndTypesList virtual_columns; const std::function file_progress_callback; const std::vector keys; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h b/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h new file mode 100644 index 00000000000..51be7419e1c --- /dev/null +++ b/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h @@ -0,0 +1,11 @@ +#include + +namespace DB +{ + +using ConfigurationPtr = StorageObjectStorageConfigurationPtr; +using ObjectInfo = RelativePathWithMetadata; +using ObjectInfoPtr = std::shared_ptr; +using ObjectInfos = std::vector; + +} diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index bc9f93690f5..f7ab37490e1 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -8,18 +8,6 @@ namespace DB { -static void initializeConfiguration( - StorageObjectStorageConfiguration & configuration, - ASTs & engine_args, - ContextPtr local_context, - bool with_table_structure) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - configuration.fromNamedCollection(*named_collection); - else - configuration.fromAST(engine_args, local_context, with_table_structure); -} - template static std::shared_ptr> createStorageObjectStorage( const StorageFactory::Arguments & args, @@ -82,7 +70,7 @@ void registerStorageAzure(StorageFactory & factory) { auto context = args.getLocalContext(); auto configuration = std::make_shared(); - initializeConfiguration(*configuration, args.engine_args, context, false); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); return createStorageObjectStorage(args, configuration, "Azure", context); }, { @@ -101,7 +89,7 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) { auto context = args.getLocalContext(); auto configuration = std::make_shared(); - initializeConfiguration(*configuration, args.engine_args, context, false); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); return createStorageObjectStorage(args, configuration, name, context); }, { @@ -136,7 +124,7 @@ void registerStorageHDFS(StorageFactory & factory) { auto context = args.getLocalContext(); auto configuration = std::make_shared(); - initializeConfiguration(*configuration, args.engine_args, context, false); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); return createStorageObjectStorage(args, configuration, "HDFS", context); }, { diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index bd34d1ec093..b64aa23d47c 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -35,7 +35,7 @@ StorageS3QueueSource::S3QueueObjectInfo::S3QueueObjectInfo( const std::string & key_, const ObjectMetadata & object_metadata_, Metadata::ProcessingNodeHolderPtr processing_holder_) - : Source::ObjectInfo(key_, object_metadata_) + : ObjectInfo(key_, object_metadata_) , 
processing_holder(processing_holder_) { } @@ -55,15 +55,15 @@ StorageS3QueueSource::FileIterator::FileIterator( if (sharded_processing) { for (const auto & id : metadata->getProcessingIdsForShard(current_shard)) - sharded_keys.emplace(id, std::deque{}); + sharded_keys.emplace(id, std::deque{}); } } -StorageS3QueueSource::Source::ObjectInfoPtr StorageS3QueueSource::FileIterator::next(size_t processor) +StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::next(size_t processor) { while (!shutdown_called) { - Source::ObjectInfoPtr val{nullptr}; + ObjectInfoPtr val{nullptr}; { std::unique_lock lk(sharded_keys_mutex, std::defer_lock); @@ -140,7 +140,7 @@ StorageS3QueueSource::Source::ObjectInfoPtr StorageS3QueueSource::FileIterator:: if (processing_holder) { - return std::make_shared(val->relative_path, val->metadata, processing_holder); + return std::make_shared(val->relative_path, val->metadata.value(), processing_holder); } else if (sharded_processing && metadata->getFileStatus(val->relative_path)->state == S3QueueFilesMetadata::FileStatus::State::Processing) @@ -161,7 +161,7 @@ size_t StorageS3QueueSource::FileIterator::estimatedKeysCount() StorageS3QueueSource::StorageS3QueueSource( String name_, const Block & header_, - std::unique_ptr internal_source_, + std::unique_ptr internal_source_, std::shared_ptr files_metadata_, size_t processing_id_, const S3QueueAction & action_, @@ -273,7 +273,8 @@ Chunk StorageS3QueueSource::generate() file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, reader.getRelativePath(), reader.getObjectInfo().metadata.size_bytes); + VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( + chunk, requested_virtual_columns, reader.getRelativePath(), reader.getObjectInfo().metadata->size_bytes); return chunk; } } @@ -311,7 +312,7 @@ Chunk StorageS3QueueSource::generate() /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
- internal_source->create_reader_pool.wait(); + internal_source->create_reader_pool->wait(); reader_future = internal_source->createReaderAsync(processing_id); } diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index fcf5c5c0160..2bdac7f2311 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include @@ -22,16 +22,19 @@ class StorageS3QueueSource : public ISource, WithContext { public: using Storage = StorageObjectStorage; - using Source = StorageObjectStorageSource; using ConfigurationPtr = Storage::ConfigurationPtr; - using GlobIterator = Source::GlobIterator; + using GlobIterator = StorageObjectStorageSource::GlobIterator; using ZooKeeperGetter = std::function; using RemoveFileFunc = std::function; using FileStatusPtr = S3QueueFilesMetadata::FileStatusPtr; + using ReaderHolder = StorageObjectStorageSource::ReaderHolder; using Metadata = S3QueueFilesMetadata; + using ObjectInfo = RelativePathWithMetadata; + using ObjectInfoPtr = std::shared_ptr; + using ObjectInfos = std::vector; - struct S3QueueObjectInfo : public Source::ObjectInfo + struct S3QueueObjectInfo : public ObjectInfo { S3QueueObjectInfo( const std::string & key_, @@ -41,7 +44,7 @@ public: Metadata::ProcessingNodeHolderPtr processing_holder; }; - class FileIterator : public Source::IIterator + class FileIterator : public StorageObjectStorageSource::IIterator { public: FileIterator( @@ -53,7 +56,7 @@ public: /// Note: /// List results in s3 are always returned in UTF-8 binary order. /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - Source::ObjectInfoPtr next(size_t processor) override; + ObjectInfoPtr next(size_t processor) override; size_t estimatedKeysCount() override; @@ -66,14 +69,14 @@ public: const bool sharded_processing; const size_t current_shard; - std::unordered_map> sharded_keys; + std::unordered_map> sharded_keys; std::mutex sharded_keys_mutex; }; StorageS3QueueSource( String name_, const Block & header_, - std::unique_ptr internal_source_, + std::unique_ptr internal_source_, std::shared_ptr files_metadata_, size_t processing_id_, const S3QueueAction & action_, @@ -97,7 +100,7 @@ private: const S3QueueAction action; const size_t processing_id; const std::shared_ptr files_metadata; - const std::shared_ptr internal_source; + const std::shared_ptr internal_source; const NamesAndTypesList requested_virtual_columns; const std::atomic & shutdown_called; const std::atomic & table_is_being_dropped; @@ -107,8 +110,8 @@ private: RemoveFileFunc remove_file_func; LoggerPtr log; - Source::ReaderHolder reader; - std::future reader_future; + ReaderHolder reader; + std::future reader_future; std::atomic initialized{false}; size_t processed_rows_from_file = 0; diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index 942ce7973ef..70dd8f27d71 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -3,7 +3,7 @@ #if USE_AWS_S3 #include -#include +#include #include namespace DB diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index fa7132f705a..fc4ef77ebb9 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -341,16 +341,23 @@ std::shared_ptr StorageS3Queue::createSource( size_t max_block_size, ContextPtr local_context) { - auto internal_source = 
std::make_unique( + auto threadpool = std::make_shared(CurrentMetrics::ObjectStorageS3Threads, + CurrentMetrics::ObjectStorageS3ThreadsActive, + CurrentMetrics::ObjectStorageS3ThreadsScheduled, + /* max_threads */1); + auto internal_source = std::make_unique( getName(), object_storage, configuration, info, format_settings, + S3StorageSettings::create(local_context->getSettingsRef()), local_context, max_block_size, file_iterator, - false); + false, + Storage::getSchemaCache(local_context), + threadpool); auto file_deleter = [=, this](const std::string & path) mutable { @@ -555,25 +562,14 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const } } -std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr , const ActionsDAG::Node * predicate) +std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { - auto glob_iterator = std::make_unique(object_storage, configuration, predicate, virtual_columns, nullptr); - + auto settings = S3StorageSettings::create(local_context->getSettingsRef()); + auto glob_iterator = std::make_unique( + object_storage, configuration, predicate, virtual_columns, local_context, nullptr, settings.list_object_keys_size); return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); } -static void initializeConfiguration( - StorageObjectStorageConfiguration & configuration, - ASTs & engine_args, - ContextPtr local_context, - bool with_table_structure) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - configuration.fromNamedCollection(*named_collection); - else - configuration.fromAST(engine_args, local_context, with_table_structure); -} - void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) { factory.registerStorage( @@ -585,7 +581,7 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); auto configuration = std::make_shared(); - initializeConfiguration(*configuration, args.engine_args, args.getContext(), false); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getContext(), false); // Use format settings from global server context + settings from // the SETTINGS clause of the create query. 
Settings from current diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 88f9bd65093..46a8b8d82c1 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -27,7 +27,6 @@ class StorageS3Queue : public IStorage, WithContext { public: using Storage = StorageObjectStorage; - using Source = StorageObjectStorageSource; using ConfigurationPtr = Storage::ConfigurationPtr; StorageS3Queue( diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 884e1f5c4a2..0ffa1460d78 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -10,7 +10,7 @@ # include # include # include -#include +#include #include #include diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index d009a9347f3..de46c13af37 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -27,20 +27,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -static void initializeConfiguration( - StorageObjectStorageConfiguration & configuration, - ASTs & engine_args, - ContextPtr local_context, - bool with_table_structure) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - configuration.fromNamedCollection(*named_collection); - else - configuration.fromAST(engine_args, local_context, with_table_structure); -} - template -ObjectStoragePtr TableFunctionObjectStorage::getObjectStorage(const ContextPtr & context, bool create_readonly) const +ObjectStoragePtr TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const { if (!object_storage) object_storage = configuration->createOrUpdateObjectStorage(context, create_readonly); @@ -48,7 +37,8 @@ ObjectStoragePtr TableFunctionObjectStorage -std::vector TableFunctionObjectStorage::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +std::vector TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const { auto & table_function_node = query_node_table_function->as(); auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); @@ -65,16 +55,18 @@ std::vector TableFunctionObjectStorage -void TableFunctionObjectStorage::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +void TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) { Configuration::addStructureToArgs(args, structure, context); } template -void TableFunctionObjectStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) +void TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { configuration = std::make_shared(); - initializeConfiguration(*configuration, engine_args, local_context, true); + StorageObjectStorageConfiguration::initialize(*configuration, engine_args, local_context, true); } template @@ -91,7 +83,8 @@ void 
TableFunctionObjectStorage::par } template -ColumnsDescription TableFunctionObjectStorage::getActualTableStructure(ContextPtr context, bool is_insert_query) const +ColumnsDescription TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const { if (configuration->structure == "auto") { @@ -104,13 +97,15 @@ ColumnsDescription TableFunctionObjectStorage -bool TableFunctionObjectStorage::supportsReadingSubsetOfColumns(const ContextPtr & context) +bool TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); } template -std::unordered_set TableFunctionObjectStorage::getVirtualsToCheckBeforeUsingStructureHint() const +std::unordered_set TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::getVirtualsToCheckBeforeUsingStructureHint() const { auto virtual_column_names = StorageObjectStorage::getVirtualColumnNames(); return {virtual_column_names.begin(), virtual_column_names.end()}; @@ -166,15 +161,33 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) factory.registerFunction>( { + .documentation = + { + .description=R"(The table function can be used to read the data stored on GCS.)", + .examples{{"gcs", "SELECT * FROM gcs(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, .allow_readonly = false }); factory.registerFunction>( { + .documentation = + { + .description=R"(The table function can be used to read the data stored on COSN.)", + .examples{{"cosn", "SELECT * FROM cosn(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, .allow_readonly = false }); factory.registerFunction>( { + .documentation = + { + .description=R"(The table function can be used to read the data stored on OSS.)", + .examples{{"oss", "SELECT * FROM oss(url, access_key_id, secret_access_key)", ""} + }, + .categories{"DataLake"}}, .allow_readonly = false }); #endif diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 1d27a857cea..8e6c96a3f2a 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -76,8 +75,8 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) factory.registerFunction( { .documentation = { - .description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", - .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, + .description=R"(The table function can be used to read the data stored on S3 in parallel for many nodes in a specified cluster.)", + .examples{{"s3Cluster", "SELECT * FROM s3Cluster(cluster, url, format, structure)", ""}}}, .allow_readonly = false } ); @@ -95,7 +94,14 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) #endif #if USE_HDFS - factory.registerFunction(); + factory.registerFunction( + { + .documentation = { + .description=R"(The table function can be used to read the data stored on HDFS in 
parallel for many nodes in a specified cluster.)", + .examples{{"HDFSCluster", "SELECT * FROM HDFSCluster(cluster_name, uri, format)", ""}}}, + .allow_readonly = false + } + ); #endif } From 27a8bcc4383578b267ebcf0c8e0f65e83053c750 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 13 Feb 2024 20:16:37 +0100 Subject: [PATCH 012/392] Update ReadHelpers.cpp to fix failing style check --- src/IO/ReadHelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 3f9ceef50d4..ddf932b98a6 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -540,7 +540,7 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) if (*buf.position() == '\r') { - ++buf.position(); // advance to \n after \r + ++buf.position(); } } } From 80b2276599024032ca656206042b2d5f1fdc1571 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 14 Feb 2024 10:38:20 +0100 Subject: [PATCH 013/392] fix style check --- src/IO/ReadHelpers.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index ddf932b98a6..af66cbb4cb5 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -510,7 +510,6 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) { next_pos = find_first_symbols<'\t', '\n', '\\'>(buf.position(), buf.buffer().end()); } - appendToStringOrVector(s, buf, next_pos); buf.position() = next_pos; @@ -539,9 +538,8 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) } if (*buf.position() == '\r') - { - ++buf.position(); - } + ++buf.position(); + } } @@ -1987,7 +1985,4 @@ void readTSVField(String & s, ReadBuffer & buf) template void readTSVField(String & s, ReadBuffer & buf); template void readTSVField(String & s, ReadBuffer & buf); - } - - From 84b0fe670a4d73cc0b5c26bb922e90369025dae6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Feb 2024 17:03:11 +0100 Subject: [PATCH 014/392] Refactor data lakes --- src/Backups/BackupIO_AzureBlobStorage.h | 2 +- .../registerBackupEngineAzureBlobStorage.cpp | 5 +- src/CMakeLists.txt | 7 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 6 +- ...jectStorageRemoteMetadataRestoreHelper.cpp | 28 ++-- src/Disks/ObjectStorages/IObjectStorage.h | 4 +- .../ObjectStorageIteratorAsync.cpp | 30 ++++- .../ObjectStorageIteratorAsync.h | 6 +- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- .../DataLakes/DeltaLakeMetadataParser.h | 26 ---- src/Storages/DataLakes/HudiMetadataParser.h | 18 --- src/Storages/DataLakes/IStorageDataLake.h | 98 -------------- .../DataLakes/Iceberg/StorageIceberg.cpp | 11 -- src/Storages/DataLakes/StorageDeltaLake.h | 20 --- src/Storages/DataLakes/StorageHudi.h | 20 --- src/Storages/DataLakes/registerDataLakes.cpp | 50 ------- .../Configuration.cpp} | 49 +++---- .../Configuration.h} | 11 +- .../ObjectStorage/DataLakes/Common.cpp | 28 ++++ src/Storages/ObjectStorage/DataLakes/Common.h | 15 +++ .../DataLakes/DeltaLakeMetadata.cpp} | 110 +++++++-------- .../DataLakes/DeltaLakeMetadata.h | 48 +++++++ .../DataLakes/HudiMetadata.cpp} | 55 ++++---- .../ObjectStorage/DataLakes/HudiMetadata.h | 51 +++++++ .../DataLakes/IDataLakeMetadata.h | 19 +++ .../DataLakes/IStorageDataLake.h} | 58 ++++---- .../DataLakes}/IcebergMetadata.cpp | 36 ++--- .../DataLakes}/IcebergMetadata.h | 40 +++--- .../DataLakes/registerDataLakeStorages.cpp | 83 ++++++++++++ .../ObjectStorage/HDFS/Configuration.cpp | 57 ++++++++ .../ObjectStorage/HDFS/Configuration.h | 45 +++++++ 
.../ObjectStorage/HDFSConfiguration.h | 81 ----------- .../ObjectStorage/ReadBufferIterator.cpp | 4 +- .../ReadFromStorageObjectStorage.cpp | 1 - .../Configuration.cpp} | 30 +++-- .../{S3Configuration.h => S3/Configuration.h} | 15 ++- .../ObjectStorage/StorageObjectStorage.cpp | 10 +- .../ObjectStorage/StorageObjectStorage.h | 5 +- .../StorageObjectStorageCluster.cpp | 2 +- .../StorageObjectStorageCluster.h | 3 + .../StorageObjectStorageConfiguration.cpp | 2 +- ....h => StorageObjectStorageConfiguration.h} | 3 +- .../StorageObjectStorageSink.cpp | 127 ++++++++++++++++++ .../ObjectStorage/StorageObjectStorageSink.h | 113 ++-------------- .../StorageObjectStorageSource.cpp | 33 ++++- .../StorageObjectStorageSource.h | 22 +-- .../StorageObjectStorage_fwd_internal.h | 3 +- .../registerStorageObjectStorage.cpp | 12 +- src/Storages/ObjectStorageConfiguration.h | 0 src/Storages/S3Queue/S3QueueTableMetadata.h | 2 +- src/Storages/S3Queue/StorageS3Queue.cpp | 9 +- .../StorageSystemSchemaInferenceCache.cpp | 2 +- src/TableFunctions/ITableFunctionDataLake.h | 76 +++++++---- src/TableFunctions/TableFunctionDeltaLake.cpp | 33 ----- src/TableFunctions/TableFunctionHudi.cpp | 31 ----- src/TableFunctions/TableFunctionIceberg.cpp | 37 ----- .../TableFunctionObjectStorage.cpp | 22 ++- .../TableFunctionObjectStorage.h | 13 +- .../TableFunctionObjectStorageCluster.cpp | 8 +- .../registerDataLakeTableFunctions.cpp | 69 ++++++++++ src/TableFunctions/registerTableFunctions.cpp | 3 +- src/TableFunctions/registerTableFunctions.h | 10 +- 62 files changed, 946 insertions(+), 873 deletions(-) delete mode 100644 src/Storages/DataLakes/DeltaLakeMetadataParser.h delete mode 100644 src/Storages/DataLakes/HudiMetadataParser.h delete mode 100644 src/Storages/DataLakes/IStorageDataLake.h delete mode 100644 src/Storages/DataLakes/Iceberg/StorageIceberg.cpp delete mode 100644 src/Storages/DataLakes/StorageDeltaLake.h delete mode 100644 src/Storages/DataLakes/StorageHudi.h delete mode 100644 src/Storages/DataLakes/registerDataLakes.cpp rename src/Storages/ObjectStorage/{AzureConfiguration.cpp => AzureBlob/Configuration.cpp} (92%) rename src/Storages/ObjectStorage/{AzureConfiguration.h => AzureBlob/Configuration.h} (88%) create mode 100644 src/Storages/ObjectStorage/DataLakes/Common.cpp create mode 100644 src/Storages/ObjectStorage/DataLakes/Common.h rename src/Storages/{DataLakes/DeltaLakeMetadataParser.cpp => ObjectStorage/DataLakes/DeltaLakeMetadata.cpp} (79%) create mode 100644 src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h rename src/Storages/{DataLakes/HudiMetadataParser.cpp => ObjectStorage/DataLakes/HudiMetadata.cpp} (68%) create mode 100644 src/Storages/ObjectStorage/DataLakes/HudiMetadata.h create mode 100644 src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h rename src/Storages/{DataLakes/Iceberg/StorageIceberg.h => ObjectStorage/DataLakes/IStorageDataLake.h} (61%) rename src/Storages/{DataLakes/Iceberg => ObjectStorage/DataLakes}/IcebergMetadata.cpp (96%) rename src/Storages/{DataLakes/Iceberg => ObjectStorage/DataLakes}/IcebergMetadata.h (76%) create mode 100644 src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp create mode 100644 src/Storages/ObjectStorage/HDFS/Configuration.cpp create mode 100644 src/Storages/ObjectStorage/HDFS/Configuration.h delete mode 100644 src/Storages/ObjectStorage/HDFSConfiguration.h rename src/Storages/ObjectStorage/{S3Configuration.cpp => S3/Configuration.cpp} (97%) rename src/Storages/ObjectStorage/{S3Configuration.h => S3/Configuration.h} (81%) rename 
src/Storages/ObjectStorage/{StorageObejctStorageConfiguration.h => StorageObjectStorageConfiguration.h} (99%) create mode 100644 src/Storages/ObjectStorage/StorageObjectStorageSink.cpp delete mode 100644 src/Storages/ObjectStorageConfiguration.h delete mode 100644 src/TableFunctions/TableFunctionDeltaLake.cpp delete mode 100644 src/TableFunctions/TableFunctionHudi.cpp delete mode 100644 src/TableFunctions/TableFunctionIceberg.cpp create mode 100644 src/TableFunctions/registerDataLakeTableFunctions.cpp diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 99002c53769..9f1702cb3a3 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 9408c7ccdcf..c4c04bbc057 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #endif @@ -59,9 +59,6 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (!config.has(config_prefix)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", id_arg); - if (!config.has(config_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no `{}` in config", config_prefix); - if (config.has(config_prefix + ".connection_string")) { configuration.connection_url = config.getString(config_prefix + ".connection_string"); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 50130e6abd0..118e0131b37 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -105,6 +105,7 @@ add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhous add_headers_and_sources(dbms Disks/IO) add_headers_and_sources(dbms Disks/ObjectStorages) +add_headers_and_sources(dbms Disks/ObjectStorages) if (TARGET ch_contrib::sqlite) add_headers_and_sources(dbms Databases/SQLite) endif() @@ -117,9 +118,11 @@ if (TARGET ch_contrib::nats_io) add_headers_and_sources(dbms Storages/NATS) endif() -add_headers_and_sources(dbms Storages/DataLakes) -add_headers_and_sources(dbms Storages/DataLakes/Iceberg) add_headers_and_sources(dbms Storages/ObjectStorage) +add_headers_and_sources(dbms Storages/ObjectStorage/AzureBlob) +add_headers_and_sources(dbms Storages/ObjectStorage/S3) +add_headers_and_sources(dbms Storages/ObjectStorage/HDFS) +add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes) add_headers_and_sources(dbms Common/NamedCollections) if (TARGET ch_contrib::amqp_cpp) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index bbbb5357505..bcc75f91e2a 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -323,10 +323,8 @@ void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { removeObjectIfExists(object); } - } - ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) const { auto client_ptr = client.get(); @@ -338,9 +336,9 @@ ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) c { result.attributes.emplace(); for (const auto & [key, value] : properties.Metadata) - (*result.attributes)[key] = value; + result.attributes[key] = 
value; } - result.last_modified.emplace(static_cast(properties.LastModified).time_since_epoch().count()); + result.last_modified = static_cast(properties.LastModified).time_since_epoch().count(); return result; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index cc9ee3db505..9f9efad9615 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -404,26 +404,20 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles( { for (const auto & key : keys) { - auto meta = source_object_storage->getObjectMetadata(key); - auto object_attributes = meta.attributes; + auto metadata = source_object_storage->getObjectMetadata(key); + auto object_attributes = metadata.attributes; String path; - if (object_attributes.has_value()) + /// Restore file if object has 'path' in metadata. + auto path_entry = object_attributes.find("path"); + if (path_entry == object_attributes.end()) { - /// Restore file if object has 'path' in metadata. - auto path_entry = object_attributes->find("path"); - if (path_entry == object_attributes->end()) - { - /// Such keys can remain after migration, we can skip them. - LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); - continue; - } - - path = path_entry->second; - } - else + /// Such keys can remain after migration, we can skip them. + LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key); continue; + } + path = path_entry->second; disk->createDirectories(directoryPath(path)); auto object_key = ObjectStorageKey::createAsRelative(disk->object_key_prefix, shrinkKey(source_path, key)); @@ -435,7 +429,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles( source_object_storage->copyObjectToAnotherObjectStorage(object_from, object_to, read_settings, write_settings, *disk->object_storage); auto tx = disk->metadata_storage->createTransaction(); - tx->addBlobToMetadata(path, object_key, meta.size_bytes); + tx->addBlobToMetadata(path, object_key, metadata.size_bytes); tx->commit(); LOG_TRACE(disk->log, "Restored file {}", path); @@ -490,7 +484,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject if (send_metadata) revision_counter = revision - 1; - auto object_attributes = *(source_object_storage->getObjectMetadata(object->relative_path).attributes); + auto object_attributes = source_object_storage->getObjectMetadata(object->relative_path).attributes; if (operation == rename) { auto from_path = object_attributes["from_path"]; diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 4955b0e6924..8a5352e71ca 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -47,8 +47,8 @@ using ObjectAttributes = std::map; struct ObjectMetadata { uint64_t size_bytes = 0; - std::optional last_modified; - std::optional attributes; + Poco::Timestamp last_modified; + ObjectAttributes attributes; }; struct RelativePathWithMetadata diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index 62bdd0ed0c8..f441b18d59d 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -11,18 +11,26 @@ namespace 
ErrorCodes extern const int LOGICAL_ERROR; } +IObjectStorageIteratorAsync::IObjectStorageIteratorAsync( + CurrentMetrics::Metric threads_metric, + CurrentMetrics::Metric threads_active_metric, + CurrentMetrics::Metric threads_scheduled_metric, + const std::string & thread_name) + : list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1) + , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, thread_name)) +{ +} + void IObjectStorageIteratorAsync::nextBatch() { std::lock_guard lock(mutex); if (is_finished) { - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 3"); current_batch.clear(); current_batch_iterator = current_batch.begin(); } else { - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 4"); if (!is_initialized) { outcome_future = scheduleBatch(); @@ -30,13 +38,23 @@ void IObjectStorageIteratorAsync::nextBatch() } chassert(outcome_future.valid()); - auto [batch, has_next] = outcome_future.get(); - current_batch = std::move(batch); + BatchAndHasNext result; + try + { + result = outcome_future.get(); + } + catch (...) + { + is_finished = true; + throw; + } + + current_batch = std::move(result.batch); current_batch_iterator = current_batch.begin(); accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed); - if (has_next) + if (result.has_next) outcome_future = scheduleBatch(); else is_finished = true; @@ -100,12 +118,10 @@ std::optional IObjectStorageIteratorAsync::getCurrent if (current_batch_iterator == current_batch.end()) { - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 2"); return std::nullopt; } auto temp_current_batch = std::move(current_batch); - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: here 1: {}", temp_current_batch.size()); nextBatch(); return temp_current_batch; } diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index 8d155f7ec8d..86e5feb3010 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -17,11 +17,7 @@ public: CurrentMetrics::Metric threads_metric, CurrentMetrics::Metric threads_active_metric, CurrentMetrics::Metric threads_scheduled_metric, - const std::string & thread_name) - : list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1) - , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, thread_name)) - { - } + const std::string & thread_name); void next() override; void nextBatch() override; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index d697d90c8a6..36f5bd73ca6 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -480,7 +480,7 @@ BlockIO InterpreterSystemQuery::execute() StorageURL::getSchemaCache(getContext()).clear(); #if USE_AZURE_BLOB_STORAGE if (caches_to_drop.contains("AZURE")) - StorageAzureBlobStorage::getSchemaCache(getContext()).clear(); + StorageAzureBlob::getSchemaCache(getContext()).clear(); #endif break; } diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.h b/src/Storages/DataLakes/DeltaLakeMetadataParser.h deleted file mode 100644 index 251ea3e3f15..00000000000 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace DB -{ - -struct DeltaLakeMetadataParser -{ -public: - DeltaLakeMetadataParser(); - - Strings getFiles( - ObjectStoragePtr 
object_storage, - StorageObjectStorageConfigurationPtr configuration, - ContextPtr context); - -private: - struct Impl; - std::shared_ptr impl; -}; - -} diff --git a/src/Storages/DataLakes/HudiMetadataParser.h b/src/Storages/DataLakes/HudiMetadataParser.h deleted file mode 100644 index 72766a95876..00000000000 --- a/src/Storages/DataLakes/HudiMetadataParser.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace DB -{ - -struct HudiMetadataParser -{ - Strings getFiles( - ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, ContextPtr context); -}; - -} diff --git a/src/Storages/DataLakes/IStorageDataLake.h b/src/Storages/DataLakes/IStorageDataLake.h deleted file mode 100644 index 934bf227c42..00000000000 --- a/src/Storages/DataLakes/IStorageDataLake.h +++ /dev/null @@ -1,98 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -template -class IStorageDataLake : public StorageObjectStorage -{ -public: - static constexpr auto name = Name::name; - - using Storage = StorageObjectStorage; - using ConfigurationPtr = Storage::ConfigurationPtr; - - static StoragePtr create( - ConfigurationPtr base_configuration, - ContextPtr context, - const String & engine_name_, - const StorageID & table_id_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment_, - std::optional format_settings_, - bool /* attach */) - { - auto object_storage = base_configuration->createOrUpdateObjectStorage(context); - - auto configuration = base_configuration->clone(); - configuration->getPaths() = MetadataParser().getFiles(object_storage, configuration, context); - - return std::make_shared>( - base_configuration, configuration, object_storage, engine_name_, context, - table_id_, columns_, constraints_, comment_, format_settings_); - } - - String getName() const override { return name; } - - static ColumnsDescription getTableStructureFromData( - ObjectStoragePtr object_storage_, - ConfigurationPtr base_configuration, - const std::optional &, - ContextPtr local_context) - { - auto metadata = parseIcebergMetadata(object_storage_, base_configuration, local_context); - return ColumnsDescription(metadata->getTableSchema()); - } - - std::pair updateConfigurationAndGetCopy(ContextPtr local_context) override - { - std::lock_guard lock(Storage::configuration_update_mutex); - - auto new_object_storage = base_configuration->createOrUpdateObjectStorage(local_context); - bool updated = new_object_storage != nullptr; - if (updated) - Storage::object_storage = new_object_storage; - - auto new_keys = MetadataParser().getFiles(Storage::object_storage, base_configuration, local_context); - - if (updated || new_keys != Storage::configuration->getPaths()) - { - auto updated_configuration = base_configuration->clone(); - /// If metadata wasn't changed, we won't list data files again. - updated_configuration->getPaths() = new_keys; - Storage::configuration = updated_configuration; - } - return {Storage::configuration, Storage::object_storage}; - } - - template - explicit IStorageDataLake( - ConfigurationPtr base_configuration_, - Args &&... args) - : Storage(std::forward(args)...) 
- , base_configuration(base_configuration_) - { - } - -private: - ConfigurationPtr base_configuration; - LoggerPtr log; -}; - - -} - -#endif diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp b/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp deleted file mode 100644 index ad1a27c312b..00000000000 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include - -#if USE_AWS_S3 && USE_AVRO - -namespace DB -{ - - -} - -#endif diff --git a/src/Storages/DataLakes/StorageDeltaLake.h b/src/Storages/DataLakes/StorageDeltaLake.h deleted file mode 100644 index 07c2205d2df..00000000000 --- a/src/Storages/DataLakes/StorageDeltaLake.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include -#include -#include -#include "config.h" - -namespace DB -{ - -struct StorageDeltaLakeName -{ - static constexpr auto name = "DeltaLake"; -}; - -#if USE_AWS_S3 && USE_PARQUET -using StorageDeltaLakeS3 = IStorageDataLake; -#endif - -} diff --git a/src/Storages/DataLakes/StorageHudi.h b/src/Storages/DataLakes/StorageHudi.h deleted file mode 100644 index 3fd52c82d32..00000000000 --- a/src/Storages/DataLakes/StorageHudi.h +++ /dev/null @@ -1,20 +0,0 @@ -#pragma once - -#include -#include -#include -#include "config.h" - -namespace DB -{ - -struct StorageHudiName -{ - static constexpr auto name = "Hudi"; -}; - -#if USE_AWS_S3 -using StorageHudiS3 = IStorageDataLake; -#endif - -} diff --git a/src/Storages/DataLakes/registerDataLakes.cpp b/src/Storages/DataLakes/registerDataLakes.cpp deleted file mode 100644 index 2647fbce39d..00000000000 --- a/src/Storages/DataLakes/registerDataLakes.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include - - -namespace DB -{ - -#if USE_PARQUET -void registerStorageDeltaLake(StorageFactory & ) -{ - // factory.registerStorage( - // StorageDeltaLakeName::name, - // [&](const StorageFactory::Arguments & args) - // { - // auto configuration = std::make_shared(); - // return IStorageDataLake::create( - // configuration, args.getContext(), "deltaLake", args.table_id, args.columns, - // args.constraints, args.comment, std::nullopt, args.attach); - // }, - // { - // .supports_settings = false, - // .supports_schema_inference = true, - // .source_access_type = AccessType::S3, - // }); -} -#endif - -#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. 
- -void registerStorageIceberg(StorageFactory &) -{ - // REGISTER_DATA_LAKE_STORAGE(StorageIceberg, StorageIceberg::name) -} - -#endif - -void registerStorageHudi(StorageFactory &) -{ -} - -} - -#endif diff --git a/src/Storages/ObjectStorage/AzureConfiguration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp similarity index 92% rename from src/Storages/ObjectStorage/AzureConfiguration.cpp rename to src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 04f6f26111b..109918dfc8b 100644 --- a/src/Storages/ObjectStorage/AzureConfiguration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -1,4 +1,7 @@ -#include +#include + +#if USE_AZURE_BLOB_STORAGE + #include #include #include @@ -44,21 +47,19 @@ namespace return !candidate.starts_with("http"); } - bool containerExists(std::unique_ptr & blob_service_client, std::string container_name) + bool containerExists(Azure::Storage::Blobs::BlobServiceClient & blob_service_client, std::string container_name) { Azure::Storage::Blobs::ListBlobContainersOptions options; options.Prefix = container_name; options.PageSizeHint = 1; - auto containers_list_response = blob_service_client->ListBlobContainers(options); + auto containers_list_response = blob_service_client.ListBlobContainers(options); auto containers_list = containers_list_response.BlobContainers; - for (const auto & container : containers_list) - { - if (container_name == container.Name) - return true; - } - return false; + auto it = std::find_if( + containers_list.begin(), containers_list.end(), + [&](const auto & c) { return c.Name == container_name; }); + return it != containers_list.end(); } } @@ -76,19 +77,6 @@ void StorageAzureBlobConfiguration::check(ContextPtr context) const context->getGlobalContext()->getRemoteHostFilter().checkURL(url_to_check); } -StorageObjectStorageConfigurationPtr StorageAzureBlobConfiguration::clone() -{ - auto configuration = std::make_shared(); - configuration->connection_url = connection_url; - configuration->is_connection_string = is_connection_string; - configuration->account_name = account_name; - configuration->account_key = account_key; - configuration->container = container; - configuration->blob_path = blob_path; - configuration->blobs_paths = blobs_paths; - return configuration; -} - StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other) { connection_url = other.connection_url; @@ -98,6 +86,10 @@ StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureB container = other.container; blob_path = other.blob_path; blobs_paths = other.blobs_paths; + + format = other.format; + compression_method = other.compression_method; + structure = other.structure; } AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(ContextPtr context) @@ -127,7 +119,7 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) { auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); - bool container_exists = containerExists(blob_service_client, container); + bool container_exists = containerExists(*blob_service_client, container); if (!container_exists) { @@ -140,10 +132,11 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) try { result->CreateIfNotExists(); - } catch (const Azure::Storage::StorageException & e) + } + catch (const 
Azure::Storage::StorageException & e) { - if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict - && e.ReasonPhrase == "The specified container already exists.")) + if (e.StatusCode != Azure::Core::Http::HttpStatusCode::Conflict + || e.ReasonPhrase != "The specified container already exists.") { throw; } @@ -169,7 +162,7 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) blob_service_client = std::make_unique(connection_url); } - bool container_exists = containerExists(blob_service_client, container); + bool container_exists = containerExists(*blob_service_client, container); std::string final_url; size_t pos = connection_url.find('?'); @@ -460,3 +453,5 @@ void StorageAzureBlobConfiguration::addStructureToArgs(ASTs & args, const String } } + +#endif diff --git a/src/Storages/ObjectStorage/AzureConfiguration.h b/src/Storages/ObjectStorage/AzureBlob/Configuration.h similarity index 88% rename from src/Storages/ObjectStorage/AzureConfiguration.h rename to src/Storages/ObjectStorage/AzureBlob/Configuration.h index 4f285128241..deeb365d012 100644 --- a/src/Storages/ObjectStorage/AzureConfiguration.h +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.h @@ -1,6 +1,11 @@ #pragma once + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + #include -#include +#include namespace DB { @@ -26,8 +31,8 @@ public: String getNamespace() const override { return container; } void check(ContextPtr context) const override; - StorageObjectStorageConfigurationPtr clone() override; ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } void fromNamedCollection(const NamedCollection & collection) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; @@ -52,3 +57,5 @@ protected: }; } + +#endif diff --git a/src/Storages/ObjectStorage/DataLakes/Common.cpp b/src/Storages/ObjectStorage/DataLakes/Common.cpp new file mode 100644 index 00000000000..5f0138078d4 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/Common.cpp @@ -0,0 +1,28 @@ +#include "Common.h" +#include +#include +#include + +namespace DB +{ + +std::vector listFiles( + const IObjectStorage & object_storage, + const StorageObjectStorageConfiguration & configuration, + const String & prefix, const String & suffix) +{ + auto key = std::filesystem::path(configuration.getPath()) / prefix; + RelativePathsWithMetadata files_with_metadata; + object_storage.listObjects(key, files_with_metadata, 0); + Strings res; + for (const auto & file_with_metadata : files_with_metadata) + { + const auto & filename = file_with_metadata->relative_path; + if (filename.ends_with(suffix)) + res.push_back(filename); + } + LOG_TRACE(getLogger("DataLakeCommon"), "Listed {} files", res.size()); + return res; +} + +} diff --git a/src/Storages/ObjectStorage/DataLakes/Common.h b/src/Storages/ObjectStorage/DataLakes/Common.h new file mode 100644 index 00000000000..ae3767f2eec --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/Common.h @@ -0,0 +1,15 @@ +#pragma once +#include + +namespace DB +{ + +class IObjectStorage; +class StorageObjectStorageConfiguration; + +std::vector listFiles( + const IObjectStorage & object_storage, + const StorageObjectStorageConfiguration & configuration, + const String & prefix, const String & suffix); + +} diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp 
b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp similarity index 79% rename from src/Storages/DataLakes/DeltaLakeMetadataParser.cpp rename to src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 55ff8fefdd5..903558b73ab 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include "config.h" #include @@ -15,8 +15,7 @@ #include #include #include - -namespace fs = std::filesystem; +#include namespace DB { @@ -27,12 +26,23 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -struct DeltaLakeMetadataParser::Impl +struct DeltaLakeMetadata::Impl final : private WithContext { + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + /** * Useful links: * - https://github.com/delta-io/delta/blob/master/PROTOCOL.md#data-files */ + Impl(ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) + { + } /** * DeltaLake tables store metadata files and data files. @@ -62,13 +72,10 @@ struct DeltaLakeMetadataParser::Impl * An action changes one aspect of the table's state, for example, adding or removing a file. * Note: it is not a valid json, but a list of json's, so we read it in a while cycle. */ - std::set processMetadataFiles( - ObjectStoragePtr object_storage, - const StorageObjectStorageConfiguration & configuration, - ContextPtr context) + std::set processMetadataFiles() { std::set result_files; - const auto checkpoint_version = getCheckpointIfExists(result_files, object_storage, configuration, context); + const auto checkpoint_version = getCheckpointIfExists(result_files); if (checkpoint_version) { @@ -76,12 +83,12 @@ struct DeltaLakeMetadataParser::Impl while (true) { const auto filename = withPadding(++current_version) + metadata_file_suffix; - const auto file_path = fs::path(configuration.getPath()) / deltalake_metadata_directory / filename; + const auto file_path = fs::path(configuration->getPath()) / deltalake_metadata_directory / filename; if (!object_storage->exists(StoredObject(file_path))) break; - processMetadataFile(file_path, result_files, object_storage, configuration, context); + processMetadataFile(file_path, result_files); } LOG_TRACE( @@ -90,33 +97,14 @@ struct DeltaLakeMetadataParser::Impl } else { - const auto keys = listFiles(object_storage, configuration, deltalake_metadata_directory, metadata_file_suffix); + const auto keys = listFiles(*object_storage, *configuration, deltalake_metadata_directory, metadata_file_suffix); for (const String & key : keys) - processMetadataFile(key, result_files, object_storage, configuration, context); + processMetadataFile(key, result_files); } return result_files; } - std::vector listFiles( - const ObjectStoragePtr & object_storage, - const StorageObjectStorageConfiguration & configuration, - const String & prefix, const String & suffix) - { - auto key = std::filesystem::path(configuration.getPath()) / prefix; - RelativePathsWithMetadata files_with_metadata; - object_storage->listObjects(key, files_with_metadata, 0); - Strings res; - for (const auto & file_with_metadata : files_with_metadata) - { - const auto & filename = file_with_metadata->relative_path; - if (filename.ends_with(suffix)) - res.push_back(filename); - } - LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); - return res; - } - /** * Example of content of a 
single .json metadata file: * " @@ -146,14 +134,9 @@ struct DeltaLakeMetadataParser::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ - void processMetadataFile( - const String & key, - std::set & result, - ObjectStoragePtr object_storage, - const StorageObjectStorageConfiguration & configuration, - ContextPtr context) + void processMetadataFile(const String & key, std::set & result) { - auto read_settings = context->getReadSettings(); + auto read_settings = getContext()->getReadSettings(); auto buf = object_storage->readObject(StoredObject(key), read_settings); char c; @@ -176,12 +159,12 @@ struct DeltaLakeMetadataParser::Impl if (json.has("add")) { const auto path = json["add"]["path"].getString(); - result.insert(fs::path(configuration.getPath()) / path); + result.insert(fs::path(configuration->getPath()) / path); } else if (json.has("remove")) { const auto path = json["remove"]["path"].getString(); - result.erase(fs::path(configuration.getPath()) / path); + result.erase(fs::path(configuration->getPath()) / path); } } } @@ -199,17 +182,14 @@ struct DeltaLakeMetadataParser::Impl * * We need to get "version", which is the version of the checkpoint we need to read. */ - size_t readLastCheckpointIfExists( - ObjectStoragePtr object_storage, - const StorageObjectStorageConfiguration & configuration, - ContextPtr context) const + size_t readLastCheckpointIfExists() { - const auto last_checkpoint_file = fs::path(configuration.getPath()) / deltalake_metadata_directory / "_last_checkpoint"; + const auto last_checkpoint_file = fs::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; if (!object_storage->exists(StoredObject(last_checkpoint_file))) return 0; String json_str; - auto read_settings = context->getReadSettings(); + auto read_settings = getContext()->getReadSettings(); auto buf = object_storage->readObject(StoredObject(last_checkpoint_file), read_settings); readJSONObjectPossiblyInvalid(json_str, *buf); @@ -260,21 +240,18 @@ struct DeltaLakeMetadataParser::Impl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \ } while (false) - size_t getCheckpointIfExists( - std::set & result, - ObjectStoragePtr object_storage, - const StorageObjectStorageConfiguration & configuration, - ContextPtr context) + size_t getCheckpointIfExists(std::set & result) { - const auto version = readLastCheckpointIfExists(object_storage, configuration, context); + const auto version = readLastCheckpointIfExists(); if (!version) return 0; const auto checkpoint_filename = withPadding(version) + ".checkpoint.parquet"; - const auto checkpoint_path = fs::path(configuration.getPath()) / deltalake_metadata_directory / checkpoint_filename; + const auto checkpoint_path = fs::path(configuration->getPath()) / deltalake_metadata_directory / checkpoint_filename; LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string()); + auto context = getContext(); auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(checkpoint_path), read_settings); auto format_settings = getFormatSettings(context); @@ -334,7 +311,7 @@ struct DeltaLakeMetadataParser::Impl if (filename.empty()) continue; LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(fs::path(configuration.getPath()) / filename); + const auto [_, inserted] = result.insert(fs::path(configuration->getPath()) / filename); if (!inserted) throw 
Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename); } @@ -345,15 +322,22 @@ struct DeltaLakeMetadataParser::Impl LoggerPtr log = getLogger("DeltaLakeMetadataParser"); }; -DeltaLakeMetadataParser::DeltaLakeMetadataParser() : impl(std::make_unique()) {} - -Strings DeltaLakeMetadataParser::getFiles( - ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, - ContextPtr context) +DeltaLakeMetadata::DeltaLakeMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : impl(std::make_unique(object_storage_, configuration_, context_)) { - auto result = impl->processMetadataFiles(object_storage, *configuration, context); - return Strings(result.begin(), result.end()); +} + +Strings DeltaLakeMetadata::getDataFiles() const +{ + if (!data_files.empty()) + return data_files; + + auto result = impl->processMetadataFiles(); + data_files = Strings(result.begin(), result.end()); + return data_files; } } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h new file mode 100644 index 00000000000..1a5bb85586a --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class DeltaLakeMetadata final : public IDataLakeMetadata, private WithContext +{ +public: + using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + + static constexpr auto name = "DeltaLake"; + + DeltaLakeMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_); + + Strings getDataFiles() const override; + + NamesAndTypesList getTableSchema() const override { return {}; } + + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * deltalake_metadata = dynamic_cast(&other); + return deltalake_metadata && getDataFiles() == deltalake_metadata->getDataFiles(); + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context) + { + return std::make_unique(object_storage, configuration, local_context); + } + +private: + struct Impl; + const std::shared_ptr impl; + mutable Strings data_files; +}; + +} diff --git a/src/Storages/DataLakes/HudiMetadataParser.cpp b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp similarity index 68% rename from src/Storages/DataLakes/HudiMetadataParser.cpp rename to src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp index 8571c035b32..91a586ccbf9 100644 --- a/src/Storages/DataLakes/HudiMetadataParser.cpp +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.cpp @@ -1,4 +1,5 @@ -#include +#include +#include #include #include #include @@ -40,33 +41,10 @@ namespace ErrorCodes * hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group. * Each file group is identified by File Id. 
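// A minimal, self-contained sketch (not part of the patch) of the file-group resolution that
// HudiMetadata::getDataFilesImpl() performs below. It assumes data file names of the form
// <file_id>_<write_token>_<commit_timestamp>.parquet, which is what the split on '_' and the
// use of file_parts[0] / file_parts[2] imply; the object keys and values here are hypothetical.
#include <cstdint>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

int main()
{
    // Hypothetical object keys for a single partition of a Hudi table.
    const std::vector<std::string> keys = {
        "part=1/fileid-a_0-1-0_20240101.parquet",
        "part=1/fileid-a_0-2-0_20240105.parquet", // newer commit in the same file group
        "part=1/fileid-b_0-1-0_20240103.parquet",
    };

    // Keep only the key with the greatest commit timestamp per file id,
    // mirroring the "file_info.timestamp < timestamp" update in the patch.
    std::map<std::string, std::pair<uint64_t, std::string>> latest;
    for (const auto & key : keys)
    {
        std::string stem = key.substr(key.find('/') + 1);
        stem = stem.substr(0, stem.rfind('.'));

        std::vector<std::string> parts;
        std::stringstream ss(stem);
        for (std::string part; std::getline(ss, part, '_');)
            parts.push_back(part);

        const std::string & file_id = parts.at(0);
        const uint64_t timestamp = std::stoull(parts.at(2));

        auto & entry = latest[file_id];
        if (entry.first < timestamp)
            entry = {timestamp, key};
    }

    for (const auto & [file_id, entry] : latest)
        std::cout << file_id << " -> " << entry.second << '\n'; // the surviving data file per group
}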
*/ -std::vector listFiles( - const ObjectStoragePtr & object_storage, - const StorageObjectStorageConfiguration & configuration, - const String & prefix, const String & suffix) +Strings HudiMetadata::getDataFilesImpl() const { - auto key = std::filesystem::path(configuration.getPath()) / prefix; - RelativePathsWithMetadata files_with_metadata; - object_storage->listObjects(key, files_with_metadata, 0); - Strings res; - for (const auto & file_with_metadata : files_with_metadata) - { - const auto & filename = file_with_metadata->relative_path; - if (filename.ends_with(suffix)) - res.push_back(filename); - } - LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); - return res; -} - -Strings HudiMetadataParser::getFiles( - ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, - ContextPtr) -{ - auto log = getLogger("HudiMetadataParser"); - - const auto keys = listFiles(object_storage, *configuration, "", Poco::toLower(configuration->format)); + auto log = getLogger("HudiMetadata"); + const auto keys = listFiles(*object_storage, *configuration, "", Poco::toLower(configuration->format)); using Partition = std::string; using FileID = std::string; @@ -75,7 +53,7 @@ Strings HudiMetadataParser::getFiles( String key; UInt64 timestamp = 0; }; - std::unordered_map> data_files; + std::unordered_map> files; for (const auto & key : keys) { @@ -90,7 +68,7 @@ Strings HudiMetadataParser::getFiles( const auto & file_id = file_parts[0]; const auto timestamp = parse(file_parts[2]); - auto & file_info = data_files[partition][file_id]; + auto & file_info = files[partition][file_id]; if (file_info.timestamp == 0 || file_info.timestamp < timestamp) { file_info.key = key; @@ -99,7 +77,7 @@ Strings HudiMetadataParser::getFiles( } Strings result; - for (auto & [partition, partition_data] : data_files) + for (auto & [partition, partition_data] : files) { LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size()); for (auto & [file_id, file_data] : partition_data) @@ -108,4 +86,21 @@ Strings HudiMetadataParser::getFiles( return result; } +HudiMetadata::HudiMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_) + : WithContext(context_) + , object_storage(object_storage_) + , configuration(configuration_) +{ +} + +Strings HudiMetadata::getDataFiles() const +{ + if (data_files.empty()) + data_files = getDataFilesImpl(); + return data_files; +} + } diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h new file mode 100644 index 00000000000..ee8b1ea4978 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -0,0 +1,51 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class HudiMetadata final : public IDataLakeMetadata, private WithContext +{ +public: + using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + + static constexpr auto name = "Hudi"; + + HudiMetadata( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + ContextPtr context_); + + Strings getDataFiles() const override; + + NamesAndTypesList getTableSchema() const override { return {}; } + + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * hudi_metadata = dynamic_cast(&other); + return hudi_metadata && getDataFiles() == hudi_metadata->getDataFiles(); + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + 
ConfigurationPtr configuration, + ContextPtr local_context) + { + return std::make_unique(object_storage, configuration, local_context); + } + +private: + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + mutable Strings data_files; + + Strings getDataFilesImpl() const; +}; + +} diff --git a/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h new file mode 100644 index 00000000000..a2bd5adb947 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/IDataLakeMetadata.h @@ -0,0 +1,19 @@ +#pragma once +#include +#include +#include + +namespace DB +{ + +class IDataLakeMetadata : boost::noncopyable +{ +public: + virtual ~IDataLakeMetadata() = default; + virtual Strings getDataFiles() const = 0; + virtual NamesAndTypesList getTableSchema() const = 0; + virtual bool operator==(const IDataLakeMetadata & other) const = 0; +}; +using DataLakeMetadataPtr = std::unique_ptr; + +} diff --git a/src/Storages/DataLakes/Iceberg/StorageIceberg.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h similarity index 61% rename from src/Storages/DataLakes/Iceberg/StorageIceberg.h rename to src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index bca6e3c868f..95196cdd000 100644 --- a/src/Storages/DataLakes/Iceberg/StorageIceberg.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -5,11 +5,13 @@ #if USE_AWS_S3 && USE_AVRO #include -#include #include #include #include -#include +#include +#include +#include +#include #include @@ -19,13 +21,10 @@ namespace DB /// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) /// Right now it's implemented on top of StorageS3 and right now it doesn't support /// many Iceberg features like schema evolution, partitioning, positional and equality deletes. -/// TODO: Implement Iceberg as a separate storage using IObjectStorage -/// (to support all object storages, not only S3) and add support for missing Iceberg features. -template -class StorageIceberg : public StorageObjectStorage +template +class IStorageDataLake final : public StorageObjectStorage { public: - static constexpr auto name = "Iceberg"; using Storage = StorageObjectStorage; using ConfigurationPtr = Storage::ConfigurationPtr; @@ -41,12 +40,14 @@ public: bool attach) { auto object_storage = base_configuration->createOrUpdateObjectStorage(context); - std::unique_ptr metadata; + DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; + ConfigurationPtr configuration = base_configuration->clone(); try { - metadata = parseIcebergMetadata(object_storage, base_configuration, context); + metadata = DataLakeMetadata::create(object_storage, base_configuration, context); schema_from_metadata = metadata->getTableSchema(); + configuration->getPaths() = metadata->getDataFiles(); } catch (...) { @@ -55,17 +56,14 @@ public: tryLogCurrentException(__PRETTY_FUNCTION__); } - auto configuration = base_configuration->clone(); - configuration->getPaths() = metadata->getDataFiles(); - - return std::make_shared>( + return std::make_shared>( base_configuration, std::move(metadata), configuration, object_storage, engine_name_, context, table_id_, columns_.empty() ? 
ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment_, format_settings_); } - String getName() const override { return name; } + String getName() const override { return DataLakeMetadata::name; } static ColumnsDescription getTableStructureFromData( ObjectStoragePtr object_storage_, @@ -73,7 +71,7 @@ public: const std::optional &, ContextPtr local_context) { - auto metadata = parseIcebergMetadata(object_storage_, base_configuration, local_context); + auto metadata = DataLakeMetadata::create(object_storage_, base_configuration, local_context); return ColumnsDescription(metadata->getTableSchema()); } @@ -86,24 +84,25 @@ public: if (updated) Storage::object_storage = new_object_storage; - auto new_metadata = parseIcebergMetadata(Storage::object_storage, base_configuration, local_context); + auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); - if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion()) + if (!current_metadata || !(*current_metadata == *new_metadata)) current_metadata = std::move(new_metadata); - else if (updated) - { - auto updated_configuration = base_configuration->clone(); - /// If metadata wasn't changed, we won't list data files again. - updated_configuration->getPaths() = current_metadata->getDataFiles(); - Storage::configuration = updated_configuration; - } + else if (!updated) + return {Storage::configuration, Storage::object_storage}; + + auto updated_configuration = base_configuration->clone(); + /// If metadata wasn't changed, we won't list data files again. + updated_configuration->getPaths() = current_metadata->getDataFiles(); + Storage::configuration = updated_configuration; + return {Storage::configuration, Storage::object_storage}; } template - StorageIceberg( + IStorageDataLake( ConfigurationPtr base_configuration_, - std::unique_ptr metadata_, + DataLakeMetadataPtr metadata_, Args &&... args) : Storage(std::forward(args)...) 
, base_configuration(base_configuration_) @@ -113,8 +112,13 @@ public: private: ConfigurationPtr base_configuration; - std::unique_ptr current_metadata; + DataLakeMetadataPtr current_metadata; }; + +using StorageIceberg = IStorageDataLake; +using StorageDeltaLake = IStorageDataLake; +using StorageHudi = IStorageDataLake; + } #endif diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp similarity index 96% rename from src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp rename to src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp index 5543e60e7a7..8ee6f002ca6 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp @@ -24,7 +24,8 @@ #include #include #include -#include +#include +#include #include #include @@ -332,25 +333,6 @@ MutableColumns parseAvro( return columns; } -std::vector listFiles( - const ObjectStoragePtr & object_storage, - const StorageObjectStorageConfiguration & configuration, - const String & prefix, const String & suffix) -{ - auto key = std::filesystem::path(configuration.getPath()) / prefix; - RelativePathsWithMetadata files_with_metadata; - object_storage->listObjects(key, files_with_metadata, 0); - Strings res; - for (const auto & file_with_metadata : files_with_metadata) - { - const auto & filename = file_with_metadata->relative_path; - if (filename.ends_with(suffix)) - res.push_back(filename); - } - LOG_TRACE(getLogger("DataLakeMetadataReadHelper"), "Listed {} files", res.size()); - return res; -} - /** * Each version of table metadata is stored in a `metadata` directory and * has one of 2 formats: @@ -361,7 +343,7 @@ std::pair getMetadataFileAndVersion( ObjectStoragePtr object_storage, const StorageObjectStorageConfiguration & configuration) { - const auto metadata_files = listFiles(object_storage, configuration, "metadata", ".metadata.json"); + const auto metadata_files = listFiles(*object_storage, configuration, "metadata", ".metadata.json"); if (metadata_files.empty()) { throw Exception( @@ -394,14 +376,14 @@ std::pair getMetadataFileAndVersion( } -std::unique_ptr parseIcebergMetadata( +DataLakeMetadataPtr IcebergMetadata::create( ObjectStoragePtr object_storage, StorageObjectStorageConfigurationPtr configuration, - ContextPtr context_) + ContextPtr local_context) { const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(object_storage, *configuration); LOG_DEBUG(getLogger("IcebergMetadata"), "Parse metadata {}", metadata_file_path); - auto read_settings = context_->getReadSettings(); + auto read_settings = local_context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings); String json_str; readJSONObjectPossiblyInvalid(json_str, *buf); @@ -411,7 +393,7 @@ std::unique_ptr parseIcebergMetadata( Poco::JSON::Object::Ptr object = json.extract(); auto format_version = object->getValue("format-version"); - auto [schema, schema_id] = parseTableSchema(object, format_version, context_->getSettingsRef().iceberg_engine_ignore_schema_evolution); + auto [schema, schema_id] = parseTableSchema(object, format_version, local_context->getSettingsRef().iceberg_engine_ignore_schema_evolution); auto current_snapshot_id = object->getValue("current-snapshot-id"); auto snapshots = object->get("snapshots").extract(); @@ -428,7 +410,7 @@ std::unique_ptr parseIcebergMetadata( } } - return std::make_unique(object_storage, configuration, context_, metadata_version, 
format_version, manifest_list_file, schema_id, schema); + return std::make_unique(object_storage, configuration, local_context, metadata_version, format_version, manifest_list_file, schema_id, schema); } /** @@ -456,7 +438,7 @@ std::unique_ptr parseIcebergMetadata( * │ 1 │ 2252246380142525104 │ ('/iceberg_data/db/table_name/data/a=2/00000-1-c9535a00-2f4f-405c-bcfa-6d4f9f477235-00003.parquet','PARQUET',(2),1,631,67108864,[(1,46),(2,48)],[(1,1),(2,1)],[(1,0),(2,0)],[],[(1,'\0\0\0\0\0\0\0'),(2,'3')],[(1,'\0\0\0\0\0\0\0'),(2,'3')],NULL,[4],0) │ * └────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ */ -Strings IcebergMetadata::getDataFiles() +Strings IcebergMetadata::getDataFiles() const { if (!data_files.empty()) return data_files; diff --git a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h similarity index 76% rename from src/Storages/DataLakes/Iceberg/IcebergMetadata.h rename to src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index a289715848f..f88e3eecc67 100644 --- a/src/Storages/DataLakes/Iceberg/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -5,7 +5,8 @@ #include #include #include -#include +#include +#include namespace DB { @@ -57,12 +58,16 @@ namespace DB * "metadata-log" : [ ] * } */ -class IcebergMetadata : WithContext +class IcebergMetadata : public IDataLakeMetadata, private WithContext { public: + using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + + static constexpr auto name = "Iceberg"; + IcebergMetadata( ObjectStoragePtr object_storage_, - StorageObjectStorageConfigurationPtr configuration_, + ConfigurationPtr configuration_, ContextPtr context_, Int32 metadata_version_, Int32 format_version_, @@ -72,31 +77,36 @@ public: /// Get data files. On first request it reads manifest_list file and iterates through manifest files to find all data files. /// All subsequent calls will return saved list of files (because it cannot be changed without changing metadata file) - Strings getDataFiles(); + Strings getDataFiles() const override; /// Get table schema parsed from metadata. 
- NamesAndTypesList getTableSchema() const { return schema; } + NamesAndTypesList getTableSchema() const override { return schema; } - size_t getVersion() const { return metadata_version; } + bool operator ==(const IDataLakeMetadata & other) const override + { + const auto * iceberg_metadata = dynamic_cast(&other); + return iceberg_metadata && getVersion() == iceberg_metadata->getVersion(); + } + + static DataLakeMetadataPtr create( + ObjectStoragePtr object_storage, + ConfigurationPtr configuration, + ContextPtr local_context); private: - ObjectStoragePtr object_storage; - StorageObjectStorageConfigurationPtr configuration; + size_t getVersion() const { return metadata_version; } + + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; Int32 metadata_version; Int32 format_version; String manifest_list_file; Int32 current_schema_id; NamesAndTypesList schema; - Strings data_files; + mutable Strings data_files; LoggerPtr log; - }; -std::unique_ptr parseIcebergMetadata( - ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, - ContextPtr context); - } #endif diff --git a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp new file mode 100644 index 00000000000..d93c14dfe32 --- /dev/null +++ b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp @@ -0,0 +1,83 @@ +#include "config.h" + +#if USE_AWS_S3 + +#include +#include +#include +#include +#include + + +namespace DB +{ + +#if USE_AVRO /// StorageIceberg depending on Avro to parse metadata with Avro format. + +void registerStorageIceberg(StorageFactory & factory) +{ + factory.registerStorage( + "Iceberg", + [&](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + + return StorageIceberg::create( + configuration, args.getContext(), "Iceberg", args.table_id, args.columns, + args.constraints, args.comment, std::nullopt, args.attach); + }, + { + .supports_settings = false, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +#endif + +#if USE_PARQUET +void registerStorageDeltaLake(StorageFactory & factory) +{ + factory.registerStorage( + "DeltaLake", + [&](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + + return StorageDeltaLake::create( + configuration, args.getContext(), "DeltaLake", args.table_id, args.columns, + args.constraints, args.comment, std::nullopt, args.attach); + }, + { + .supports_settings = false, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} +#endif + +void registerStorageHudi(StorageFactory & factory) +{ + factory.registerStorage( + "Hudi", + [&](const StorageFactory::Arguments & args) + { + auto configuration = std::make_shared(); + StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + + return StorageHudi::create( + configuration, args.getContext(), "Hudi", args.table_id, args.columns, + args.constraints, args.comment, std::nullopt, args.attach); + }, + { + .supports_settings = false, + .supports_schema_inference = true, + .source_access_type = AccessType::S3, + }); +} + +} + +#endif diff --git 
a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp new file mode 100644 index 00000000000..c80237b3055 --- /dev/null +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -0,0 +1,57 @@ +#include + +#if USE_HDFS +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) +{ + url = other.url; + path = other.path; + paths = other.paths; + format = other.format; + compression_method = other.compression_method; + structure = other.structure; +} + +void StorageHDFSConfiguration::check(ContextPtr context) const +{ + context->getRemoteHostFilter().checkURL(Poco::URI(url)); + checkHDFSURL(url); +} + +ObjectStoragePtr StorageHDFSConfiguration::createOrUpdateObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +{ + UNUSED(is_readonly); + auto settings = std::make_unique(); + return std::make_shared(url, std::move(settings), context->getConfigRef()); +} + +void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr, bool /* with_structure */) +{ + url = checkAndGetLiteralArgument(args[0], "url"); + + String format_name = "auto"; + if (args.size() > 1) + format_name = checkAndGetLiteralArgument(args[1], "format_name"); + + if (format_name == "auto") + format_name = FormatFactory::instance().getFormatFromFileName(url, true); + + String compression_method; + if (args.size() == 3) + compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); + else + compression_method = "auto"; + +} +} + +#endif diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h new file mode 100644 index 00000000000..03fb0824123 --- /dev/null +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -0,0 +1,45 @@ +#pragma once +#include "config.h" + +#if USE_HDFS +#include +#include +#include +#include + +namespace DB +{ + +class StorageHDFSConfiguration : public StorageObjectStorageConfiguration +{ +public: + StorageHDFSConfiguration() = default; + StorageHDFSConfiguration(const StorageHDFSConfiguration & other); + + Path getPath() const override { return path; } + void setPath(const Path & path_) override { path = path_; } + + const Paths & getPaths() const override { return paths; } + Paths & getPaths() override { return paths; } + + String getNamespace() const override { return ""; } + String getDataSourceDescription() override { return url; } + + void check(ContextPtr context) const override; + ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } + + void fromNamedCollection(const NamedCollection &) override {} + void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; + + static void addStructureToArgs(ASTs &, const String &, ContextPtr) {} + +private: + String url; + String path; + std::vector paths; +}; + +} + +#endif diff --git a/src/Storages/ObjectStorage/HDFSConfiguration.h b/src/Storages/ObjectStorage/HDFSConfiguration.h deleted file mode 100644 index aa45c634042..00000000000 --- a/src/Storages/ObjectStorage/HDFSConfiguration.h +++ /dev/null @@ -1,81 +0,0 @@ -#pragma once -#include "config.h" - -#if USE_HDFS - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -class 
StorageHDFSConfiguration : public StorageObjectStorageConfiguration -{ -public: - Path getPath() const override { return path; } - void setPath(const Path & path_) override { path = path_; } - - const Paths & getPaths() const override { return paths; } - Paths & getPaths() override { return paths; } - - String getNamespace() const override { return ""; } - String getDataSourceDescription() override { return url; } - - void check(ContextPtr context) const override - { - context->getRemoteHostFilter().checkURL(Poco::URI(url)); - checkHDFSURL(url); - } - StorageObjectStorageConfigurationPtr clone() override - { - auto configuration = std::make_shared(); - return configuration; - } - - ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override /// NOLINT - { - UNUSED(is_readonly); - auto settings = std::make_unique(); - return std::make_shared(url, std::move(settings), context->getConfigRef()); - } - - void fromNamedCollection(const NamedCollection &) override {} - void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override - { - url = checkAndGetLiteralArgument(args[0], "url"); - - String format_name = "auto"; - if (args.size() > 1) - format_name = checkAndGetLiteralArgument(args[1], "format_name"); - - if (format_name == "auto") - format_name = FormatFactory::instance().getFormatFromFileName(url, true); - - String compression_method; - if (args.size() == 3) - { - compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); - } else compression_method = "auto"; - - } - static void addStructureToArgs(ASTs &, const String &, ContextPtr) {} - -private: - String url; - String path; - std::vector paths; -}; - -} - -#endif diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index dcdf36dbcf5..a3e19b907bc 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -67,11 +67,11 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( auto get_last_mod_time = [&] -> std::optional { if (object_info->metadata) - return object_info->metadata->last_modified->epochMicroseconds(); + return object_info->metadata->last_modified.epochMicroseconds(); else { object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - return object_info->metadata->last_modified->epochMicroseconds(); + return object_info->metadata->last_modified.epochMicroseconds(); } }; diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp index 2c27c816078..b33eea7d354 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp @@ -59,7 +59,6 @@ void ReadFromStorageObejctStorage::applyFilters() const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); - createIterator(predicate); } diff --git a/src/Storages/ObjectStorage/S3Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp similarity index 97% rename from src/Storages/ObjectStorage/S3Configuration.cpp rename to src/Storages/ObjectStorage/S3/Configuration.cpp index 5a5412019f5..f057745d669 100644 --- a/src/Storages/ObjectStorage/S3Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -1,4 +1,7 @@ -#include +#include + +#if USE_AWS_S3 + #include #include #include @@ -14,6 +17,7 @@ namespace DB namespace 
ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; } static const std::unordered_set required_configuration_keys = { @@ -51,17 +55,19 @@ void StorageS3Configuration::check(ContextPtr context) const context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); } -StorageObjectStorageConfigurationPtr StorageS3Configuration::clone() +StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) { - auto configuration = std::make_shared(); - configuration->url = url; - configuration->auth_settings = auth_settings; - configuration->request_settings = request_settings; - configuration->static_configuration = static_configuration; - configuration->headers_from_ast = headers_from_ast; - configuration->keys = keys; - configuration->initialized = initialized; - return configuration; + url = other.url; + auth_settings = other.auth_settings; + request_settings = other.request_settings; + static_configuration = other.static_configuration; + headers_from_ast = other.headers_from_ast; + keys = other.keys; + initialized = other.initialized; + + format = other.format; + compression_method = other.compression_method; + structure = other.structure; } ObjectStoragePtr StorageS3Configuration::createOrUpdateObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT @@ -489,3 +495,5 @@ void StorageS3Configuration::addStructureToArgs(ASTs & args, const String & stru } } + +#endif diff --git a/src/Storages/ObjectStorage/S3Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h similarity index 81% rename from src/Storages/ObjectStorage/S3Configuration.h rename to src/Storages/ObjectStorage/S3/Configuration.h index c953bc25c4e..037cf2eae87 100644 --- a/src/Storages/ObjectStorage/S3Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -1,7 +1,12 @@ #pragma once + +#include "config.h" + +#if USE_AWS_S3 + #include #include -#include +#include namespace DB { @@ -9,6 +14,9 @@ namespace DB class StorageS3Configuration : public StorageObjectStorageConfiguration { public: + StorageS3Configuration() = default; + StorageS3Configuration(const StorageS3Configuration & other); + Path getPath() const override { return url.key; } void setPath(const Path & path) override { url.key = path; } @@ -19,9 +27,8 @@ public: String getDataSourceDescription() override; void check(ContextPtr context) const override; - StorageObjectStorageConfigurationPtr clone() override; - ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } void fromNamedCollection(const NamedCollection & collection) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; @@ -44,3 +51,5 @@ private: }; } + +#endif diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 9a7260ea47c..08d7c9d0014 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -24,8 +24,6 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; } @@ -59,7 +57,6 @@ std::unique_ptr getStorageMetadata( 
storage_metadata->setColumns(columns); } - storage_metadata->setConstraints(constraints); storage_metadata->setComment(comment); return storage_metadata; @@ -264,10 +261,7 @@ SinkToStoragePtr StorageObjectStorage::write( template void StorageObjectStorage::truncate( - const ASTPtr &, - const StorageMetadataPtr &, - ContextPtr, - TableExclusiveLockHolder &) + const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) { if (configuration->withGlobs()) { diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 0b29845ba5c..6f18153c7af 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -95,8 +95,7 @@ public: ContextPtr context); protected: - virtual std::pair - updateConfigurationAndGetCopy(ContextPtr local_context); + virtual std::pair updateConfigurationAndGetCopy(ContextPtr local_context); const std::string engine_name; const NamesAndTypesList virtual_columns; @@ -110,7 +109,7 @@ protected: }; using StorageS3 = StorageObjectStorage; -using StorageAzureBlobStorage = StorageObjectStorage; +using StorageAzureBlob = StorageObjectStorage; using StorageHDFS = StorageObjectStorage; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 39cd5d8eca6..c03bbd1a45d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index aae8f704a73..507de20e888 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -67,5 +67,8 @@ using StorageS3Cluster = StorageObjectStorageCluster; #endif +#if USE_HDFS +using StorageHDFSCluster = StorageObjectStorageCluster; +#endif } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 2d5760ed9d8..651f1d25ec1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB diff --git a/src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h similarity index 99% rename from src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h rename to src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 427d6a8d453..04b2d8e8fd9 100644 --- a/src/Storages/ObjectStorage/StorageObejctStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -39,9 +39,8 @@ public: std::string getPathWithoutGlob() const; virtual void check(ContextPtr context) const = 0; - virtual StorageObjectStorageConfigurationPtr clone() = 0; - virtual ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT + virtual StorageObjectStorageConfigurationPtr clone() = 0; String format = "auto"; String compression_method = "auto"; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp new file mode 100644 index 00000000000..37f93a2b82f --- /dev/null 
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -0,0 +1,127 @@ +#include "StorageObjectStorageSink.h" +#include +#include + +namespace DB +{ + +StorageObjectStorageSink::StorageObjectStorageSink( + ObjectStoragePtr object_storage, + StorageObjectStorageConfigurationPtr configuration, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context, + const std::string & blob_path) + : SinkToStorage(sample_block_) + , sample_block(sample_block_) + , format_settings(format_settings_) +{ + const auto & settings = context->getSettingsRef(); + const auto path = blob_path.empty() ? configuration->getPaths().back() : blob_path; + const auto chosen_compression_method = chooseCompressionMethod(path, configuration->compression_method); + + auto buffer = object_storage->writeObject( + StoredObject(path), WriteMode::Rewrite, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, context->getWriteSettings()); + + write_buf = wrapWriteBufferWithCompressionMethod( + std::move(buffer), + chosen_compression_method, + static_cast(settings.output_format_compression_level), + static_cast(settings.output_format_compression_zstd_window_log)); + + writer = FormatFactory::instance().getOutputFormatParallelIfPossible( + configuration->format, *write_buf, sample_block, context, format_settings); +} + +void StorageObjectStorageSink::consume(Chunk chunk) +{ + std::lock_guard lock(cancel_mutex); + if (cancelled) + return; + writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); +} + +void StorageObjectStorageSink::onCancel() +{ + std::lock_guard lock(cancel_mutex); + finalize(); + cancelled = true; +} + +void StorageObjectStorageSink::onException(std::exception_ptr exception) +{ + std::lock_guard lock(cancel_mutex); + try + { + std::rethrow_exception(exception); + } + catch (...) + { + /// An exception context is needed to proper delete write buffers without finalization. + release(); + } +} + +void StorageObjectStorageSink::onFinish() +{ + std::lock_guard lock(cancel_mutex); + finalize(); +} + +void StorageObjectStorageSink::finalize() +{ + if (!writer) + return; + + try + { + writer->finalize(); + writer->flush(); + write_buf->finalize(); + } + catch (...) + { + /// Stop ParallelFormattingOutputFormat correctly. 
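+ /// release() resets the writer (stopping any parallel formatting threads) and finalizes write_buf before the exception propagates.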
+ release(); + throw; + } +} + +void StorageObjectStorageSink::release() +{ + writer.reset(); + write_buf->finalize(); +} + +PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( + ObjectStoragePtr object_storage_, + StorageObjectStorageConfigurationPtr configuration_, + std::optional format_settings_, + const Block & sample_block_, + ContextPtr context_, + const ASTPtr & partition_by) + : PartitionedSink(partition_by, context_, sample_block_) + , object_storage(object_storage_) + , configuration(configuration_) + , format_settings(format_settings_) + , sample_block(sample_block_) + , context(context_) +{ +} + +SinkPtr PartitionedStorageObjectStorageSink::createSinkForPartition(const String & partition_id) +{ + auto blob = configuration->getPaths().back(); + auto partition_key = replaceWildcards(blob, partition_id); + validatePartitionKey(partition_key, true); + return std::make_shared( + object_storage, + configuration, + format_settings, + sample_block, + context, + partition_key + ); +} + +} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index a2d42d7fa9f..14298376d0e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -1,9 +1,8 @@ #pragma once #include -#include -#include +#include #include -#include +#include namespace DB { @@ -16,64 +15,17 @@ public: std::optional format_settings_, const Block & sample_block_, ContextPtr context, - const std::string & blob_path = "") - : SinkToStorage(sample_block_) - , sample_block(sample_block_) - , format_settings(format_settings_) - { - const auto & settings = context->getSettingsRef(); - const auto path = blob_path.empty() ? configuration->getPaths().back() : blob_path; - const auto chosen_compression_method = chooseCompressionMethod(path, configuration->compression_method); - - auto buffer = object_storage->writeObject( - StoredObject(path), WriteMode::Rewrite, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, context->getWriteSettings()); - - write_buf = wrapWriteBufferWithCompressionMethod( - std::move(buffer), - chosen_compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - - writer = FormatFactory::instance().getOutputFormatParallelIfPossible( - configuration->format, *write_buf, sample_block, context, format_settings); - } + const std::string & blob_path = ""); String getName() const override { return "StorageObjectStorageSink"; } - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } + void consume(Chunk chunk) override; - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } + void onCancel() override; - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization. 
- release(); - } - } + void onException(std::exception_ptr exception) override; - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } + void onFinish() override; private: const Block sample_block; @@ -84,30 +36,8 @@ private: bool cancelled = false; std::mutex cancel_mutex; - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. - release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } + void finalize(); + void release(); }; class PartitionedStorageObjectStorageSink : public PartitionedSink @@ -119,30 +49,9 @@ public: std::optional format_settings_, const Block & sample_block_, ContextPtr context_, - const ASTPtr & partition_by) - : PartitionedSink(partition_by, context_, sample_block_) - , object_storage(object_storage_) - , configuration(configuration_) - , format_settings(format_settings_) - , sample_block(sample_block_) - , context(context_) - { - } + const ASTPtr & partition_by); - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto blob = configuration->getPaths().back(); - auto partition_key = replaceWildcards(blob, partition_id); - validatePartitionKey(partition_key, true); - return std::make_shared( - object_storage, - configuration, - format_settings, - sample_block, - context, - partition_key - ); - } + SinkPtr createSinkForPartition(const String & partition_id) override; private: ObjectStoragePtr object_storage; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index f170a46112f..1fda75897f9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -26,6 +26,8 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_COMPILE_REGEXP; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } StorageObjectStorageSource::StorageObjectStorageSource( @@ -182,8 +184,8 @@ std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const O auto get_last_mod_time = [&]() -> std::optional { - return object_info->metadata && object_info->metadata->last_modified - ? object_info->metadata->last_modified->epochMicroseconds() + return object_info->metadata + ? object_info->metadata->last_modified.epochMicroseconds() : 0; }; return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); @@ -472,4 +474,29 @@ ObjectInfoPtr StorageObjectStorageSource::KeysIterator::next(size_t /* processor return std::make_shared(key, metadata); } +StorageObjectStorageSource::ReaderHolder::ReaderHolder( + ObjectInfoPtr object_info_, + std::unique_ptr read_buf_, + std::shared_ptr source_, + std::unique_ptr pipeline_, + std::unique_ptr reader_) + : object_info(std::move(object_info_)) + , read_buf(std::move(read_buf_)) + , source(std::move(source_)) + , pipeline(std::move(pipeline_)) + , reader(std::move(reader_)) +{ +} + +StorageObjectStorageSource::ReaderHolder & StorageObjectStorageSource::ReaderHolder::operator=(ReaderHolder && other) noexcept +{ + /// The order of destruction is important. + /// reader uses pipeline, pipeline uses read_buf. 
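+ /// Assign in this order so the old reader is destroyed before the old pipeline, and the old pipeline before the old read buffer.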
+ reader = std::move(other.reader); + pipeline = std::move(other.pipeline); + source = std::move(other.source); + read_buf = std::move(other.read_buf); + object_info = std::move(other.object_info); + return *this; +} } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 0d6a6b71271..214a7de14d6 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -75,32 +75,16 @@ protected: std::unique_ptr read_buf_, std::shared_ptr source_, std::unique_ptr pipeline_, - std::unique_ptr reader_) - : object_info(std::move(object_info_)) - , read_buf(std::move(read_buf_)) - , source(std::move(source_)) - , pipeline(std::move(pipeline_)) - , reader(std::move(reader_)) {} + std::unique_ptr reader_); ReaderHolder() = default; ReaderHolder(ReaderHolder && other) noexcept { *this = std::move(other); } + ReaderHolder & operator=(ReaderHolder && other) noexcept; explicit operator bool() const { return reader != nullptr; } PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - ReaderHolder & operator=(ReaderHolder && other) noexcept - { - /// The order of destruction is important. - /// reader uses pipeline, pipeline uses read_buf. - reader = std::move(other.reader); - pipeline = std::move(other.pipeline); - source = std::move(other.source); - read_buf = std::move(other.read_buf); - object_info = std::move(other.object_info); - return *this; - } - const String & getRelativePath() const { return object_info->relative_path; } const ObjectInfo & getObjectInfo() const { return *object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } @@ -143,7 +127,7 @@ public: size_t estimatedKeysCount() override { return 0; } /// TODO FIXME - ObjectInfoPtr next(size_t) override { return std::make_shared( callback(), ObjectMetadata{} ); } + ObjectInfoPtr next(size_t) override { return std::make_shared(callback(), ObjectMetadata{}); } private: ReadTaskCallback callback; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h b/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h index 51be7419e1c..241e2f20962 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h @@ -1,4 +1,5 @@ -#include +#pragma once +#include namespace DB { diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index f7ab37490e1..e23457c04e9 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -1,6 +1,6 @@ -#include -#include -#include +#include +#include +#include #include #include #include @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + template static std::shared_ptr> createStorageObjectStorage( const StorageFactory::Arguments & args, @@ -149,6 +154,7 @@ void registerStorageObjectStorage(StorageFactory & factory) #if USE_HDFS registerStorageHDFS(factory); #endif + UNUSED(factory); } } diff --git a/src/Storages/ObjectStorageConfiguration.h b/src/Storages/ObjectStorageConfiguration.h deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index 
70dd8f27d71..9502a3c5e70 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -3,7 +3,7 @@ #if USE_AWS_S3 #include -#include +#include #include namespace DB diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index fc4ef77ebb9..b03224cedff 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include @@ -36,6 +36,13 @@ namespace ProfileEvents extern const Event S3ListObjects; } +namespace CurrentMetrics +{ + extern const Metric ObjectStorageS3Threads; + extern const Metric ObjectStorageS3ThreadsActive; + extern const Metric ObjectStorageS3ThreadsScheduled; +} + namespace DB { diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index 77d5be3698c..a53ce440c3f 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -81,7 +81,7 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); #if USE_AZURE_BLOB_STORAGE - fillDataImpl(res_columns, StorageAzureBlobStorage::getSchemaCache(context), "Azure"); /// FIXME + fillDataImpl(res_columns, StorageAzureBlob::getSchemaCache(context), "Azure"); /// FIXME #endif } diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 0ffa1460d78..8edba4e6e4b 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -1,18 +1,17 @@ #pragma once #include "config.h" - -#if USE_AWS_S3 - -# include -# include -# include -# include -# include -# include -#include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include + namespace DB { @@ -26,18 +25,20 @@ public: protected: StoragePtr executeImpl( - const ASTPtr & /*ast_function*/, + const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, - ColumnsDescription /*cached_columns*/, + ColumnsDescription cached_columns, bool /*is_insert_query*/) const override { ColumnsDescription columns; - if (TableFunction::configuration->structure != "auto") - columns = parseColumnsListFromString(TableFunction::configuration->structure, context); + auto configuration = TableFunction::getConfiguration(); + if (configuration->structure != "auto") + columns = parseColumnsListFromString(configuration->structure, context); + else if (!cached_columns.empty()) + columns = cached_columns; - StorageObjectStorageConfigurationPtr configuration = TableFunction::configuration; - StoragePtr storage = StorageIceberg>::create( + StoragePtr storage = Storage::create( configuration, context, "", StorageID(TableFunction::getDatabaseName(), table_name), columns, ConstraintsDescription{}, String{}, std::nullopt, false); @@ -45,26 +46,53 @@ protected: return storage; } - const char * getStorageTypeName() const override { return Storage::name; } + const char * getStorageTypeName() const override { return name; } - ColumnsDescription getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const override + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override { - if (TableFunction::configuration->structure == "auto") + auto configuration = 
TableFunction::getConfiguration(); + if (configuration->structure == "auto") { context->checkAccess(TableFunction::getSourceAccessType()); - return Storage::getTableStructureFromData(TableFunction::object_storage, TableFunction::configuration, std::nullopt, context); + auto object_storage = TableFunction::getObjectStorage(context, !is_insert_query); + return Storage::getTableStructureFromData(object_storage, configuration, std::nullopt, context); } - return parseColumnsListFromString(TableFunction::configuration->structure, context); + return parseColumnsListFromString(configuration->structure, context); } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override { + auto configuration = TableFunction::getConfiguration(); + configuration->format = "Parquet"; /// Set default format to Parquet if it's not specified in arguments. - TableFunction::configuration->format = "Parquet"; TableFunction::parseArguments(ast_function, context); } }; -} +struct TableFunctionIcebergName +{ + static constexpr auto name = "iceberg"; +}; + +struct TableFunctionDeltaLakeName +{ + static constexpr auto name = "deltaLake"; +}; + +struct TableFunctionHudiName +{ + static constexpr auto name = "hudi"; +}; + +#if USE_AWS_S3 +#if USE_AVRO +using TableFunctionIceberg = ITableFunctionDataLake; #endif +#if USE_PARQUET +using TableFunctionDeltaLake = ITableFunctionDataLake; +#endif +using TableFunctionHudi = ITableFunctionDataLake; +#endif + +} diff --git a/src/TableFunctions/TableFunctionDeltaLake.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp deleted file mode 100644 index 08b62ed2612..00000000000 --- a/src/TableFunctions/TableFunctionDeltaLake.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 && USE_PARQUET - -#include -#include -#include -#include -#include "registerTableFunctions.h" - -namespace DB -{ - -struct TableFunctionDeltaLakeName -{ - static constexpr auto name = "deltaLake"; -}; - -// using TableFunctionDeltaLake = ITableFunctionDataLake; -// -// void registerTableFunctionDeltaLake(TableFunctionFactory & factory) -// { -// factory.registerFunction( -// {.documentation = { -// .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", -// .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, -// .categories{"DataLake"}}, -// .allow_readonly = false}); -// } - -} - -#endif diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp deleted file mode 100644 index c6d84504c40..00000000000 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include -#include -#include -#include -#include "registerTableFunctions.h" - -namespace DB -{ - -struct TableFunctionHudiName -{ - static constexpr auto name = "hudi"; -}; -// using TableFunctionHudi = ITableFunctionDataLake; -// -// void registerTableFunctionHudi(TableFunctionFactory & factory) -// { -// factory.registerFunction( -// {.documentation -// = {.description=R"(The table function can be used to read the Hudi table stored on object store.)", -// .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, -// .categories{"DataLake"}}, -// .allow_readonly = false}); -// } -} - -#endif diff --git a/src/TableFunctions/TableFunctionIceberg.cpp b/src/TableFunctions/TableFunctionIceberg.cpp deleted file mode 100644 index 1a28f9292d1..00000000000 --- a/src/TableFunctions/TableFunctionIceberg.cpp +++ /dev/null 
@@ -1,37 +0,0 @@ -#include "config.h" - -#if USE_AWS_S3 && USE_AVRO - -#include -#include -#include -#include -#include "registerTableFunctions.h" - - -namespace DB -{ - -struct TableFunctionIcebergName -{ - static constexpr auto name = "iceberg"; -}; - -using TableFunctionIceberg = ITableFunctionDataLake< - TableFunctionIcebergName, - StorageIceberg, - TableFunctionS3>; - -void registerTableFunctionIceberg(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description=R"(The table function can be used to read the Iceberg table stored on object store.)", - .examples{{"iceberg", "SELECT * FROM iceberg(url, access_key_id, secret_access_key)", ""}}, - .categories{"DataLake"}}, - .allow_readonly = false}); -} - -} - -#endif diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index de46c13af37..a948102ac2b 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -7,10 +7,10 @@ #include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include @@ -24,7 +24,6 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; } template @@ -36,6 +35,15 @@ ObjectStoragePtr TableFunctionObjectStorage< return object_storage; } +template +StorageObjectStorageConfigurationPtr TableFunctionObjectStorage< + Definition, StorageSettings, Configuration>::getConfiguration() const +{ + if (!configuration) + configuration = std::make_shared(); + return configuration; +} + template std::vector TableFunctionObjectStorage< Definition, StorageSettings, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const @@ -65,8 +73,7 @@ template void TableFunctionObjectStorage< Definition, StorageSettings, Configuration>::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { - configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, engine_args, local_context, true); + StorageObjectStorageConfiguration::initialize(*getConfiguration(), engine_args, local_context, true); } template @@ -147,6 +154,7 @@ StoragePtr TableFunctionObjectStorage>( { diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index 1df0ba2f843..5e180301862 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -2,10 +2,9 @@ #include "config.h" -#if USE_AZURE_BLOB_STORAGE - #include #include +#include #include @@ -114,6 +113,8 @@ public: static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); protected: + using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, @@ -125,9 +126,11 @@ protected: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - ObjectStoragePtr getObjectStorage(const ContextPtr & context, bool create_readonly) const; - mutable typename StorageObjectStorage::ConfigurationPtr configuration; + ObjectStoragePtr getObjectStorage(const ContextPtr & context, bool create_readonly) const; + ConfigurationPtr getConfiguration() const; + + mutable ConfigurationPtr 
configuration; mutable ObjectStoragePtr object_storage; ColumnsDescription structure_hint; @@ -146,5 +149,3 @@ using TableFunctionAzureBlob = TableFunctionObjectStorage; #endif } - -#endif diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 8e6c96a3f2a..c93d816dc07 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -6,9 +6,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include namespace DB @@ -103,6 +103,8 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) } ); #endif + + UNUSED(factory); } #if USE_AWS_S3 diff --git a/src/TableFunctions/registerDataLakeTableFunctions.cpp b/src/TableFunctions/registerDataLakeTableFunctions.cpp new file mode 100644 index 00000000000..15a6668f434 --- /dev/null +++ b/src/TableFunctions/registerDataLakeTableFunctions.cpp @@ -0,0 +1,69 @@ +#include +#include + +namespace DB +{ + +#if USE_AWS_S3 +#if USE_AVRO +void registerTableFunctionIceberg(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the Iceberg table stored on object store.)", + .examples{{"iceberg", "SELECT * FROM iceberg(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +#if USE_PARQUET +void registerTableFunctionDeltaLake(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the DeltaLake table stored on object store.)", + .examples{{"deltaLake", "SELECT * FROM deltaLake(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +void registerTableFunctionHudi(TableFunctionFactory & factory) +{ + factory.registerFunction( + { + .documentation = + { + .description=R"(The table function can be used to read the Hudi table stored on object store.)", + .examples{{"hudi", "SELECT * FROM hudi(url, access_key_id, secret_access_key)", ""}}, + .categories{"DataLake"} + }, + .allow_readonly = false + }); +} +#endif + +void registerDataLakeTableFunctions(TableFunctionFactory & factory) +{ + UNUSED(factory); +#if USE_AWS_S3 +#if USE_AVRO + registerTableFunctionIceberg(factory); +#endif +#if USE_PARQUET + registerTableFunctionDeltaLake(factory); +#endif + registerTableFunctionHudi(factory); +#endif +} + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 627d945fbf3..05fe147e076 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -68,8 +68,7 @@ void registerTableFunctions() registerTableFunctionObjectStorage(factory); registerTableFunctionObjectStorageCluster(factory); - - + registerDataLakeTableFunctions(factory); } } diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index cefb198273e..7998a4b49d9 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -30,13 +30,6 @@ void registerTableFunctionS3Cluster(TableFunctionFactory & factory); void registerTableFunctionCOS(TableFunctionFactory & factory); void registerTableFunctionOSS(TableFunctionFactory & factory); void registerTableFunctionGCS(TableFunctionFactory & factory); -void 
registerTableFunctionHudi(TableFunctionFactory & factory); -#if USE_PARQUET -void registerTableFunctionDeltaLake(TableFunctionFactory & factory); -#endif -#if USE_AVRO -void registerTableFunctionIceberg(TableFunctionFactory & factory); -#endif #endif #if USE_HIVE @@ -67,10 +60,9 @@ void registerTableFunctionFormat(TableFunctionFactory & factory); void registerTableFunctionExplain(TableFunctionFactory & factory); -#if USE_AZURE_BLOB_STORAGE void registerTableFunctionObjectStorage(TableFunctionFactory & factory); void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory); -#endif +void registerDataLakeTableFunctions(TableFunctionFactory & factory); void registerTableFunctions(); From 7577257df558fb3bd74e862e7da7b0f1b485ffeb Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 14 Feb 2024 17:29:03 +0100 Subject: [PATCH 015/392] Fix cluster functions --- .../ReadFromStorageObjectStorage.cpp | 6 +- .../ObjectStorage/StorageObjectStorage.cpp | 3 +- .../StorageObjectStorageCluster.cpp | 9 ++- .../StorageObjectStorageSource.cpp | 63 ++++++++++++++++--- .../StorageObjectStorageSource.h | 25 ++++++-- src/Storages/S3Queue/StorageS3Queue.cpp | 5 +- .../TableFunctionObjectStorage.cpp | 3 + .../TableFunctionObjectStorageCluster.cpp | 27 ++++---- 8 files changed, 110 insertions(+), 31 deletions(-) diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp index b33eea7d354..9c58fcdaa9a 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp @@ -49,7 +49,8 @@ void ReadFromStorageObejctStorage::createIterator(const ActionsDAG::Node * predi auto context = getContext(); iterator_wrapper = StorageObjectStorageSource::createFileIterator( configuration, object_storage, distributed_processing, context, predicate, - virtual_columns, nullptr, query_settings.list_object_keys_size, context->getFileProgressCallback()); + virtual_columns, nullptr, query_settings.list_object_keys_size, metric_threads_count, + metric_threads_active, metric_threads_scheduled, context->getFileProgressCallback()); } } @@ -75,7 +76,8 @@ void ReadFromStorageObejctStorage::initializePipeline(QueryPipelineBuilder & pip auto source = std::make_shared( getName(), object_storage, configuration, info, format_settings, query_settings, - context, max_block_size, iterator_wrapper, need_only_count, schema_cache, std::move(threadpool)); + context, max_block_size, iterator_wrapper, need_only_count, schema_cache, + std::move(threadpool), metric_threads_count, metric_threads_active, metric_threads_scheduled); pipes.emplace_back(std::move(source)); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 08d7c9d0014..2e834da5529 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -289,7 +289,8 @@ ColumnsDescription StorageObjectStorage::getTableStructureFromD const auto settings = StorageSettings::create(context->getSettingsRef()); auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, object_storage, /* distributed_processing */false, - context, /* predicate */{}, /* virtual_columns */{}, &read_keys, settings.list_object_keys_size); + context, /* predicate */{}, /* virtual_columns */{}, &read_keys, settings.list_object_keys_size, + StorageSettings::ObjectStorageThreads(), 
StorageSettings::ObjectStorageThreadsActive(), StorageSettings::ObjectStorageThreadsScheduled()); ReadBufferIterator read_buffer_iterator( object_storage, configuration, file_iterator, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index c03bbd1a45d..f0d9ea400c4 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -88,7 +88,14 @@ StorageObjectStorageCluster::getTask auto iterator = std::make_shared( object_storage, configuration, predicate, virtual_columns, local_context, nullptr, settings.list_object_keys_size); - auto callback = std::make_shared>([iterator]() mutable -> String{ return iterator->next(0)->relative_path; }); + auto callback = std::make_shared>([iterator]() mutable -> String + { + auto object_info = iterator->next(0); + if (object_info) + return object_info->relative_path; + else + return ""; + }); return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 1fda75897f9..a8bde4cd56f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -42,7 +42,10 @@ StorageObjectStorageSource::StorageObjectStorageSource( std::shared_ptr file_iterator_, bool need_only_count_, SchemaCache & schema_cache_, - std::shared_ptr reader_pool_) + std::shared_ptr reader_pool_, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) @@ -57,6 +60,9 @@ StorageObjectStorageSource::StorageObjectStorageSource( , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(schema_cache_) + , metric_threads(metric_threads_) + , metric_threads_active(metric_threads_active_) + , metric_threads_scheduled(metric_threads_scheduled_) , create_reader_scheduler(threadPoolCallbackRunner(*create_reader_pool, "Reader")) { } @@ -75,10 +81,16 @@ std::shared_ptr StorageObjectStorageSourc const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, size_t list_object_keys_size, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_, std::function file_progress_callback) { if (distributed_processing) - return std::make_shared(local_context->getReadTaskCallback()); + return std::make_shared( + local_context->getReadTaskCallback(), + local_context->getSettingsRef().max_threads, + metric_threads_, metric_threads_active_, metric_threads_scheduled_); if (configuration->isNamespaceWithGlobs()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); @@ -380,19 +392,16 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor while (new_batch.empty()) { auto result = object_storage_iterator->getCurrentBatchAndScheduleNext(); - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {}", result.has_value()); - if (result.has_value()) - { - new_batch = std::move(result.value()); - } - else + if (!result.has_value()) { is_finished = true; return {}; } + new_batch = std::move(result.value()); for (auto it = new_batch.begin(); it != new_batch.end();) { + 
chassert(*it); if (!recursive && !re2::RE2::FullMatch((*it)->relative_path, *matcher)) it = new_batch.erase(it); else @@ -406,8 +415,11 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor { std::vector paths; paths.reserve(new_batch.size()); - for (auto & object_info : new_batch) + for (const auto & object_info : new_batch) + { + chassert(object_info); paths.push_back(fs::path(configuration->getNamespace()) / object_info->relative_path); + } VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); } @@ -416,6 +428,7 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor read_keys->insert(read_keys->end(), new_batch.begin(), new_batch.end()); object_infos = std::move(new_batch); + if (file_progress_callback) { for (const auto & object_info : object_infos) @@ -499,4 +512,36 @@ StorageObjectStorageSource::ReaderHolder & StorageObjectStorageSource::ReaderHol object_info = std::move(other.object_info); return *this; } + +StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( + const ReadTaskCallback & callback_, + size_t max_threads_count, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_) + : callback(callback_) +{ + ThreadPool pool(metric_threads_, metric_threads_active_, metric_threads_scheduled_, max_threads_count); + auto pool_scheduler = threadPoolCallbackRunner(pool, "ReadTaskIter"); + + std::vector> keys; + keys.reserve(max_threads_count); + for (size_t i = 0; i < max_threads_count; ++i) + keys.push_back(pool_scheduler([this] { return callback(); }, Priority{})); + + pool.wait(); + buffer.reserve(max_threads_count); + for (auto & key_future : keys) + buffer.emplace_back(std::make_shared(key_future.get(), std::nullopt)); +} + +ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::next(size_t) +{ + size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= buffer.size()) + return std::make_shared(callback()); + + return buffer[current_index]; +} + } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 214a7de14d6..14e59312c8c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -30,7 +30,10 @@ public: std::shared_ptr file_iterator_, bool need_only_count_, SchemaCache & schema_cache_, - std::shared_ptr reader_pool_); + std::shared_ptr reader_pool_, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_); ~StorageObjectStorageSource() override; @@ -47,6 +50,9 @@ public: const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, size_t list_object_keys_size, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_, std::function file_progress_callback = {}); protected: @@ -64,6 +70,10 @@ protected: SchemaCache & schema_cache; bool initialized = false; + const CurrentMetrics::Metric metric_threads; + const CurrentMetrics::Metric metric_threads_active; + const CurrentMetrics::Metric metric_threads_scheduled; + size_t total_rows_in_file = 0; LoggerPtr log = getLogger("StorageObjectStorageSource"); @@ -123,14 +133,21 @@ public: class StorageObjectStorageSource::ReadTaskIterator : public IIterator { public: - explicit 
ReadTaskIterator(const ReadTaskCallback & callback_) : callback(callback_) {} + ReadTaskIterator( + const ReadTaskCallback & callback_, + size_t max_threads_count, + CurrentMetrics::Metric metric_threads_, + CurrentMetrics::Metric metric_threads_active_, + CurrentMetrics::Metric metric_threads_scheduled_); - size_t estimatedKeysCount() override { return 0; } /// TODO FIXME + size_t estimatedKeysCount() override { return buffer.size(); } - ObjectInfoPtr next(size_t) override { return std::make_shared(callback(), ObjectMetadata{}); } + ObjectInfoPtr next(size_t) override; private: ReadTaskCallback callback; + ObjectInfos buffer; + std::atomic_size_t index = 0; }; class StorageObjectStorageSource::GlobIterator : public IIterator, WithContext diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index b03224cedff..b256f030da1 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -364,7 +364,10 @@ std::shared_ptr StorageS3Queue::createSource( file_iterator, false, Storage::getSchemaCache(local_context), - threadpool); + threadpool, + CurrentMetrics::ObjectStorageS3Threads, + CurrentMetrics::ObjectStorageS3ThreadsActive, + CurrentMetrics::ObjectStorageS3ThreadsScheduled); auto file_deleter = [=, this](const std::string & path) mutable { diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index a948102ac2b..a48c95469d0 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -93,6 +93,7 @@ template ColumnsDescription TableFunctionObjectStorage< Definition, StorageSettings, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const { + chassert(configuration); if (configuration->structure == "auto") { context->checkAccess(getSourceAccessType()); @@ -107,6 +108,7 @@ template bool TableFunctionObjectStorage< Definition, StorageSettings, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) { + chassert(configuration); return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); } @@ -127,6 +129,7 @@ StoragePtr TableFunctionObjectStoragestructure != "auto") columns = parseColumnsListFromString(configuration->structure, context); else if (!structure_hint.empty()) diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index c93d816dc07..5a29a693431 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -21,25 +21,23 @@ StoragePtr TableFunctionObjectStorageCluster; - StoragePtr storage; + auto configuration = Base::getConfiguration(); + bool structure_argument_was_provided = configuration->structure != "auto"; + ColumnsDescription columns; - bool structure_argument_was_provided = Base::configuration->structure != "auto"; - if (structure_argument_was_provided) - { - columns = parseColumnsListFromString(Base::configuration->structure, context); - } + columns = parseColumnsListFromString(configuration->structure, context); else if (!Base::structure_hint.empty()) - { columns = Base::structure_hint; - } + auto object_storage = Base::getObjectStorage(context, !is_insert_query); + StoragePtr storage; if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) { /// On worker node this filename won't contains globs storage = 
std::make_shared>( - Base::configuration, - Base::configuration->createOrUpdateObjectStorage(context, !is_insert_query), + configuration, + object_storage, Definition::storage_type_name, context, StorageID(Base::getDatabaseName(), table_name), @@ -54,8 +52,8 @@ StoragePtr TableFunctionObjectStorageCluster>( ITableFunctionCluster::cluster_name, - Base::configuration, - Base::configuration->createOrUpdateObjectStorage(context, !is_insert_query), + configuration, + object_storage, Definition::storage_type_name, StorageID(Base::getDatabaseName(), table_name), columns, @@ -87,7 +85,10 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) { .documentation = { .description=R"(The table function can be used to read the data stored on Azure Blob Storage in parallel for many nodes in a specified cluster.)", - .examples{{"azureBlobStorageCluster", "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])", ""}}}, + .examples{{ + "azureBlobStorageCluster", + "SELECT * FROM azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, " + "[account_name, account_key, format, compression, structure])", ""}}}, .allow_readonly = false } ); From ba0dc7bc54c8e621f63e3ba2f1bdbec15bdb9114 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 14 Feb 2024 10:32:29 +0100 Subject: [PATCH 016/392] fix failing style check and tests --- src/IO/ReadHelpers.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index af66cbb4cb5..53a7229e7d5 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -539,7 +539,6 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) if (*buf.position() == '\r') ++buf.position(); - } } From bf12c376b0dde30092f0588a5439d7c7cab5e08b Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 16 Feb 2024 13:30:55 +0100 Subject: [PATCH 017/392] fix for fast tests failing on shell test --- tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index 1e8dee22d28..88448171516 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh # Data preparation step -USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH = $($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/data_without_crlf.tsv DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/data_with_crlf.tsv From 6e6bc97a3e0d8618dc80f5a26bb59f73623d1ccb Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 16 Feb 2024 13:42:58 +0100 Subject: [PATCH 018/392] fix failing style check --- tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index 88448171516..cb7472be418 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -5,7 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh # Data preparation step -USER_FILES_PATH = $($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/data_without_crlf.tsv DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/data_with_crlf.tsv From 0552f44f70d76f25f268259a09cbbb10dc3781d7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 19 Feb 2024 10:45:56 +0100 Subject: [PATCH 019/392] Fixes after merge with master, move some part of code to object storage --- src/Backups/BackupIO_S3.cpp | 8 +- src/Disks/ObjectStorages/IObjectStorage.h | 3 +- .../ObjectStorages/ObjectStorageFactory.cpp | 4 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 57 ++++- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 12 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 110 +++++---- src/Disks/ObjectStorages/S3/diskSettings.h | 13 +- src/IO/S3Common.cpp | 7 +- src/Storages/Cache/SchemaCache.cpp | 2 + .../ObjectStorage/AzureBlob/Configuration.cpp | 7 +- .../ObjectStorage/AzureBlob/Configuration.h | 2 +- .../DataLakes/IStorageDataLake.h | 18 +- .../ObjectStorage/HDFS/Configuration.cpp | 7 +- .../ObjectStorage/HDFS/Configuration.h | 2 +- .../ObjectStorage/ReadBufferIterator.cpp | 210 +++++++++++++----- .../ObjectStorage/ReadBufferIterator.h | 12 +- .../ObjectStorage/S3/Configuration.cpp | 108 ++------- src/Storages/ObjectStorage/S3/Configuration.h | 18 +- .../ObjectStorage/StorageObjectStorage.cpp | 109 ++++++--- .../ObjectStorage/StorageObjectStorage.h | 20 +- .../StorageObjectStorageCluster.cpp | 30 ++- .../StorageObjectStorageCluster.h | 8 +- .../StorageObjectStorageConfiguration.cpp | 6 +- .../StorageObjectStorageConfiguration.h | 3 +- .../StorageObjectStorageSource.h | 4 + .../registerStorageObjectStorage.cpp | 2 +- src/Storages/S3Queue/StorageS3Queue.cpp | 8 +- src/Storages/StorageS3Settings.cpp | 11 +- src/Storages/StorageS3Settings.h | 8 +- .../TableFunctionObjectStorage.cpp | 6 +- .../TableFunctionObjectStorage.h | 6 +- .../TableFunctionObjectStorageCluster.cpp | 7 +- 32 files 
changed, 498 insertions(+), 330 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index fa4c1af3698..6c7b3674fb7 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -127,10 +127,10 @@ BackupReaderS3::BackupReaderS3( : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).value_or(S3Settings{})) { auto & request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context_->getSettingsRef()); + request_settings.updateFromSettingsIfChanged(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint request_settings.allow_native_copy = allow_s3_native_copy; client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_); @@ -217,10 +217,10 @@ BackupWriterS3::BackupWriterS3( : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).value_or(S3Settings{})) { auto & request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context_->getSettingsRef()); + request_settings.updateFromSettingsIfChanged(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint request_settings.allow_native_copy = allow_s3_native_copy; request_settings.setStorageClassName(storage_class_name); diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 8a5352e71ca..5ff618e08eb 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -193,8 +193,7 @@ public: virtual void applyNewSettings( const Poco::Util::AbstractConfiguration &, const std::string & /*config_prefix*/, - ContextPtr) - {} + ContextPtr) {} /// Sometimes object storages have something similar to chroot or namespace, for example /// buckets in S3. If object storage doesn't have any namepaces return empty string. 
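Note on the object storage hunks that follow: the point of this commit is that endpoint-level S3 auth settings are now carried next to the request settings inside S3ObjectStorageSettings, and applyNewSettings() rebuilds the AWS client only when those auth settings actually changed (disks keep the previous behaviour and always rebuild). A minimal, self-contained sketch of that "rebuild only on change" flow is shown below; AuthSettingsSketch, ObjectStorageSketch and client_generation are illustrative stand-ins for this note only, not the real ClickHouse classes.

#include <iostream>
#include <string>

struct AuthSettingsSketch
{
    std::string region;
    std::string access_key_id;
    std::string secret_access_key;

    /// Only non-empty fields of `from` override the current values
    /// (mirrors the updateFrom() change to S3Common.cpp in this commit).
    void updateFrom(const AuthSettingsSketch & from)
    {
        if (!from.region.empty())
            region = from.region;
        if (!from.access_key_id.empty())
            access_key_id = from.access_key_id;
        if (!from.secret_access_key.empty())
            secret_access_key = from.secret_access_key;
    }

    /// True if applying `other` on top of `*this` would change anything.
    bool hasUpdates(const AuthSettingsSketch & other) const
    {
        AuthSettingsSketch copy = *this;
        copy.updateFrom(other);
        return copy.region != region
            || copy.access_key_id != access_key_id
            || copy.secret_access_key != secret_access_key;
    }
};

struct ObjectStorageSketch
{
    AuthSettingsSketch auth_settings;
    bool for_disk_s3 = false;
    int client_generation = 0; /// stands in for the cached AWS client

    void applyNewSettings(const AuthSettingsSketch & new_settings)
    {
        /// Rebuild the client only if something changed (disks always rebuild).
        if (auth_settings.hasUpdates(new_settings) || for_disk_s3)
            ++client_generation;
        auth_settings.updateFrom(new_settings);
    }
};

int main()
{
    ObjectStorageSketch storage;
    storage.auth_settings = {"us-east-1", "key", "secret"};

    storage.applyNewSettings({"us-east-1", "", ""});   /// nothing new: client kept
    storage.applyNewSettings({"", "rotated-key", ""}); /// key changed: client rebuilt

    std::cout << "client rebuilt " << storage.client_generation << " time(s)\n";
}
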
diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index b3626135177..0855ba54d2f 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -126,7 +126,7 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) auto uri = getS3URI(config, config_prefix, context); auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings); + auto client = getClient(config, config_prefix, context, *settings, true); auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); auto object_storage = std::make_shared( @@ -162,7 +162,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) auto uri = getS3URI(config, config_prefix, context); auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); - auto client = getClient(config, config_prefix, context, *settings); + auto client = getClient(config, config_prefix, context, *settings, true); auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); auto object_storage = std::make_shared( diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index a9bd520e6e9..7e856b45aea 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -242,7 +242,12 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN if (mode != WriteMode::Rewrite) throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files"); - auto settings_ptr = s3_settings.get(); + S3Settings::RequestSettings request_settings = s3_settings.get()->request_settings; + if (auto query_context = CurrentThread::getQueryContext()) + { + request_settings.updateFromSettingsIfChanged(query_context->getSettingsRef()); + } + ThreadPoolCallbackRunner scheduler; if (write_settings.s3_allow_parallel_part_upload) scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); @@ -256,7 +261,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN uri.bucket, object.remote_path, buf_size, - settings_ptr->request_settings, + request_settings, std::move(blob_storage_log), attributes, std::move(scheduler), @@ -534,19 +539,57 @@ void S3ObjectStorage::startup() const_cast(*client.get()).EnableRequestProcessing(); } -void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +void S3ObjectStorage::applyNewSettings( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) { auto new_s3_settings = getSettings(config, config_prefix, context); - auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + if (!static_headers.empty()) + { + new_s3_settings->auth_settings.headers.insert( + new_s3_settings->auth_settings.headers.end(), + static_headers.begin(), static_headers.end()); + } + + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString())) + new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); + + auto current_s3_settings = s3_settings.get(); + if (current_s3_settings->auth_settings.hasUpdates(new_s3_settings->auth_settings) || for_disk_s3) + { + auto new_client = 
getClient(config, config_prefix, context, *new_s3_settings, for_disk_s3, &uri); + client.set(std::move(new_client)); + } + s3_settings.set(std::move(new_s3_settings)); - client.set(std::move(new_client)); } +// void S3ObjectStorage::applyNewSettings(ContextPtr context) +// { +// auto settings = s3_settings.get(); +// if (!endpoint_settings || !settings->auth_settings.hasUpdates(endpoint_settings->auth_settings)) +// return; +// +// const auto & config = context->getConfigRef(); +// auto new_s3_settings = getSettings(uri, config, "s3.", context); +// +// new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); +// +// auto new_client = getClient(config, "s3.", context, *new_s3_settings, false); +// +// s3_settings.set(std::move(new_s3_settings)); +// client.set(std::move(new_client)); +// } + std::unique_ptr S3ObjectStorage::cloneObjectStorage( - const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) + const std::string & new_namespace, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + ContextPtr context) { auto new_s3_settings = getSettings(config, config_prefix, context); - auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + auto new_client = getClient(config, config_prefix, context, *new_s3_settings, true); String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); auto new_uri{uri}; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index a6843a383e5..187cdb58447 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -21,11 +21,13 @@ struct S3ObjectStorageSettings S3ObjectStorageSettings( const S3Settings::RequestSettings & request_settings_, + const S3::AuthSettings & auth_settings_, uint64_t min_bytes_for_seek_, int32_t list_object_keys_size_, int32_t objects_chunk_size_to_delete_, bool read_only_) : request_settings(request_settings_) + , auth_settings(auth_settings_) , min_bytes_for_seek(min_bytes_for_seek_) , list_object_keys_size(list_object_keys_size_) , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) @@ -33,6 +35,7 @@ struct S3ObjectStorageSettings {} S3Settings::RequestSettings request_settings; + S3::AuthSettings auth_settings; uint64_t min_bytes_for_seek; int32_t list_object_keys_size; @@ -52,7 +55,9 @@ private: S3::URI uri_, const S3Capabilities & s3_capabilities_, ObjectStorageKeysGeneratorPtr key_generator_, - const String & disk_name_) + const String & disk_name_, + bool for_disk_s3_ = true, + const HTTPHeaderEntries & static_headers_ = {}) : uri(uri_) , key_generator(std::move(key_generator_)) , disk_name(disk_name_) @@ -60,6 +65,8 @@ private: , s3_settings(std::move(s3_settings_)) , s3_capabilities(s3_capabilities_) , log(getLogger(logger_name)) + , for_disk_s3(for_disk_s3_) + , static_headers(static_headers_) { } @@ -180,6 +187,9 @@ private: S3Capabilities s3_capabilities; LoggerPtr log; + + const bool for_disk_s3; + const HTTPHeaderEntries static_headers; }; /// Do not encode keys, store as-is, and do not require separate disk for metadata. 
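The diskSettings.cpp hunk below turns getClient() into a shared factory for both disk S3 and the S3 table engine: when called for a disk it reads the endpoint from the disk config and normalises the key to end with '/', while the table engine passes the already-parsed URL in through the new url_ parameter. A rough, self-contained sketch of that branching follows; UriSketch and resolveEndpoint are illustrative names rather than the real S3::URI and factory code, and the disk branch in the sketch returns the parsed URI, which the rest of getClient() relies on.

#include <iostream>
#include <stdexcept>
#include <string>

/// Illustrative stand-in for S3::URI: just the pieces this sketch needs.
struct UriSketch
{
    std::string endpoint; /// e.g. "https://storage.example.com/bucket"
    std::string key;      /// object key (prefix) inside the bucket
};

UriSketch resolveEndpoint(bool for_disk_s3, const std::string & endpoint_from_config, const UriSketch * url_from_caller)
{
    if (for_disk_s3)
    {
        /// Disk path: the endpoint comes from the <endpoint> entry of the disk config.
        UriSketch uri;
        uri.endpoint = endpoint_from_config;
        uri.key = "prefix/data"; /// in the real code this is extracted by parsing the endpoint
        if (!uri.key.ends_with('/'))
            uri.key.push_back('/');
        return uri;
    }

    /// Table-engine path: StorageS3Configuration already parsed the URL and passes it in.
    if (!url_from_caller)
        throw std::logic_error("URL not passed");
    return *url_from_caller;
}

int main()
{
    auto disk_uri = resolveEndpoint(true, "https://storage.example.com/bucket/prefix/data", nullptr);

    UriSketch table_url{"https://storage.example.com/bucket", "table_data/file.tsv"};
    auto table_uri = resolveEndpoint(false, "", &table_url);

    std::cout << disk_uri.key << " | " << table_uri.key << "\n";
}
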
diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 4fd4b17aabe..cb2bb690292 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -10,8 +10,6 @@ #include #include #include -#include "Disks/DiskFactory.h" - #include #include #include @@ -25,13 +23,19 @@ namespace DB { -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) +std::unique_ptr getSettings( + const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { const Settings & settings = context->getSettingsRef(); S3Settings::RequestSettings request_settings(config, config_prefix, settings, "s3_"); + /// TODO: add request settings prefix, becausse for StorageS3 it should be "s3." + + S3::AuthSettings auth_settings; + auth_settings.loadFromConfig(config_prefix, config); return std::make_unique( request_settings, + auth_settings, config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".list_object_keys_size", 1000), config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), @@ -42,78 +46,92 @@ std::unique_ptr getClient( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, - const S3ObjectStorageSettings & settings) + const S3ObjectStorageSettings & settings, + bool for_disk_s3, + const S3::URI * url_) { const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); const Settings & local_settings = context->getSettingsRef(); - String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - S3::URI uri(endpoint); - if (!uri.key.ends_with('/')) - uri.key.push_back('/'); + const auto & auth_settings = settings.auth_settings; + const auto & request_settings = settings.request_settings; + + S3::URI url; + if (for_disk_s3) + { + String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); + S3::URI uri(endpoint); + if (!uri.key.ends_with('/')) + uri.key.push_back('/'); + } + else + { + if (!url_) + throw Exception(ErrorCodes::LOGICAL_ERROR, "URL not passed"); + url = *url_; + } S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - config.getString(config_prefix + ".region", ""), + auth_settings.region, context->getRemoteHostFilter(), static_cast(global_settings.s3_max_redirects), static_cast(global_settings.s3_retry_attempts), global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ true, + for_disk_s3, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler, - uri.uri.getScheme()); + url.uri.getScheme()); + client_configuration.endpointOverride = url.endpoint; + client_configuration.maxConnections = static_cast(request_settings.max_connections); client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS); client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS); - client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); - client_configuration.endpointOverride = uri.endpoint; - client_configuration.http_keep_alive_timeout_ms = config.getUInt( - config_prefix + ".http_keep_alive_timeout_ms", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT * 1000); 
- client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000); - client_configuration.wait_on_pool_size_limit = false; - client_configuration.s3_use_adaptive_timeouts = config.getBool( - config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); - /* - * Override proxy configuration for backwards compatibility with old configuration format. - * */ - auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat( - ProxyConfiguration::protocolFromString(uri.uri.getScheme()), - config_prefix, - config - ); - if (proxy_config) + client_configuration.http_keep_alive_timeout_ms = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT * 1000); + client_configuration.http_connection_pool_size = config.getUInt( + config_prefix + ".http_connection_pool_size", static_cast(global_settings.s3_http_connection_pool_size.value)); + client_configuration.s3_use_adaptive_timeouts = config.getBool(config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); + client_configuration.wait_on_pool_size_limit = for_disk_s3; + + if (for_disk_s3) { - client_configuration.per_request_configuration - = [proxy_config]() { return proxy_config->resolve(); }; - client_configuration.error_report - = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; + /* + * Override proxy configuration for backwards compatibility with old configuration format. + * */ + if (auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat( + ProxyConfiguration::protocolFromString(url.uri.getScheme()), config_prefix, config)) + { + client_configuration.per_request_configuration + = [proxy_config]() { return proxy_config->resolve(); }; + client_configuration.error_report + = [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); }; + } } - HTTPHeaderEntries headers = S3::getHTTPHeaders(config_prefix, config); S3::ServerSideEncryptionKMSConfig sse_kms_config = S3::getSSEKMSConfig(config_prefix, config); - S3::ClientSettings client_settings{ - .use_virtual_addressing = uri.is_virtual_hosted_style, + .use_virtual_addressing = url.is_virtual_hosted_style, .disable_checksum = local_settings.s3_disable_checksum, .gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false), }; + auto credentials_configuration = S3::CredentialsConfiguration + { + auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), + auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), + auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), + auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), + }; + return S3::ClientFactory::instance().create( client_configuration, client_settings, - config.getString(config_prefix + ".access_key_id", ""), - config.getString(config_prefix + ".secret_access_key", ""), - config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""), + auth_settings.access_key_id, + auth_settings.secret_access_key, + auth_settings.server_side_encryption_customer_key_base64, std::move(sse_kms_config), - std::move(headers), - S3::CredentialsConfiguration - { - 
config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", true)), - config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false)), - config.getUInt64(config_prefix + ".expiration_window_seconds", config.getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - config.getBool(config_prefix + ".no_sign_request", config.getBool("s3.no_sign_request", false)) - }); + auth_settings.headers, + credentials_configuration); } } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index 83bf7b179ef..194035365ea 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -22,9 +22,18 @@ namespace DB struct S3ObjectStorageSettings; -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); +std::unique_ptr getSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context); -std::unique_ptr getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, const S3ObjectStorageSettings & settings); +std::unique_ptr getClient( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context, + const S3ObjectStorageSettings & settings, + bool for_disk_s3, + const S3::URI * url_ = nullptr); } diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 5039059f522..d33d5284240 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -157,8 +157,11 @@ void AuthSettings::updateFrom(const AuthSettings & from) if (!from.session_token.empty()) session_token = from.session_token; - headers = from.headers; - region = from.region; + if (!from.headers.empty()) + headers = from.headers; + if (!from.region.empty()) + region = from.region; + server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64; server_side_encryption_kms_config = from.server_side_encryption_kms_config; diff --git a/src/Storages/Cache/SchemaCache.cpp b/src/Storages/Cache/SchemaCache.cpp index 299dd292772..35fb8d348ef 100644 --- a/src/Storages/Cache/SchemaCache.cpp +++ b/src/Storages/Cache/SchemaCache.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace ProfileEvents @@ -109,6 +110,7 @@ std::optional SchemaCache::tryGetImpl(const Key & key, } ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheHits); + LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {}", StackTrace().toString()); auto & schema_info = it->second.schema_info; auto & queue_iterator = it->second.iterator; diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 109918dfc8b..9d21541e7e2 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -102,7 +102,7 @@ AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(Co return settings_ptr; } -ObjectStoragePtr StorageAzureBlobConfiguration::createOrUpdateObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { auto client = createClient(is_readonly); auto settings = createSettings(context); @@ -245,8 +245,6 @@ void 
StorageAzureBlobConfiguration::fromNamedCollection(const NamedCollection & compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); blobs_paths = {blob_path}; - if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(blob_path, true); } void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) @@ -367,9 +365,6 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte } blobs_paths = {blob_path}; - - if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(blob_path, true); } void StorageAzureBlobConfiguration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.h b/src/Storages/ObjectStorage/AzureBlob/Configuration.h index deeb365d012..3d701e72cb4 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.h +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.h @@ -31,7 +31,7 @@ public: String getNamespace() const override { return container; } void check(ContextPtr context) const override; - ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } void fromNamedCollection(const NamedCollection & collection) override; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 95196cdd000..8a21fc1152f 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -39,7 +39,7 @@ public: std::optional format_settings_, bool attach) { - auto object_storage = base_configuration->createOrUpdateObjectStorage(context); + auto object_storage = base_configuration->createObjectStorage(context); DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; ConfigurationPtr configuration = base_configuration->clone(); @@ -75,28 +75,22 @@ public: return ColumnsDescription(metadata->getTableSchema()); } - std::pair updateConfigurationAndGetCopy(ContextPtr local_context) override + void updateConfiguration(ContextPtr local_context) override { std::lock_guard lock(Storage::configuration_update_mutex); - auto new_object_storage = base_configuration->createOrUpdateObjectStorage(local_context); - bool updated = new_object_storage != nullptr; - if (updated) - Storage::object_storage = new_object_storage; + Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); - if (!current_metadata || !(*current_metadata == *new_metadata)) - current_metadata = std::move(new_metadata); - else if (!updated) - return {Storage::configuration, Storage::object_storage}; + if (current_metadata && *current_metadata == *new_metadata) + return; + current_metadata = std::move(new_metadata); auto updated_configuration = base_configuration->clone(); /// If metadata wasn't changed, we won't list data files again. 
updated_configuration->getPaths() = current_metadata->getDataFiles(); Storage::configuration = updated_configuration; - - return {Storage::configuration, Storage::object_storage}; } template diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index c80237b3055..731b05f4621 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -27,7 +27,7 @@ void StorageHDFSConfiguration::check(ContextPtr context) const checkHDFSURL(url); } -ObjectStoragePtr StorageHDFSConfiguration::createOrUpdateObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { UNUSED(is_readonly); auto settings = std::make_unique(); @@ -42,16 +42,13 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr, bool /* with_str if (args.size() > 1) format_name = checkAndGetLiteralArgument(args[1], "format_name"); - if (format_name == "auto") - format_name = FormatFactory::instance().getFormatFromFileName(url, true); - String compression_method; if (args.size() == 3) compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); else compression_method = "auto"; - } + } #endif diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 03fb0824123..1013c2e00c2 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -26,7 +26,7 @@ public: String getDataSourceDescription() override { return url; } void check(ContextPtr context) const override; - ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } void fromNamedCollection(const NamedCollection &) override {} diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index a3e19b907bc..a0e719878ac 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -10,6 +10,7 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int CANNOT_DETECT_FORMAT; } @@ -30,14 +31,15 @@ ReadBufferIterator::ReadBufferIterator( , query_settings(query_settings_) , schema_cache(schema_cache_) , read_keys(read_keys_) + , format(configuration->format.empty() || configuration->format == "auto" ? 
std::nullopt : std::optional(configuration->format)) , prev_read_keys_size(read_keys_.size()) { } -SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const String & path) const +SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const String & path, const String & format_name) const { auto source = fs::path(configuration->getDataSourceDescription()) / path; - return DB::getKeyForSchemaCache(source, configuration->format, format_settings, getContext()); + return DB::getKeyForSchemaCache(source, format_name, format_settings, getContext()); } SchemaCache::Keys ReadBufferIterator::getPathsForSchemaCache() const @@ -51,7 +53,7 @@ SchemaCache::Keys ReadBufferIterator::getPathsForSchemaCache() const { return fs::path(configuration->getDataSourceDescription()) / elem->relative_path; }); - return DB::getKeysForSchemaCache(sources, configuration->format, format_settings, getContext()); + return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); } std::optional ReadBufferIterator::tryGetColumnsFromCache( @@ -75,10 +77,29 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( } }; - auto cache_key = getKeyForSchemaCache(object_info->relative_path); - auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time); - if (columns) - return columns; + chassert(object_info); + if (format) + { + auto cache_key = getKeyForSchemaCache(object_info->relative_path, *format); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + return columns; + } + else + { + /// If format is unknown, we can iterate through all possible input formats + /// and check if we have an entry with this format and this file in schema cache. + /// If we have such entry for some format, we can use this format to read the file. + for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) + { + auto cache_key = getKeyForSchemaCache(object_info->relative_path, format_name); + if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) + { + /// Now format is known. It should be the same for all files. 
+ format = format_name; + return columns; + } + } + } } return std::nullopt; @@ -86,16 +107,18 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) { + chassert(current_object_info); if (query_settings.schema_inference_use_cache) - schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->relative_path), num_rows); + schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->relative_path, *format), num_rows); } void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) { + chassert(current_object_info); if (query_settings.schema_inference_use_cache && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) { - schema_cache.addColumns(getKeyForSchemaCache(current_object_info->relative_path), columns); + schema_cache.addColumns(getKeyForSchemaCache(current_object_info->relative_path, *format), columns); } } @@ -108,6 +131,11 @@ void ReadBufferIterator::setResultingSchema(const ColumnsDescription & columns) } } +void ReadBufferIterator::setFormatName(const String & format_name) +{ + format = format_name; +} + String ReadBufferIterator::getLastFileName() const { if (current_object_info) @@ -116,64 +144,128 @@ String ReadBufferIterator::getLastFileName() const return ""; } -std::pair, std::optional> ReadBufferIterator::next() +std::unique_ptr ReadBufferIterator::recreateLastReadBuffer() { - /// For default mode check cached columns for currently read keys on first iteration. - if (first && query_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) - return {nullptr, cached_columns}; - } + chassert(current_object_info); - current_object_info = file_iterator->next(0); - if (!current_object_info || current_object_info->relative_path.empty()) + auto impl = object_storage->readObject( + StoredObject(current_object_info->relative_path), getContext()->getReadSettings()); + + int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); + return wrapReadBufferWithCompressionMethod( + std::move(impl), chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), + zstd_window_log_max); +} + +ReadBufferIterator::Data ReadBufferIterator::next() +{ + if (first) { - if (first) + /// If format is unknown we iterate through all currently read keys on first iteration and + /// try to determine format by file name. + if (!format) { - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, " - "because there are no files with provided path. " - "You must specify table structure manually", - configuration->format); + for (const auto & object_info : read_keys) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->relative_path)) + { + format = format_from_file_name; + break; + } + } + } + + /// For default mode check cached columns for currently read keys on first iteration. + if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + return {nullptr, cached_columns, format}; } - return {nullptr, std::nullopt}; } - first = false; - - /// File iterator could get new keys after new iteration, - /// check them in schema cache if schema inference mode is default. 
- if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT - && read_keys.size() > prev_read_keys_size) + while (true) { - auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); - prev_read_keys_size = read_keys.size(); - if (columns_from_cache) - return {nullptr, columns_from_cache}; + current_object_info = file_iterator->next(0); + + if (!current_object_info || current_object_info->relative_path.empty()) + { + if (first) + { + if (format) + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "The table structure cannot be extracted from a {} format file, because there are no files with provided path " + "in S3 or all files are empty. You can specify table structure manually", + *format); + + throw Exception( + ErrorCodes::CANNOT_DETECT_FORMAT, + "The data format cannot be detected by the contents of the files, because there are no files with provided path " + "in S3 or all files are empty. You can specify the format manually"); + } + + return {nullptr, std::nullopt, format}; + } + + /// S3 file iterator could get new keys after new iteration + if (read_keys.size() > prev_read_keys_size) + { + /// If format is unknown we can try to determine it by new file names. + if (!format) + { + for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) + { + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->relative_path)) + { + format = format_from_file_name; + break; + } + } + } + + /// Check new files in schema cache if schema inference mode is default. + if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) + { + auto columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end()); + if (columns_from_cache) + return {nullptr, columns_from_cache, format}; + } + + prev_read_keys_size = read_keys.size(); + } + + if (getContext()->getSettingsRef().s3_skip_empty_files + && current_object_info->metadata && current_object_info->metadata->size_bytes == 0) + continue; + + /// In union mode, check cached columns only for current key. 
+ if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) + { + ObjectInfos objects{current_object_info}; + if (auto columns_from_cache = tryGetColumnsFromCache(objects.begin(), objects.end())) + { + first = false; + return {nullptr, columns_from_cache, format}; + } + } + + std::unique_ptr read_buffer = object_storage->readObject( + StoredObject(current_object_info->relative_path), + getContext()->getReadSettings(), + {}, + current_object_info->metadata->size_bytes); + + if (!getContext()->getSettingsRef().s3_skip_empty_files || !read_buffer->eof()) + { + first = false; + + read_buffer = wrapReadBufferWithCompressionMethod( + std::move(read_buffer), + chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), + static_cast(getContext()->getSettingsRef().zstd_window_log_max)); + + return {std::move(read_buffer), std::nullopt, format}; + } } - else if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - ObjectInfos paths = {current_object_info}; - if (auto columns_from_cache = tryGetColumnsFromCache(paths.begin(), paths.end())) - return {nullptr, columns_from_cache}; - } - - first = false; - - chassert(current_object_info->metadata); - std::unique_ptr read_buffer = object_storage->readObject( - StoredObject(current_object_info->relative_path), - getContext()->getReadSettings(), - {}, - current_object_info->metadata->size_bytes); - - read_buffer = wrapReadBufferWithCompressionMethod( - std::move(read_buffer), - chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), - static_cast(getContext()->getSettingsRef().zstd_window_log_max)); - - return {std::move(read_buffer), std::nullopt}; } - } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 4e9b8cfcfca..053bcbf894f 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -23,7 +24,7 @@ public: ObjectInfos & read_keys_, const ContextPtr & context_); - std::pair, std::optional> next() override; + Data next() override; void setNumRowsToLastFile(size_t num_rows) override; @@ -33,8 +34,14 @@ public: String getLastFileName() const override; + void setFormatName(const String & format_name) override; + + bool supportsLastReadBufferRecreation() const override { return true; } + + std::unique_ptr recreateLastReadBuffer() override; + private: - SchemaCache::Key getKeyForSchemaCache(const String & path) const; + SchemaCache::Key getKeyForSchemaCache(const String & path, const String & format_name) const; SchemaCache::Keys getPathsForSchemaCache() const; std::optional tryGetColumnsFromCache( const ObjectInfos::iterator & begin, const ObjectInfos::iterator & end); @@ -46,6 +53,7 @@ private: const StorageObjectStorageSettings query_settings; SchemaCache & schema_cache; ObjectInfos & read_keys; + std::optional format; size_t prev_read_keys_size; ObjectInfoPtr current_object_info; diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index f057745d669..896131e74d7 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -58,106 +59,47 @@ void StorageS3Configuration::check(ContextPtr context) const 
StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) { url = other.url; - auth_settings = other.auth_settings; - request_settings = other.request_settings; static_configuration = other.static_configuration; headers_from_ast = other.headers_from_ast; keys = other.keys; - initialized = other.initialized; format = other.format; compression_method = other.compression_method; structure = other.structure; } -ObjectStoragePtr StorageS3Configuration::createOrUpdateObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT +ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT { - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString()); - request_settings = s3_settings.request_settings; - request_settings.updateFromSettings(context->getSettings()); + const auto & config = context->getConfigRef(); + const std::string config_prefix = "s3."; - if (!initialized || (!static_configuration && auth_settings.hasUpdates(s3_settings.auth_settings))) + auto s3_settings = getSettings(config, config_prefix, context); + + auth_settings.updateFrom(s3_settings->auth_settings); + s3_settings->auth_settings = auth_settings; + s3_settings->request_settings = request_settings; + + if (!headers_from_ast.empty()) { - auth_settings.updateFrom(s3_settings.auth_settings); - keys[0] = url.key; - initialized = true; + s3_settings->auth_settings.headers.insert( + s3_settings->auth_settings.headers.end(), + headers_from_ast.begin(), headers_from_ast.end()); } - const auto & config = context->getConfigRef(); + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(url.uri.toString())) + s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); + + auto client = getClient(config, config_prefix, context, *s3_settings, false, &url); + auto key_generator = createObjectStorageKeysGeneratorAsIsWithPrefix(url.key); auto s3_capabilities = S3Capabilities { .support_batch_delete = config.getBool("s3.support_batch_delete", true), .support_proxy = config.getBool("s3.support_proxy", config.has("s3.proxy")), }; - auto s3_storage_settings = std::make_unique( - request_settings, - config.getUInt64("s3.min_bytes_for_seek", 1024 * 1024), - config.getInt("s3.list_object_keys_size", 1000), - config.getInt("s3.objects_chunk_size_to_delete", 1000), - config.getBool("s3.readonly", false)); - - auto key_generator = createObjectStorageKeysGeneratorAsIsWithPrefix(url.key); - auto client = createClient(context); - std::string disk_name = "StorageS3"; - return std::make_shared( - std::move(client), std::move(s3_storage_settings), url, s3_capabilities, key_generator, /*disk_name*/disk_name); -} - -std::unique_ptr StorageS3Configuration::createClient(ContextPtr context) -{ - const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); - const Settings & local_settings = context->getSettingsRef(); - - auto client_configuration = S3::ClientFactory::instance().createClientConfiguration( - auth_settings.region, - context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, - /* for_disk_s3 = */ false, - request_settings.get_request_throttler, - request_settings.put_request_throttler, - url.uri.getScheme()); - - client_configuration.endpointOverride = url.endpoint; - client_configuration.maxConnections = static_cast(request_settings.max_connections); - 
client_configuration.http_connection_pool_size = global_settings.s3_http_connection_pool_size; - - auto headers = auth_settings.headers; - if (!headers_from_ast.empty()) - headers.insert(headers.end(), headers_from_ast.begin(), headers_from_ast.end()); - - client_configuration.requestTimeoutMs = request_settings.request_timeout_ms; - - S3::ClientSettings client_settings{ - .use_virtual_addressing = url.is_virtual_hosted_style, - .disable_checksum = local_settings.s3_disable_checksum, - .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), - }; - - auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, - auth_settings.secret_access_key, - auth_settings.session_token); - - auto credentials_configuration = S3::CredentialsConfiguration - { - auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)), - auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)), - auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), - auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }; - - return S3::ClientFactory::instance().create( - client_configuration, - client_settings, - credentials.GetAWSAccessKeyId(), - credentials.GetAWSSecretKey(), - auth_settings.server_side_encryption_customer_key_base64, - auth_settings.server_side_encryption_kms_config, - std::move(headers), - credentials_configuration); + std::move(client), std::move(s3_settings), url, s3_capabilities, + key_generator, "StorageS3", false, headers_from_ast); } void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection) @@ -185,10 +127,6 @@ void StorageS3Configuration::fromNamedCollection(const NamedCollection & collect static_configuration = !auth_settings.access_key_id.empty() || auth_settings.no_sign_request.has_value(); keys = {url.key}; - - //if (format == "auto" && get_format_from_file) - if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(url.key, true); } void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_structure) @@ -386,10 +324,6 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_ auth_settings.no_sign_request = no_sign_request; keys = {url.key}; - - // if (format == "auto" && get_format_from_file) - if (format == "auto") - format = FormatFactory::instance().getFormatFromFileName(url.key, true); } void StorageS3Configuration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index 037cf2eae87..88a084f29b3 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -27,27 +27,25 @@ public: String getDataSourceDescription() override; void check(ContextPtr context) const override; - ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } + bool isStaticConfiguration() const override { return static_configuration; } - void fromNamedCollection(const NamedCollection & collection) override; - void fromAST(ASTs & args, ContextPtr context, bool 
with_structure) override; + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT static void addStructureToArgs(ASTs & args, const String & structure, ContextPtr context); private: + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + S3::URI url; + std::vector keys; + S3::AuthSettings auth_settings; S3Settings::RequestSettings request_settings; + HTTPHeaderEntries headers_from_ast; /// Headers from ast is a part of static configuration. /// If s3 configuration was passed from ast, then it is static. /// If from config - it can be changed with config reload. bool static_configuration = true; - /// Headers from ast is a part of static configuration. - HTTPHeaderEntries headers_from_ast; - std::vector keys; - - std::unique_ptr createClient(ContextPtr context); - - bool initialized = false; }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 2e834da5529..7337a528a76 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -13,8 +14,9 @@ #include #include #include -#include #include +#include +#include namespace DB @@ -39,21 +41,24 @@ std::unique_ptr getStorageMetadata( const std::string & engine_name, const ContextPtr & context) { + using Storage = StorageObjectStorage; + auto storage_metadata = std::make_unique(); if (columns.empty()) { - auto fetched_columns = StorageObjectStorage::getTableStructureFromData( - object_storage, configuration, format_settings, context); + auto fetched_columns = Storage::getTableStructureFromData(object_storage, configuration, format_settings, context); storage_metadata->setColumns(fetched_columns); } + else if (!columns.hasOnlyOrdinary()) + { + /// We don't allow special columns. + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine {} doesn't support special columns " + "like MATERIALIZED, ALIAS or EPHEMERAL", engine_name); + } else { - /// We don't allow special columns. 
- if (!columns.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Table engine {} doesn't support special columns " - "like MATERIALIZED, ALIAS or EPHEMERAL", - engine_name); + if (configuration->format == "auto") + Storage::setFormatFromData(object_storage, configuration, format_settings, context); storage_metadata->setColumns(columns); } @@ -120,14 +125,10 @@ bool StorageObjectStorage::parallelizeOutputAfterReading(Contex } template -std::pair -StorageObjectStorage::updateConfigurationAndGetCopy(ContextPtr local_context) +void StorageObjectStorage::updateConfiguration(ContextPtr context) { - std::lock_guard lock(configuration_update_mutex); - auto new_object_storage = configuration->createOrUpdateObjectStorage(local_context); - if (new_object_storage) - object_storage = new_object_storage; - return {configuration, object_storage}; + if (!configuration->isStaticConfiguration()) + object_storage->applyNewSettings(context->getConfigRef(), "s3.", context); } template @@ -151,8 +152,8 @@ void StorageObjectStorage::read( size_t max_block_size, size_t num_streams) { - auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); - if (partition_by && query_configuration->withWildcard()) + updateConfiguration(local_context); + if (partition_by && configuration->withWildcard()) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned {} storage is not implemented yet", @@ -165,8 +166,8 @@ void StorageObjectStorage::read( && local_context->getSettingsRef().optimize_count_from_files; auto read_step = std::make_unique( - query_object_storage, - query_configuration, + object_storage, + configuration, getName(), virtual_columns, format_settings, @@ -192,10 +193,10 @@ SinkToStoragePtr StorageObjectStorage::write( ContextPtr local_context, bool /* async_insert */) { - auto [query_configuration, query_object_storage] = updateConfigurationAndGetCopy(local_context); + updateConfiguration(local_context); const auto sample_block = metadata_snapshot->getSampleBlock(); - if (query_configuration->withWildcard()) + if (configuration->withWildcard()) { ASTPtr partition_by_ast = nullptr; if (auto insert_query = std::dynamic_pointer_cast(query)) @@ -209,24 +210,28 @@ SinkToStoragePtr StorageObjectStorage::write( if (partition_by_ast) { return std::make_shared( - object_storage, query_configuration, format_settings, sample_block, local_context, partition_by_ast); + object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); } } - if (query_configuration->withGlobs()) + if (configuration->withGlobs()) { throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "{} key '{}' contains globs, so the table is in readonly mode", - getName(), query_configuration->getPath()); + getName(), configuration->getPath()); } const auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); + + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII: {}", object_storage->exists(StoredObject(configuration->getPath()))); + auto configuration_copy = configuration->clone(); if (!storage_settings.truncate_on_insert - && object_storage->exists(StoredObject(query_configuration->getPath()))) + && object_storage->exists(StoredObject(configuration->getPath()))) { + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII 2: {}", storage_settings.create_new_file_on_insert); if (storage_settings.create_new_file_on_insert) { - auto & paths = query_configuration->getPaths(); + auto & paths = configuration_copy->getPaths(); size_t index = 
paths.size(); const auto & first_key = paths[0]; auto pos = first_key.find_first_of('.'); @@ -243,6 +248,7 @@ SinkToStoragePtr StorageObjectStorage::write( while (object_storage->exists(StoredObject(new_key))); paths.push_back(new_key); + configuration->getPaths().push_back(new_key); } else { @@ -251,12 +257,13 @@ SinkToStoragePtr StorageObjectStorage::write( "Object in bucket {} with key {} already exists. " "If you want to overwrite it, enable setting [engine_name]_truncate_on_insert, if you " "want to create a new file on each insert, enable setting [engine_name]_create_new_file_on_insert", - query_configuration->getNamespace(), query_configuration->getPaths().back()); + configuration_copy->getNamespace(), configuration_copy->getPaths().back()); } } + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII 3: {}", configuration_copy->getPaths().size()); return std::make_shared( - object_storage, query_configuration, format_settings, sample_block, local_context); + object_storage, configuration_copy, format_settings, sample_block, local_context); } template @@ -279,25 +286,55 @@ void StorageObjectStorage::truncate( } template -ColumnsDescription StorageObjectStorage::getTableStructureFromData( - ObjectStoragePtr object_storage, +std::unique_ptr StorageObjectStorage::createReadBufferIterator( + const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, - ContextPtr context) + ObjectInfos & read_keys, + const ContextPtr & context) { - ObjectInfos read_keys; const auto settings = StorageSettings::create(context->getSettingsRef()); auto file_iterator = StorageObjectStorageSource::createFileIterator( configuration, object_storage, /* distributed_processing */false, context, /* predicate */{}, /* virtual_columns */{}, &read_keys, settings.list_object_keys_size, StorageSettings::ObjectStorageThreads(), StorageSettings::ObjectStorageThreadsActive(), StorageSettings::ObjectStorageThreadsScheduled()); - ReadBufferIterator read_buffer_iterator( + return std::make_unique( object_storage, configuration, file_iterator, format_settings, StorageSettings::create(context->getSettingsRef()), getSchemaCache(context), read_keys, context); +} - const bool retry = configuration->withGlobs(); - return readSchemaFromFormat(configuration->format, format_settings, read_buffer_iterator, retry, context); +template +ColumnsDescription StorageObjectStorage::getTableStructureFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto read_buffer_iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + if (configuration->format == "auto") + { + auto [columns, format] = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); + configuration->format = format; + return columns; + } + else + { + return readSchemaFromFormat(configuration->format, format_settings, *read_buffer_iterator, context); + } +} + +template +void StorageObjectStorage::setFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto read_buffer_iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + configuration->format = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, 
context).second; } template class StorageObjectStorage; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 6f18153c7af..64c4c74ab22 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -21,6 +21,7 @@ using ReadTaskCallback = std::function; class IOutputFormat; class IInputFormat; class SchemaCache; +class ReadBufferIterator; template @@ -89,13 +90,26 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & context); static ColumnsDescription getTableStructureFromData( - ObjectStoragePtr object_storage, + const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, - ContextPtr context); + const ContextPtr & context); + + static void setFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context); protected: - virtual std::pair updateConfigurationAndGetCopy(ContextPtr local_context); + virtual void updateConfiguration(ContextPtr local_context); + + static std::unique_ptr createReadBufferIterator( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + ObjectInfos & read_keys, + const ContextPtr & context); const std::string engine_name; const NamesAndTypesList virtual_columns; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index f0d9ea400c4..2bd2c022aa8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -33,12 +33,10 @@ StorageObjectStorageCluster::Storage const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_) + ContextPtr context_) : IStorageCluster(cluster_name_, table_id_, - getLogger(fmt::format("{}({})", engine_name_, table_id_.table_name)), - structure_argument_was_provided_) + getLogger(fmt::format("{}({})", engine_name_, table_id_.table_name))) , engine_name(engine_name_) , configuration{configuration_} , object_storage(object_storage_) @@ -48,13 +46,16 @@ StorageObjectStorageCluster::Storage if (columns_.empty()) { - /// `format_settings` is set to std::nullopt, because StorageObjectStorageCluster is used only as table function - auto columns = StorageObjectStorage::getTableStructureFromData( - object_storage, configuration, /*format_settings=*/std::nullopt, context_); + ColumnsDescription columns = Storage::getTableStructureFromData(object_storage, configuration, /*format_settings=*/std::nullopt, context_); storage_metadata.setColumns(columns); } else + { + if (configuration->format == "auto") + StorageS3::setFormatFromData(object_storage, configuration, /*format_settings=*/std::nullopt, context_); + storage_metadata.setColumns(columns_); + } storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); @@ -64,9 +65,9 @@ StorageObjectStorageCluster::Storage } template -void StorageObjectStorageCluster::addColumnsStructureToQuery( +void StorageObjectStorageCluster::updateQueryToSendIfNeeded( ASTPtr & query, - const String & structure, + const DB::StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) { ASTExpressionList * expression_list = 
extractTableFunctionArgumentsFromSelectQuery(query); @@ -76,13 +77,18 @@ void StorageObjectStorageCluster::ad "Expected SELECT query from table function {}, got '{}'", engine_name, queryToString(query)); } - using TableFunction = TableFunctionObjectStorageCluster; - TableFunction::addColumnsStructureToArguments(expression_list->children, structure, context); + + TableFunction::updateStructureAndFormatArgumentsIfNeeded( + expression_list->children, + storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), + configuration->format, + context); } template RemoteQueryExecutor::Extension -StorageObjectStorageCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & local_context) const +StorageObjectStorageCluster::getTaskIteratorExtension( + const ActionsDAG::Node * predicate, const ContextPtr & local_context) const { const auto settings = StorageSettings::create(local_context->getSettingsRef()); auto iterator = std::make_shared( diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 507de20e888..5d77d4ced60 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -21,6 +21,7 @@ class StorageObjectStorageCluster : public IStorageCluster { public: using Storage = StorageObjectStorage; + using TableFunction = TableFunctionObjectStorageCluster; StorageObjectStorageCluster( const String & cluster_name_, @@ -30,8 +31,7 @@ public: const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - ContextPtr context_, - bool structure_argument_was_provided_); + ContextPtr context_); std::string getName() const override { return engine_name; } @@ -49,9 +49,9 @@ public: private: void updateBeforeRead(const ContextPtr & /* context */) override {} - void addColumnsStructureToQuery( + void updateQueryToSendIfNeeded( ASTPtr & query, - const String & structure, + const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; const String & engine_name; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 651f1d25ec1..a1c7d468fa6 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -1,5 +1,5 @@ #include - +#include namespace DB { @@ -14,6 +14,10 @@ void StorageObjectStorageConfiguration::initialize( configuration.fromNamedCollection(*named_collection); else configuration.fromAST(engine_args, local_context, with_table_structure); + + // FIXME: it should be - if (format == "auto" && get_format_from_file) + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); } bool StorageObjectStorageConfiguration::withWildcard() const diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 04b2d8e8fd9..2da262eb55d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -39,8 +39,9 @@ public: std::string getPathWithoutGlob() const; virtual void check(ContextPtr context) const = 0; - virtual ObjectStoragePtr createOrUpdateObjectStorage(ContextPtr context, bool is_readonly = true) 
= 0; /// NOLINT + virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT virtual StorageObjectStorageConfigurationPtr clone() = 0; + virtual bool isStaticConfiguration() const { return true; } String format = "auto"; String compression_method = "auto"; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 14e59312c8c..3b503fd4f0c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -5,10 +5,14 @@ #include #include #include +#include namespace DB { + +class SchemaCache; + class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { friend class StorageS3QueueSource; diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index e23457c04e9..3271b766f68 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -56,7 +56,7 @@ static std::shared_ptr> createStorageObjec return std::make_shared>( configuration, - configuration->createOrUpdateObjectStorage(context), + configuration->createObjectStorage(context), engine_name, args.getContext(), args.table_id, diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 2673aa94347..bd526ad687b 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -134,7 +134,7 @@ StorageS3Queue::StorageS3Queue( checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef()); - object_storage = configuration->createOrUpdateObjectStorage(context_); + object_storage = configuration->createObjectStorage(context_); FormatFactory::instance().checkFormatName(configuration->format); configuration->check(context_); @@ -146,8 +146,10 @@ StorageS3Queue::StorageS3Queue( } else { - if (configuration.format == "auto") - configuration.format = StorageS3::getTableStructureAndFormatFromData(configuration, format_settings, context_).second; + if (configuration->format == "auto") + { + StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context_); + } storage_metadata.setColumns(columns_); } diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index b0c1160429a..8510a6e4bdd 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings) { - updateFromSettingsImpl(settings, false); + updateFromSettings(settings, false); validate(); } @@ -66,7 +66,7 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedC validate(); } -void S3Settings::RequestSettings::PartUploadSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed) +void S3Settings::RequestSettings::PartUploadSettings::updateFromSettings(const Settings & settings, bool if_changed) { if (!if_changed || settings.s3_strict_upload_part_size.changed) strict_upload_part_size = settings.s3_strict_upload_part_size; @@ -263,13 +263,12 @@ void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settin request_timeout_ms = settings.s3_request_timeout_ms; } -void S3Settings::RequestSettings::updateFromSettings(const Settings & settings) +void 
S3Settings::RequestSettings::updateFromSettingsIfChanged(const Settings & settings) { updateFromSettingsImpl(settings, true); - upload_settings.updateFromSettings(settings); + upload_settings.updateFromSettings(settings, true); } - void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings) { std::lock_guard lock(mutex); @@ -293,7 +292,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U } } -S3Settings StorageS3Settings::getSettings(const String & endpoint) const +std::optional StorageS3Settings::getSettings(const String & endpoint) const { std::lock_guard lock(mutex); auto next_prefix_setting = s3_settings.upper_bound(endpoint); diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 0e152bb2d31..a4bc9f0b5cf 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -39,7 +39,7 @@ struct S3Settings size_t max_single_operation_copy_size = 5ULL * 1024 * 1024 * 1024; String storage_class_name; - void updateFromSettings(const Settings & settings) { updateFromSettingsImpl(settings, true); } + void updateFromSettings(const Settings & settings, bool if_changed); void validate(); private: @@ -52,8 +52,6 @@ struct S3Settings const Settings & settings, String setting_name_prefix = {}); - void updateFromSettingsImpl(const Settings & settings, bool if_changed); - friend struct RequestSettings; }; @@ -96,7 +94,7 @@ struct S3Settings const Settings & settings, String setting_name_prefix = {}); - void updateFromSettings(const Settings & settings); + void updateFromSettingsIfChanged(const Settings & settings); private: void updateFromSettingsImpl(const Settings & settings, bool if_changed); @@ -112,7 +110,7 @@ class StorageS3Settings public: void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings); - S3Settings getSettings(const String & endpoint) const; + std::optional getSettings(const String & endpoint) const; private: mutable std::mutex mutex; diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index a48c95469d0..b07b328eed9 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -31,7 +31,7 @@ ObjectStoragePtr TableFunctionObjectStorage< Definition, StorageSettings, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const { if (!object_storage) - object_storage = configuration->createOrUpdateObjectStorage(context, create_readonly); + object_storage = configuration->createObjectStorage(context, create_readonly); return object_storage; } @@ -63,8 +63,8 @@ std::vector TableFunctionObjectStorage< } template -void TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +void TableFunctionObjectStorage::updateStructureAndFormatArgumentsIfNeeded( + ASTs & args, const String & structure, const String & /* format */, const ContextPtr & context) { Configuration::addStructureToArgs(args, structure, context); } diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index 5e180301862..9022f6e577f 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -110,7 +110,11 @@ public: 
virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); + static void updateStructureAndFormatArgumentsIfNeeded( + ASTs & args, + const String & structure, + const String & format, + const ContextPtr & context); protected: using ConfigurationPtr = StorageObjectStorageConfigurationPtr; diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 5a29a693431..55b41cf6ca8 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -20,12 +20,10 @@ StoragePtr TableFunctionObjectStorageCluster; - auto configuration = Base::getConfiguration(); - bool structure_argument_was_provided = configuration->structure != "auto"; ColumnsDescription columns; - if (structure_argument_was_provided) + if (configuration->structure != "auto") columns = parseColumnsListFromString(configuration->structure, context); else if (!Base::structure_hint.empty()) columns = Base::structure_hint; @@ -58,8 +56,7 @@ StoragePtr TableFunctionObjectStorageClusterstartup(); From 2e9b6545b6f060e1fa92970276116734f483f417 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 19 Feb 2024 18:24:23 +0100 Subject: [PATCH 020/392] Fix --- src/Disks/ObjectStorages/S3/diskSettings.cpp | 16 ++++++------- src/Storages/Cache/SchemaCache.cpp | 1 - .../ObjectStorage/StorageObjectStorage.cpp | 3 --- .../StorageObjectStorageCluster.cpp | 3 ++- .../StorageObjectStorageSource.cpp | 24 ++++++++++--------- .../TableFunctionObjectStorageCluster.cpp | 2 +- 6 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index cb2bb690292..43b1cffb3e6 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -27,12 +27,8 @@ std::unique_ptr getSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { const Settings & settings = context->getSettingsRef(); - S3Settings::RequestSettings request_settings(config, config_prefix, settings, "s3_"); - /// TODO: add request settings prefix, becausse for StorageS3 it should be "s3." 
- - S3::AuthSettings auth_settings; - auth_settings.loadFromConfig(config_prefix, config); - + auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_"); + auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); return std::make_unique( request_settings, auth_settings, @@ -60,9 +56,9 @@ std::unique_ptr getClient( if (for_disk_s3) { String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - S3::URI uri(endpoint); - if (!uri.key.ends_with('/')) - uri.key.push_back('/'); + url = S3::URI(endpoint); + if (!url.key.ends_with('/')) + url.key.push_back('/'); } else { @@ -123,6 +119,8 @@ std::unique_ptr getClient( auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), }; + LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {} - {}", auth_settings.access_key_id, auth_settings.secret_access_key); + return S3::ClientFactory::instance().create( client_configuration, client_settings, diff --git a/src/Storages/Cache/SchemaCache.cpp b/src/Storages/Cache/SchemaCache.cpp index 35fb8d348ef..5dc39f04ae0 100644 --- a/src/Storages/Cache/SchemaCache.cpp +++ b/src/Storages/Cache/SchemaCache.cpp @@ -110,7 +110,6 @@ std::optional SchemaCache::tryGetImpl(const Key & key, } ProfileEvents::increment(ProfileEvents::SchemaInferenceCacheHits); - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {}", StackTrace().toString()); auto & schema_info = it->second.schema_info; auto & queue_iterator = it->second.iterator; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 7337a528a76..30f5c36879c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -223,12 +223,10 @@ SinkToStoragePtr StorageObjectStorage::write( const auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII: {}", object_storage->exists(StoredObject(configuration->getPath()))); auto configuration_copy = configuration->clone(); if (!storage_settings.truncate_on_insert && object_storage->exists(StoredObject(configuration->getPath()))) { - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII 2: {}", storage_settings.create_new_file_on_insert); if (storage_settings.create_new_file_on_insert) { auto & paths = configuration_copy->getPaths(); @@ -260,7 +258,6 @@ SinkToStoragePtr StorageObjectStorage::write( configuration_copy->getNamespace(), configuration_copy->getPaths().back()); } } - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII 3: {}", configuration_copy->getPaths().size()); return std::make_shared( object_storage, configuration_copy, format_settings, sample_block, local_context); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 2bd2c022aa8..9b98051086d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -92,7 +92,8 @@ StorageObjectStorageCluster::getTask { const auto settings = StorageSettings::create(local_context->getSettingsRef()); auto iterator = std::make_shared( - object_storage, configuration, predicate, virtual_columns, local_context, nullptr, settings.list_object_keys_size); + object_storage, configuration, predicate, virtual_columns, local_context, + nullptr, settings.list_object_keys_size, 
local_context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index a8bde4cd56f..d91850bf99c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -362,9 +362,9 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } else { - const auto key_with_globs = configuration_->getPath(); - auto object_metadata = object_storage->getObjectMetadata(key_with_globs); - auto object_info = std::make_shared(key_with_globs, object_metadata); + const auto object_key = configuration_->getPath(); + auto object_metadata = object_storage->getObjectMetadata(object_key); + auto object_info = std::make_shared(object_key, object_metadata); object_infos.emplace_back(object_info); if (read_keys) @@ -381,12 +381,11 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor { std::lock_guard lock(next_mutex); - if (is_finished) + bool current_batch_processed = object_infos.empty() || index >= object_infos.size(); + if (is_finished && current_batch_processed) return {}; - bool need_new_batch = object_infos.empty() || index >= object_infos.size(); - - if (need_new_batch) + if (current_batch_processed) { ObjectInfos new_batch; while (new_batch.empty()) @@ -439,11 +438,10 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor } } - size_t current_index = index++; - if (current_index >= object_infos.size()) + if (index >= object_infos.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Index out of bound for blob metadata"); - return object_infos[current_index]; + return object_infos[index++]; } StorageObjectStorageSource::KeysIterator::KeysIterator( @@ -532,7 +530,11 @@ StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( pool.wait(); buffer.reserve(max_threads_count); for (auto & key_future : keys) - buffer.emplace_back(std::make_shared(key_future.get(), std::nullopt)); + { + auto key = key_future.get(); + if (!key.empty()) + buffer.emplace_back(std::make_shared(key, std::nullopt)); + } } ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::next(size_t) diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 55b41cf6ca8..4ec94cfaf7c 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -43,7 +43,7 @@ StoragePtr TableFunctionObjectStorageCluster Date: Mon, 19 Feb 2024 20:29:22 +0100 Subject: [PATCH 021/392] Fix style check --- src/Disks/ObjectStorages/S3/diskSettings.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 43b1cffb3e6..6fec4758456 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -22,6 +22,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} std::unique_ptr getSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) From d88f8646b180f0ca4fec7bab5c9c9c7cc7574c0c Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Feb 2024 11:03:12 +0100 Subject: [PATCH 022/392] Fix after merge with master --- src/Coordination/Standalone/Context.cpp | 15 +++++++++++++++ 
src/Coordination/Standalone/Context.h | 3 +++ src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- src/Storages/ObjectStorage/S3/Configuration.cpp | 2 +- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 374610769c4..c16ecbfd5c3 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -77,6 +77,8 @@ struct ContextSharedPart : boost::noncopyable mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes + + std::optional storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage }; ContextData::ContextData() = default; @@ -382,4 +384,17 @@ std::shared_ptr Context::getZooKeeper() const throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper"); } +const StorageS3Settings & Context::getStorageS3Settings() const +{ + std::lock_guard lock(shared->mutex); + + if (!shared->storage_s3_settings) + { + const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config(); + shared->storage_s3_settings.emplace().loadFromConfig("s3", config, getSettingsRef()); + } + + return *shared->storage_s3_settings; +} + } diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index 49ad2b568fe..3346a865f0f 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -36,6 +36,7 @@ class FilesystemCacheLog; class FilesystemReadPrefetchesLog; class BlobStorageLog; class IOUringReader; +class StorageS3Settings; /// A small class which owns ContextShared. /// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete. 
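Note on the standalone Context change above: getStorageS3Settings() builds the StorageS3Settings object lazily, on first use, under the shared mutex, so the Keeper build only pays for it when S3 storage is actually touched. A minimal, self-contained sketch of that lazy-initialization pattern follows; the S3LikeSettings/ContextLikeHolder names are illustrative stand-ins, not types from the ClickHouse codebase.

#include <mutex>
#include <optional>
#include <string>

/// Minimal stand-in for a settings object that is expensive to construct.
struct S3LikeSettings
{
    std::string loaded_from;
};

class ContextLikeHolder
{
public:
    /// Construct the settings on first access only, guarded by the shared mutex,
    /// mirroring the std::optional + emplace() approach in the patch above.
    /// The returned reference stays valid because the object is never reset.
    const S3LikeSettings & getSettings(const std::string & config_name) const
    {
        std::lock_guard<std::mutex> lock(mutex);
        if (!settings)
            settings.emplace(S3LikeSettings{config_name});
        return *settings;
    }

private:
    mutable std::mutex mutex;
    mutable std::optional<S3LikeSettings> settings;
};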
@@ -160,6 +161,8 @@ public: void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); zkutil::ZooKeeperPtr getZooKeeper() const; + + const StorageS3Settings & getStorageS3Settings() const; }; } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index a75a747f334..0869e2ebbd2 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -552,7 +552,7 @@ void S3ObjectStorage::applyNewSettings( static_headers.begin(), static_headers.end()); } - if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString())) + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName())) new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); auto current_s3_settings = s3_settings.get(); diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 896131e74d7..47e7ebd53a6 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -86,7 +86,7 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, headers_from_ast.begin(), headers_from_ast.end()); } - if (auto endpoint_settings = context->getStorageS3Settings().getSettings(url.uri.toString())) + if (auto endpoint_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName())) s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); auto client = getClient(config, config_prefix, context, *s3_settings, false, &url); From 94c44cefc89fbb471505aedd803600bc8ace7a49 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Feb 2024 16:24:23 +0100 Subject: [PATCH 023/392] Fix clang tidy --- src/Storages/ObjectStorage/AzureBlob/Configuration.cpp | 5 +---- src/Storages/ObjectStorage/HDFS/Configuration.cpp | 4 +--- src/Storages/ObjectStorage/S3/Configuration.cpp | 5 +---- .../ObjectStorage/StorageObjectStorageConfiguration.cpp | 7 +++++++ .../ObjectStorage/StorageObjectStorageConfiguration.h | 1 + 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 9d21541e7e2..7a670441e72 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -78,6 +78,7 @@ void StorageAzureBlobConfiguration::check(ContextPtr context) const } StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other) + : StorageObjectStorageConfiguration(other) { connection_url = other.connection_url; is_connection_string = other.is_connection_string; @@ -86,10 +87,6 @@ StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureB container = other.container; blob_path = other.blob_path; blobs_paths = other.blobs_paths; - - format = other.format; - compression_method = other.compression_method; - structure = other.structure; } AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(ContextPtr context) diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 731b05f4621..2f2427edb24 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -12,13 +12,11 @@ namespace DB { 
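The storage configuration copy constructors touched in this commit (the AzureBlob one above, the HDFS and S3 ones just below) stop copying format/compression_method/structure by hand and instead delegate to the new base-class copy constructor, which is what the clang-tidy fix amounts to. A reduced sketch of that delegation, with illustrative names and simplified members rather than the real classes:

#include <string>
#include <vector>

struct BaseConfiguration
{
    BaseConfiguration() = default;
    /// Copy only the members shared by every storage configuration.
    BaseConfiguration(const BaseConfiguration & other)
        : format(other.format)
        , compression_method(other.compression_method)
        , structure(other.structure)
    {
    }

    std::string format = "auto";
    std::string compression_method = "auto";
    std::string structure = "auto";
};

struct HDFSLikeConfiguration : BaseConfiguration
{
    HDFSLikeConfiguration() = default;
    /// Delegate the shared part to the base copy constructor and copy
    /// only the storage-specific members here.
    HDFSLikeConfiguration(const HDFSLikeConfiguration & other)
        : BaseConfiguration(other)
        , url(other.url)
        , path(other.path)
        , paths(other.paths)
    {
    }

    std::string url;
    std::string path;
    std::vector<std::string> paths;
};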
StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) + : StorageObjectStorageConfiguration(other) { url = other.url; path = other.path; paths = other.paths; - format = other.format; - compression_method = other.compression_method; - structure = other.structure; } void StorageHDFSConfiguration::check(ContextPtr context) const diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 47e7ebd53a6..1e14ccc4b31 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -57,15 +57,12 @@ void StorageS3Configuration::check(ContextPtr context) const } StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) + : StorageObjectStorageConfiguration(other) { url = other.url; static_configuration = other.static_configuration; headers_from_ast = other.headers_from_ast; keys = other.keys; - - format = other.format; - compression_method = other.compression_method; - structure = other.structure; } ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index a1c7d468fa6..8a4dee2c31b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -20,6 +20,13 @@ void StorageObjectStorageConfiguration::initialize( configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); } +StorageObjectStorageConfiguration::StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other) +{ + format = other.format; + compression_method = other.compression_method; + structure = other.structure; +} + bool StorageObjectStorageConfiguration::withWildcard() const { static const String PARTITION_ID_WILDCARD = "{_partition_id}"; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 2da262eb55d..47afbc5d0c6 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -12,6 +12,7 @@ class StorageObjectStorageConfiguration { public: StorageObjectStorageConfiguration() = default; + StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other); virtual ~StorageObjectStorageConfiguration() = default; using Path = std::string; From 6b5953859ec7fbd22728426e8110162b57b1b9aa Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 21 Feb 2024 17:59:11 +0100 Subject: [PATCH 024/392] Check for deserializeTextEscaped in other serializations, fix review changes --- .../SerializationAggregateFunction.cpp | 4 +- .../SerializationCustomSimpleText.cpp | 2 +- .../Serializations/SerializationEnum.cpp | 2 +- .../Serializations/SerializationObject.cpp | 4 +- .../Serializations/SerializationVariant.cpp | 4 +- src/IO/ReadHelpers.cpp | 42 ++++++++++++++----- src/IO/ReadHelpers.h | 1 - .../Impl/TabSeparatedRowInputFormat.cpp | 14 +++---- 8 files changed, 45 insertions(+), 28 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index c9af5d1f838..28a4fcf86da 100644 --- 
a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -150,10 +150,10 @@ void SerializationAggregateFunction::serializeTextEscaped(const IColumn & column } -void SerializationAggregateFunction::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationAggregateFunction::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String s; - readEscapedString(s, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(s, istr) : readEscapedString(s, istr); deserializeFromString(function, column, s, version); } diff --git a/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp b/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp index abe443cab1b..a3b0b088b17 100644 --- a/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp +++ b/src/DataTypes/Serializations/SerializationCustomSimpleText.cpp @@ -75,7 +75,7 @@ void SerializationCustomSimpleText::serializeTextEscaped(const IColumn & column, void SerializationCustomSimpleText::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String str; - readEscapedString(str, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(str, istr) : readEscapedString(str, istr); deserializeFromString(*this, column, str, settings); } diff --git a/src/DataTypes/Serializations/SerializationEnum.cpp b/src/DataTypes/Serializations/SerializationEnum.cpp index 14b1a33e2ce..f44ae2fd4f9 100644 --- a/src/DataTypes/Serializations/SerializationEnum.cpp +++ b/src/DataTypes/Serializations/SerializationEnum.cpp @@ -29,7 +29,7 @@ void SerializationEnum::deserializeTextEscaped(IColumn & column, ReadBuffe { /// NOTE It would be nice to do without creating a temporary object - at least extract std::string out. std::string field_name; - readEscapedString(field_name, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(field_name, istr) : readEscapedString(field_name, istr); assert_cast(column).getData().push_back(ref_enum_values.getValue(StringRef(field_name), true)); } } diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index e6dc16ef5a0..9d0ff5903b1 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -105,9 +105,9 @@ void SerializationObject::deserializeWholeText(IColumn & column, ReadBuf } template -void SerializationObject::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationObject::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, [&](String & s) { readEscapedString(s, istr); }); + deserializeTextImpl(column, [&](String & s) { settings.tsv.crlf_end_of_line_input ? 
readEscapedStringCRLF(s, istr) : readEscapedString(s, istr); }); } template diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 5af94364167..a4e77b9c75f 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -604,14 +604,14 @@ void SerializationVariant::serializeTextEscaped(const IColumn & column, size_t r bool SerializationVariant::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String field; - readEscapedString(field, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(field, istr) : readEscapedString(field, istr); return tryDeserializeTextEscapedImpl(column, field, settings); } void SerializationVariant::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String field; - readEscapedString(field, istr); + settings.tsv.crlf_end_of_line_input ? readEscapedStringCRLF(field, istr) : readEscapedString(field, istr); if (!tryDeserializeTextEscapedImpl(column, field, settings)) throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse escaped value of type {} here: {}", variant_name, field); } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 53a7229e7d5..e763d627f40 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -537,8 +537,19 @@ void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) } } - if (*buf.position() == '\r') - ++buf.position(); + if constexpr (support_crlf) + { + if (*buf.position() == '\r') + { + ++buf.position(); + if (!buf.eof() && *buf.position() != '\n') + { + s.push_back('\r'); + continue; + } + return; + } + } } } @@ -555,11 +566,10 @@ void readEscapedString(String & s, ReadBuffer & buf) readEscapedStringInto(s, buf); } -template void readEscapedStringCRLF(String & s, ReadBuffer & buf) { s.clear(); - readEscapedStringInto(s, buf); + readEscapedStringInto(s, buf); } template void readEscapedStringInto,false>(PaddedPODArray & s, ReadBuffer & buf); @@ -567,9 +577,6 @@ template void readEscapedStringInto(NullOutput & s, ReadBuffer template void readEscapedStringInto,true>(PaddedPODArray & s, ReadBuffer & buf); template void readEscapedStringInto(NullOutput & s, ReadBuffer & buf); -template void readEscapedStringCRLF(String & s, ReadBuffer & buf); -template void readEscapedStringCRLF(String & s, ReadBuffer & buf); - /** If enable_sql_style_quoting == true, * strings like 'abc''def' will be parsed as abc'def. 
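The support_crlf branch added to readEscapedStringIntoImpl above stops a field at '\r' only when the next character is '\n'; a lone '\r' is kept as part of the value. A stand-alone sketch of that decision using std::string_view instead of the ReadBuffer API (the function name and types here are illustrative only):

#include <string>
#include <string_view>

/// Consume one escaped TSV field from `in`, honouring CRLF row endings.
/// A '\r' that is not followed by '\n' is treated as field data, mirroring
/// the behaviour of the support_crlf branch above.
std::string readFieldCRLF(std::string_view & in)
{
    std::string field;
    while (!in.empty())
    {
        const char c = in.front();
        if (c == '\t' || c == '\n')
            break;                          /// field or row terminator, leave it in the input
        if (c == '\r')
        {
            in.remove_prefix(1);            /// tentatively consume the '\r'
            if (!in.empty() && in.front() != '\n')
            {
                field.push_back('\r');      /// bare '\r' stays part of the value
                continue;
            }
            break;                          /// "\r\n" or trailing '\r': stop, '\n' is left for the row-end check
        }
        field.push_back(c);
        in.remove_prefix(1);
    }
    return field;
}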
* Please note, that even with SQL style quoting enabled, @@ -1975,13 +1982,26 @@ bool tryReadJSONField(String & s, ReadBuffer & buf) return readParsedValueInto(s, buf, parse_func); } -template +template +void readTSVFieldImpl(String & s, ReadBuffer & buf) +{ + if constexpr (supports_crlf) + readEscapedStringIntoImpl(s, buf); + else + readEscapedStringIntoImpl(s, buf); +} + void readTSVField(String & s, ReadBuffer & buf) { s.clear(); - readEscapedStringIntoImpl(s, buf); + readTSVFieldImpl(s, buf); } -template void readTSVField(String & s, ReadBuffer & buf); -template void readTSVField(String & s, ReadBuffer & buf); +void readTSVFieldCRLF(String & s, ReadBuffer & buf) +{ + s.clear(); + readTSVFieldImpl(s, buf); +} + + } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 79014666ce1..3a20d2480b8 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -581,7 +581,6 @@ void readString(String & s, ReadBuffer & buf); void readEscapedString(String & s, ReadBuffer & buf); -template void readEscapedStringCRLF(String & s, ReadBuffer & buf); void readQuotedString(String & s, ReadBuffer & buf); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index dbd939effe1..c92cd1c39a0 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -24,17 +24,14 @@ namespace ErrorCodes /** Check for a common error case - usage of Windows line feed. */ -template static void checkForCarriageReturn(ReadBuffer & in) { - bool crlf_escaped = false; - if constexpr (supports_crlf) - crlf_escaped = true; - if (!in.eof() && (in.position()[0] == '\r' || (crlf_escaped ? false : (in.position() != in.buffer().begin() && in.position()[-1] == '\r')))) + if (!in.eof() && (in.position()[0] == '\r' || (in.position() != in.buffer().begin() && in.position()[-1] == '\r'))) throw Exception(ErrorCodes::INCORRECT_DATA, "\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format." " You must transform your file to Unix format." - "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r."); + "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r" + "\nor else enable setting 'input_format_tsv_crlf_end_of_line'"); } TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( @@ -104,7 +101,8 @@ void TabSeparatedFormatReader::skipRowEndDelimiter() } if (unlikely(first_row)) { - supports_crlf ? checkForCarriageReturn(*buf) : checkForCarriageReturn(*buf); + if (!supports_crlf) + checkForCarriageReturn(*buf); first_row = false; } assertChar('\n', *buf); @@ -120,7 +118,7 @@ String TabSeparatedFormatReader::readFieldIntoString() else { if constexpr (read_string) - support_crlf ? readEscapedStringCRLF(field, *buf) : readEscapedStringCRLF(field, *buf); + support_crlf ? readEscapedStringCRLF(field, *buf) : readEscapedString(field, *buf); else support_crlf ? 
readTSVField(field, *buf) : readTSVField(field, *buf); } From e60ead336ab14f0dfaba1a34022d0b9e0bbf82c2 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 21 Feb 2024 18:14:22 +0100 Subject: [PATCH 025/392] remove readEscapedStringInto function --- src/IO/ReadHelpers.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index e763d627f40..dec8a14fae7 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1982,25 +1982,16 @@ bool tryReadJSONField(String & s, ReadBuffer & buf) return readParsedValueInto(s, buf, parse_func); } -template -void readTSVFieldImpl(String & s, ReadBuffer & buf) -{ - if constexpr (supports_crlf) - readEscapedStringIntoImpl(s, buf); - else - readEscapedStringIntoImpl(s, buf); -} - void readTSVField(String & s, ReadBuffer & buf) { s.clear(); - readTSVFieldImpl(s, buf); + readEscapedStringIntoImpl(s, buf); } void readTSVFieldCRLF(String & s, ReadBuffer & buf) { s.clear(); - readTSVFieldImpl(s, buf); + readEscapedStringIntoImpl(s, buf); } From e46c3c63fae5aa1d6ae17b53aa03e5b07ba5220b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 21 Feb 2024 18:24:58 +0100 Subject: [PATCH 026/392] check for return in skipRowEndDelimiter --- .../Formats/Impl/TabSeparatedRowInputFormat.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index c92cd1c39a0..93982526ddc 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -92,19 +92,20 @@ void TabSeparatedFormatReader::skipFieldDelimiter() void TabSeparatedFormatReader::skipRowEndDelimiter() { - bool supports_crlf = format_settings.tsv.crlf_end_of_line_input; if (buf->eof()) return; - if (supports_crlf && first_row==false) + + if (format_settings.tsv.crlf_end_of_line_input) { - ++buf->position(); + if (*buf->position() == '\r') + ++buf->position(); } - if (unlikely(first_row)) + else if (unlikely(first_row)) { - if (!supports_crlf) - checkForCarriageReturn(*buf); + checkForCarriageReturn(*buf); first_row = false; } + assertChar('\n', *buf); } From 4e3f2aae408fc8559304fe4f7c4a21db3d9202a6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Feb 2024 18:47:17 +0100 Subject: [PATCH 027/392] Fix keeper build --- src/Coordination/Standalone/Context.cpp | 1 + src/Coordination/Standalone/Context.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index c16ecbfd5c3..7e8711c7910 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index 3346a865f0f..943fcd106df 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -163,6 +163,8 @@ public: zkutil::ZooKeeperPtr getZooKeeper() const; const StorageS3Settings & getStorageS3Settings() const; + + const String & getUserName() const { static std::string user; return user; } }; } From 5f06c72bfc86b20e1ed50a255a121b6a334fa229 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 21 Feb 2024 20:36:10 +0100 Subject: [PATCH 028/392] check for return in deserializeTextEscaped implementations for SerializeBool and SerializeNullable --- 
.../Serializations/SerializationBool.cpp | 7 +++- .../Serializations/SerializationNullable.cpp | 40 ++++++++++++++----- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index f745fac4d30..0cf9cb8be49 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -242,8 +242,11 @@ void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & is { if (istr.eof()) throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF."); - - deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); + + if (settings.tsv.crlf_end_of_line_input) + deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || *buf.position() == '\r'; }); + else + deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n'; }); } bool SerializationBool::tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 4d31451f92d..9e78b1285db 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -286,7 +286,7 @@ bool SerializationNullable::tryDeserializeNullRaw(DB::ReadBuffer & istr, const D } template -ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization, bool & is_null) +ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization, bool & is_null) { static constexpr bool throw_exception = std::is_same_v; @@ -319,13 +319,23 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, /// Check if we have enough data in buffer to check if it's a null. if (istr.available() > null_representation.size()) { - auto check_for_null = [&null_representation](ReadBuffer & buf) + auto check_for_null = [&null_representation, settings](ReadBuffer & buf) { auto * pos = buf.position(); - if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n')) - return true; - buf.position() = pos; - return false; + if (settings.tsv.crlf_end_of_line_input) + { + if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n'|| *buf.position() == '\r')) + return true; + buf.position() = pos; + return false; + } + else + { + if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n')) + return true; + buf.position() = pos; + return false; + } }; return deserializeImpl(column, istr, check_for_null, deserialize_nested, is_null); } @@ -334,14 +344,22 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, /// Use PeekableReadBuffer to make a checkpoint before checking null /// representation and rollback if check was failed. 
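The check_for_null lambdas above all follow the same try-match-then-restore idea: remember the position, attempt to read the null representation, accept it only if the next character is a field/row terminator ('\t', '\n', or '\r' when input_format_tsv_crlf_end_of_line is enabled), and roll back otherwise. A simplified stand-alone illustration with std::string_view in place of ReadBuffer/PeekableReadBuffer (illustrative name, not the real API):

#include <string_view>

/// Returns true and advances `in` past `null_repr` when the input starts with the
/// null representation followed by a terminator; otherwise leaves `in` untouched.
/// `allow_cr` models the new input_format_tsv_crlf_end_of_line behaviour.
bool checkForNullToken(std::string_view & in, std::string_view null_repr, bool allow_cr)
{
    const std::string_view saved = in;      /// "checkpoint"
    if (in.substr(0, null_repr.size()) != null_repr)
        return false;
    in.remove_prefix(null_repr.size());
    if (in.empty() || in.front() == '\t' || in.front() == '\n'
        || (allow_cr && in.front() == '\r'))
        return true;
    in = saved;                             /// "rollback to checkpoint"
    return false;
}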
PeekableReadBuffer peekable_buf(istr, true); - auto check_for_null = [&null_representation](ReadBuffer & buf_) + auto check_for_null = [&null_representation, settings](ReadBuffer & buf_) { auto & buf = assert_cast(buf_); buf.setCheckpoint(); SCOPE_EXIT(buf.dropCheckpoint()); - if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n')) - return true; + if (settings.tsv.crlf_end_of_line_input) + { + if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || *buf.position() == '\r')) + return true; + } + else + { + if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n')) + return true; + } buf.rollbackToCheckpoint(); return false; }; @@ -372,6 +390,10 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr, if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " "containing '\\t' or '\\n' may not work correctly for large input."); + + if (settings.tsv.crlf_end_of_line_input && null_representation.find('\r') != std::string::npos) + throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " + "containing '\\r' may not work correctly for large input."); WriteBufferFromOwnString parsed_value; if constexpr (escaped) From 80eb0c37826de63d9e2b595c62c37abbbb9c16ab Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Feb 2024 20:47:25 +0100 Subject: [PATCH 029/392] Fix for hdfs --- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 28 +++++++++++++------ src/Storages/HDFS/WriteBufferFromHDFS.cpp | 7 +++-- .../ObjectStorage/HDFS/Configuration.cpp | 14 +++++++--- .../ObjectStorage/ReadBufferIterator.cpp | 12 ++++---- 4 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index fa5e227d853..360403b7f2d 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -36,10 +36,10 @@ ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & bool HDFSObjectStorage::exists(const StoredObject & object) const { - const auto & path = object.remote_path; - const size_t begin_of_path = path.find('/', path.find("//") + 2); - const String remote_fs_object_path = path.substr(begin_of_path); - return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str())); + // const auto & path = object.remote_path; + // const size_t begin_of_path = path.find('/', path.find("//") + 2); + // const String remote_fs_object_path = path.substr(begin_of_path); + return (0 == hdfsExists(hdfs_fs.get(), object.remote_path.c_str())); } std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT @@ -86,9 +86,12 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); + auto path = object.remote_path.starts_with('/') ? object.remote_path.substr(1) : object.remote_path; + path = fs::path(hdfs_root_path) / path; + /// Single O_WRONLY in libhdfs adds O_TRUNC return std::make_unique( - object.remote_path, config, settings->replication, patchSettings(write_settings), buf_size, + path, config, settings->replication, patchSettings(write_settings), buf_size, mode == WriteMode::Rewrite ? 
O_WRONLY : O_WRONLY | O_APPEND); } @@ -124,11 +127,18 @@ void HDFSObjectStorage::removeObjectsIfExist(const StoredObjects & objects) removeObjectIfExists(object); } -ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const +ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) const { - throw Exception( - ErrorCodes::UNSUPPORTED_METHOD, - "HDFS API doesn't support custom attributes/metadata for stored objects"); + auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data()); + if (!file_info) + throw Exception(ErrorCodes::HDFS_ERROR, "Cannot get file info for: {}. Error: {}", path, hdfsGetLastError()); + + ObjectMetadata metadata; + metadata.size_bytes = static_cast(file_info->mSize); + metadata.last_modified = file_info->mLastMod; + + hdfsFreeFileInfo(file_info, 1); + return metadata; } void HDFSObjectStorage::copyObject( /// NOLINT diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index 173dd899ada..9d383aa8245 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -48,12 +48,13 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2); const String path = hdfs_uri.substr(begin_of_path); - fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); if (fout == nullptr) { - throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open HDFS file: {} error: {}", - path, std::string(hdfsGetLastError())); + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open HDFS file: {} ({}) error: {}", + path, hdfs_uri, std::string(hdfsGetLastError())); } } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 2f2427edb24..a64faafd53d 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -22,13 +22,14 @@ StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguratio void StorageHDFSConfiguration::check(ContextPtr context) const { context->getRemoteHostFilter().checkURL(Poco::URI(url)); - checkHDFSURL(url); + checkHDFSURL(fs::path(url) / path); } ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { UNUSED(is_readonly); auto settings = std::make_unique(); + chassert(!url.empty()); return std::make_shared(url, std::move(settings), context->getConfigRef()); } @@ -36,15 +37,20 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr, bool /* with_str { url = checkAndGetLiteralArgument(args[0], "url"); - String format_name = "auto"; if (args.size() > 1) - format_name = checkAndGetLiteralArgument(args[1], "format_name"); + format = checkAndGetLiteralArgument(args[1], "format_name"); + else + format = "auto"; - String compression_method; if (args.size() == 3) compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); else compression_method = "auto"; + + const size_t begin_of_path = url.find('/', url.find("//") + 2); + path = url.substr(begin_of_path + 1); + url = url.substr(0, begin_of_path); + paths = {path}; } } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp 
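StorageHDFSConfiguration::fromAST above now splits the engine argument into the filesystem URL and the in-filesystem path at the first '/' after the scheme's "//". A tiny sketch of that split, assuming the argument always contains a path component (the function name below is illustrative):

#include <string>
#include <utility>

/// Split "hdfs://namenode:9000/dir/file.tsv" into
/// {"hdfs://namenode:9000", "dir/file.tsv"}, as fromAST does above.
/// Assumes the URL has the form scheme://host[:port]/path.
std::pair<std::string, std::string> splitHDFSUrl(const std::string & full_url)
{
    const size_t begin_of_path = full_url.find('/', full_url.find("//") + 2);
    return {full_url.substr(0, begin_of_path), full_url.substr(begin_of_path + 1)};
}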
index a0e719878ac..dd4bfe79b06 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -195,19 +195,19 @@ ReadBufferIterator::Data ReadBufferIterator::next() throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "The table structure cannot be extracted from a {} format file, because there are no files with provided path " - "in S3 or all files are empty. You can specify table structure manually", - *format); + "in {} or all files are empty. You can specify table structure manually", + *format, object_storage->getName()); throw Exception( ErrorCodes::CANNOT_DETECT_FORMAT, "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in S3 or all files are empty. You can specify the format manually"); + "in {} or all files are empty. You can specify the format manually", object_storage->getName()); } return {nullptr, std::nullopt, format}; } - /// S3 file iterator could get new keys after new iteration + /// file iterator could get new keys after new iteration if (read_keys.size() > prev_read_keys_size) { /// If format is unknown we can try to determine it by new file names. @@ -234,7 +234,7 @@ ReadBufferIterator::Data ReadBufferIterator::next() prev_read_keys_size = read_keys.size(); } - if (getContext()->getSettingsRef().s3_skip_empty_files + if (query_settings.skip_empty_files && current_object_info->metadata && current_object_info->metadata->size_bytes == 0) continue; @@ -255,7 +255,7 @@ ReadBufferIterator::Data ReadBufferIterator::next() {}, current_object_info->metadata->size_bytes); - if (!getContext()->getSettingsRef().s3_skip_empty_files || !read_buffer->eof()) + if (!query_settings.skip_empty_files || !read_buffer->eof()) { first = false; From f23ddec69f51481b8a7c3b923ae5e9dbb3891b41 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 22 Feb 2024 11:50:36 +0100 Subject: [PATCH 030/392] Fix unit tests build --- src/IO/tests/gtest_writebuffer_s3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index ae00bb2e9e2..7856f22ab1a 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -546,7 +546,7 @@ public: std::unique_ptr getWriteBuffer(String file_name = "file") { S3Settings::RequestSettings request_settings; - request_settings.updateFromSettings(settings); + request_settings.updateFromSettingsIfChanged(settings); client->resetCounters(); From 26a2fcf65a1702f71cc8cb6167d5622d55c00ae6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 22 Feb 2024 12:46:13 +0100 Subject: [PATCH 031/392] Fix style-check --- src/DataTypes/Serializations/SerializationBool.cpp | 1 - src/DataTypes/Serializations/SerializationNullable.cpp | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index 0cf9cb8be49..94bc724fd5d 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -242,7 +242,6 @@ void SerializationBool::deserializeTextEscaped(IColumn & column, ReadBuffer & is { if (istr.eof()) throw Exception(ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF."); - if (settings.tsv.crlf_end_of_line_input) deserializeImpl(column, istr, settings, [](ReadBuffer & buf){ return buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || 
*buf.position() == '\r'; }); else diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 9e78b1285db..bb6adf77b32 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -389,8 +389,7 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " - "containing '\\t' or '\\n' may not work correctly for large input."); - + "containing '\\t' or '\\n' may not work correctly for large input."); if (settings.tsv.crlf_end_of_line_input && null_representation.find('\r') != std::string::npos) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " "containing '\\r' may not work correctly for large input."); From 7f452aa830501ec4d800866b69fab7d158d4896c Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 22 Feb 2024 13:41:03 +0100 Subject: [PATCH 032/392] Update SerializationNullable.cpp Fix style-check --- src/DataTypes/Serializations/SerializationNullable.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index bb6adf77b32..566221e2371 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -389,11 +389,11 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " - "containing '\\t' or '\\n' may not work correctly for large input."); + "containing '\\t' or '\\n' may not work correctly for large input."); if (settings.tsv.crlf_end_of_line_input && null_representation.find('\r') != std::string::npos) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " - "containing '\\r' may not work correctly for large input."); - + "containing '\\r' may not work correctly for large input."); + WriteBufferFromOwnString parsed_value; if constexpr (escaped) nested_serialization->serializeTextEscaped(nested_column, nested_column.size() - 1, parsed_value, settings); From b548ed976d11309f8fb3b643ab71d9fd7d26ab31 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 22 Feb 2024 14:45:29 +0100 Subject: [PATCH 033/392] Fxi --- src/Storages/ObjectStorage/StorageObjectStorageCluster.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 5d77d4ced60..d7940851b00 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -54,7 +54,7 @@ private: const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override; - const String & engine_name; + const String engine_name; const Storage::ConfigurationPtr configuration; const ObjectStoragePtr object_storage; NamesAndTypesList virtual_columns; From e78ab3e06377502068830bfe27f69777f3497cdd Mon Sep 17 00:00:00 2001 From: Shaun Struwig 
<41984034+Blargian@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:20:54 +0100 Subject: [PATCH 034/392] Update src/DataTypes/Serializations/SerializationNullable.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- .../Serializations/SerializationNullable.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 566221e2371..5aca15e46f0 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -350,16 +350,8 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr buf.setCheckpoint(); SCOPE_EXIT(buf.dropCheckpoint()); - if (settings.tsv.crlf_end_of_line_input) - { - if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || *buf.position() == '\r')) - return true; - } - else - { - if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n')) - return true; - } + if (checkString(null_representation, buf) && (buf.eof() || *buf.position() == '\t' || *buf.position() == '\n' || (settings.tsv.crlf_end_of_line_input && *buf.position() == '\r'))) + return true; buf.rollbackToCheckpoint(); return false; }; From a458797015eb7d136edf878ac9464c8e6ffdad75 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:21:13 +0100 Subject: [PATCH 035/392] Update src/DataTypes/Serializations/SerializationNullable.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/DataTypes/Serializations/SerializationNullable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 5aca15e46f0..e9acab7a2a3 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -344,7 +344,7 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr /// Use PeekableReadBuffer to make a checkpoint before checking null /// representation and rollback if check was failed. PeekableReadBuffer peekable_buf(istr, true); - auto check_for_null = [&null_representation, settings](ReadBuffer & buf_) + auto check_for_null = [&null_representation, &settings](ReadBuffer & buf_) { auto & buf = assert_cast(buf_); buf.setCheckpoint(); From 03d0dd661feaf19d62a6969fc8d895200e410b38 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:21:19 +0100 Subject: [PATCH 036/392] Update src/DataTypes/Serializations/SerializationNullable.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/DataTypes/Serializations/SerializationNullable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index e9acab7a2a3..aef0a814f24 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -319,7 +319,7 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr /// Check if we have enough data in buffer to check if it's a null. 
if (istr.available() > null_representation.size()) { - auto check_for_null = [&null_representation, settings](ReadBuffer & buf) + auto check_for_null = [&null_representation, &settings](ReadBuffer & buf) { auto * pos = buf.position(); if (settings.tsv.crlf_end_of_line_input) From c83179bc70b5363a839d71d6f34af54807ad1d82 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:21:28 +0100 Subject: [PATCH 037/392] Update src/DataTypes/Serializations/SerializationNullable.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- .../Serializations/SerializationNullable.cpp | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index aef0a814f24..c796c147f1f 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -322,20 +322,10 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr auto check_for_null = [&null_representation, &settings](ReadBuffer & buf) { auto * pos = buf.position(); - if (settings.tsv.crlf_end_of_line_input) - { - if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n'|| *buf.position() == '\r')) - return true; - buf.position() = pos; - return false; - } - else - { - if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n')) - return true; - buf.position() = pos; - return false; - } + if (checkString(null_representation, buf) && (*buf.position() == '\t' || *buf.position() == '\n' || (settings.tsv.crlf_end_of_line_input && *buf.position() == '\r'))) + return true; + buf.position() = pos; + return false; }; return deserializeImpl(column, istr, check_for_null, deserialize_nested, is_null); } From 230cc512f86ede5e989a8a41a8abaaf15cfaebdd Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 27 Feb 2024 20:40:55 +0100 Subject: [PATCH 038/392] Handle CRLF in TabSeparatedRowInputFormat --- .../Formats/Impl/TabSeparatedRowInputFormat.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 93982526ddc..f60a64b18e0 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -164,7 +164,7 @@ bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & t const SerializationPtr & serialization, bool is_last_file_column, const String & /*column_name*/) { const bool at_delimiter = !is_last_file_column && !buf->eof() && *buf->position() == '\t'; - const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n'); + const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || (format_settings.tsv.crlf_end_of_line_input && *buf->position() == '\r')); if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end)) { @@ -229,7 +229,10 @@ bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) try { - assertChar('\n', *buf); + if (!format_settings.tsv.crlf_end_of_line_input) + assertChar('\n', *buf); + else + assertChar('\r', *buf); } catch (const DB::Exception &) { @@ -242,7 +245,10 @@ bool 
TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) else if (*buf->position() == '\r') { out << "ERROR: Carriage return found where line feed is expected." - " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n"; + " It's like your file has DOS/Windows style line separators. \n" + "You must transform your file to Unix format. \n" + "But if you really need carriage return at end of string value of last column, you need to escape it as \\r \n" + "or else enable setting 'input_format_tsv_crlf_end_of_line'"; } else { @@ -357,7 +363,7 @@ void TabSeparatedFormatReader::skipRow() bool TabSeparatedFormatReader::checkForEndOfRow() { - return buf->eof() || *buf->position() == '\n'; + return buf->eof() || *buf->position() == '\n' || (format_settings.tsv.crlf_end_of_line_input && *buf->position() == '\r'); } TabSeparatedSchemaReader::TabSeparatedSchemaReader( From 69bb01e77a15cad1e022b7d8234b61373243070c Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 29 Feb 2024 21:49:27 +0100 Subject: [PATCH 039/392] Fix style-check --- src/DataTypes/Serializations/SerializationNullable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index c796c147f1f..06361e24aa2 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -375,7 +375,7 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr if (settings.tsv.crlf_end_of_line_input && null_representation.find('\r') != std::string::npos) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " "containing '\\r' may not work correctly for large input."); - + WriteBufferFromOwnString parsed_value; if constexpr (escaped) nested_serialization->serializeTextEscaped(nested_column, nested_column.size() - 1, parsed_value, settings); From 2ad8ab2a5719bbaeb8a1c3216cd93b760534c59a Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 5 Mar 2024 19:12:49 +0100 Subject: [PATCH 040/392] Fix linker errors --- src/Formats/EscapingRuleUtils.cpp | 2 +- src/IO/ReadHelpers.h | 2 +- src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 6b254102bdf..c7a6cb18625 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -236,7 +236,7 @@ String readByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escapin if constexpr (read_string) readEscapedString(result, buf); else - readTSVField(result, buf); + readTSVField(result, buf); break; default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read value with {} escaping rule", escapingRuleToString(escaping_rule)); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 3a20d2480b8..f8e5887b82b 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1897,8 +1897,8 @@ bool tryReadQuotedField(String & s, ReadBuffer & buf); void readJSONField(String & s, ReadBuffer & buf); bool tryReadJSONField(String & s, ReadBuffer & buf); -template void readTSVField(String & s, ReadBuffer & buf); +void readTSVFieldCRLF(String & s, ReadBuffer & buf); /** Parse the escape sequence, which can be simple (one character after backslash) or more complex (multiple characters). 
* It is assumed that the cursor is located on the `\` symbol diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index f60a64b18e0..a800bf41ac9 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -121,7 +121,7 @@ String TabSeparatedFormatReader::readFieldIntoString() if constexpr (read_string) support_crlf ? readEscapedStringCRLF(field, *buf) : readEscapedString(field, *buf); else - support_crlf ? readTSVField(field, *buf) : readTSVField(field, *buf); + support_crlf ? readTSVFieldCRLF(field, *buf) : readTSVField(field, *buf); } return field; } From 2939ea07c8192aa9ed3bd8c75fe9ea42ded0a9cf Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 04:31:49 +0100 Subject: [PATCH 041/392] Update 02973_parse_crlf_with_tsv_files.sh Fix Fuzzer failing on parallel file creation/deletion issue --- .../0_stateless/02973_parse_crlf_with_tsv_files.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index cb7472be418..df03da4d42b 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -6,8 +6,10 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation step USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/data_without_crlf.tsv -DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/data_with_crlf.tsv +FILE_NAME_UNIX = "${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" +FILE_NAME_CRLF = "${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" +DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_UNIX +DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_CRLF touch $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_DOS_ENDINGS @@ -16,11 +18,11 @@ echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\nAegithina_tiphia\t2018-02-01\t3 echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\r\nAegithina_tiphia\t2018-02-01\t34\r\n1971-72_Utah_Stars_season\t2016-10-01\t1\r\n" > $DATA_FILE_DOS_ENDINGS echo -e "<-- Read UNIX endings -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(data_without_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" -$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(data_with_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${FILE_NAME_UNIX}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" +$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${FILE_NAME_CRLF}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" echo -e "\n<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(data_with_crlf.tsv, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${FILE_NAME_CRLF}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" # Test teardown rm 
$DATA_FILE_UNIX_ENDINGS From 7fa7d81c92007a8e6e8f4be2c3e348a20c3f3cd2 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 06:03:24 +0100 Subject: [PATCH 042/392] Fix shell style check Remove space around = --- tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index df03da4d42b..345a01bab88 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -6,8 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation step USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -FILE_NAME_UNIX = "${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" -FILE_NAME_CRLF = "${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" +FILE_NAME_UNIX="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" +FILE_NAME_CRLF="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_UNIX DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_CRLF From 2f2139d53b4497e7fc192d53a3474392dac5ad00 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 07:58:27 +0100 Subject: [PATCH 043/392] Update 02973_parse_crlf_with_tsv_files.sh --- .../queries/0_stateless/02973_parse_crlf_with_tsv_files.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index 345a01bab88..c36d65fa617 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -18,11 +18,11 @@ echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\nAegithina_tiphia\t2018-02-01\t3 echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\r\nAegithina_tiphia\t2018-02-01\t34\r\n1971-72_Utah_Stars_season\t2016-10-01\t1\r\n" > $DATA_FILE_DOS_ENDINGS echo -e "<-- Read UNIX endings -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${FILE_NAME_UNIX}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" -$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${FILE_NAME_CRLF}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DATA_FILE_UNIX_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" +$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${DATA_FILE_DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" echo -e "\n<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${FILE_NAME_CRLF}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DATA_FILE_DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" # Test teardown rm $DATA_FILE_UNIX_ENDINGS From 37a17172ccbe4c0f3aeee145ba569ef109ad9efd Mon Sep 17 00:00:00 2001 From: Shaun Struwig 
<41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 08:18:13 +0100 Subject: [PATCH 044/392] Update 02973_parse_crlf_with_tsv_files.sh --- .../queries/0_stateless/02973_parse_crlf_with_tsv_files.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index c36d65fa617..c521b936140 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -6,10 +6,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation step USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -FILE_NAME_UNIX="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" -FILE_NAME_CRLF="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" -DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_UNIX -DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/FILE_NAME_CRLF +DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" +DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" touch $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_DOS_ENDINGS From 361b23c007e1099a9dea11d26c019b9b1b3fb251 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 6 Mar 2024 18:14:58 +0100 Subject: [PATCH 045/392] Use unique test name for parallel tests --- .../0_stateless/02973_parse_crlf_with_tsv_files.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index c521b936140..c8a3d854d5a 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -6,9 +6,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Data preparation step USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') -DATA_FILE_UNIX_ENDINGS=${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" -DATA_FILE_DOS_ENDINGS=${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" +UNIX_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_without_crlf.tsv" +DOS_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" +DATA_FILE_UNIX_ENDINGS="${USER_FILES_PATH:?}/${UNIX_ENDINGS}" +DATA_FILE_DOS_ENDINGS="${USER_FILES_PATH:?}/${DOS_ENDINGS}" +echo $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_DOS_ENDINGS @@ -16,11 +19,11 @@ echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\nAegithina_tiphia\t2018-02-01\t3 echo -ne "Akiba_Hebrew_Academy\t2017-08-01\t241\r\nAegithina_tiphia\t2018-02-01\t34\r\n1971-72_Utah_Stars_season\t2016-10-01\t1\r\n" > $DATA_FILE_DOS_ENDINGS echo -e "<-- Read UNIX endings -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DATA_FILE_UNIX_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" -$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${DATA_FILE_DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${UNIX_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32');" 
+$CLICKHOUSE_CLIENT --multiquery --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32'); --{serverError 117}" echo -e "\n<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 -->\n" -$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DATA_FILE_DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" +$CLICKHOUSE_CLIENT --query "SELECT * FROM file(${DOS_ENDINGS}, 'TabSeparated', 'SearchTerm String, Date Date, Hits UInt32') SETTINGS input_format_tsv_crlf_end_of_line = 1;" # Test teardown rm $DATA_FILE_UNIX_ENDINGS From 0abed7aab1ed5d4aa160b03d64bb5846b5a982dc Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 6 Mar 2024 22:10:24 +0100 Subject: [PATCH 046/392] Update reference file --- .../0_stateless/02973_parse_crlf_with_tsv_files.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference index 88d203bd723..14cf3a564e4 100644 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference @@ -1,3 +1,4 @@ +/home/shaun/Desktop/ClickHouse/user_files/02973_parse_crlf_with_tsv_files_test_data_without_crlf.tsv <-- Read UNIX endings --> Akiba_Hebrew_Academy 2017-08-01 241 From 188fe4a93afa8db75afc9b75e6450424d1c4d542 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 6 Mar 2024 22:11:47 +0100 Subject: [PATCH 047/392] Update reference file --- .../0_stateless/02973_parse_crlf_with_tsv_files.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference index 14cf3a564e4..88d203bd723 100644 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.reference @@ -1,4 +1,3 @@ -/home/shaun/Desktop/ClickHouse/user_files/02973_parse_crlf_with_tsv_files_test_data_without_crlf.tsv <-- Read UNIX endings --> Akiba_Hebrew_Academy 2017-08-01 241 From bb393890e8b85f33f7e08236d2bcc61029c5c449 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 8 Mar 2024 20:05:26 +0100 Subject: [PATCH 048/392] Test passes locally --- .../0.2973_parse_crlf_with_tsv_files.reference | 12 ++++++++++++ .../0_stateless/02973_parse_crlf_with_tsv_files.sh | 1 - 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference diff --git a/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference b/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference new file mode 100644 index 00000000000..14cf3a564e4 --- /dev/null +++ b/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference @@ -0,0 +1,12 @@ +/home/shaun/Desktop/ClickHouse/user_files/02973_parse_crlf_with_tsv_files_test_data_without_crlf.tsv +<-- Read UNIX endings --> + +Akiba_Hebrew_Academy 2017-08-01 241 +Aegithina_tiphia 2018-02-01 34 +1971-72_Utah_Stars_season 2016-10-01 1 + +<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 --> + +Akiba_Hebrew_Academy 2017-08-01 241 +Aegithina_tiphia 2018-02-01 34 +1971-72_Utah_Stars_season 2016-10-01 1 diff --git a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh index 
c8a3d854d5a..14f28f1ba4a 100755 --- a/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh +++ b/tests/queries/0_stateless/02973_parse_crlf_with_tsv_files.sh @@ -11,7 +11,6 @@ DOS_ENDINGS="${CLICKHOUSE_TEST_UNIQUE_NAME}_data_with_crlf.tsv" DATA_FILE_UNIX_ENDINGS="${USER_FILES_PATH:?}/${UNIX_ENDINGS}" DATA_FILE_DOS_ENDINGS="${USER_FILES_PATH:?}/${DOS_ENDINGS}" -echo $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_UNIX_ENDINGS touch $DATA_FILE_DOS_ENDINGS From 70272d41744d9cc219d79c6dd5e3b6c9e523d447 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 11 Mar 2024 10:55:01 +0100 Subject: [PATCH 049/392] Minor --- src/CMakeLists.txt | 2 +- src/Databases/DatabaseHDFS.cpp | 2 +- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 6 +- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 2 +- .../ObjectStorages/ObjectStorageFactory.cpp | 2 +- src/IO/examples/read_buffer_from_hdfs.cpp | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 1200 ----------------- src/Storages/Hive/HiveCommon.h | 2 +- src/Storages/Hive/HiveFile.h | 2 +- src/Storages/Hive/StorageHive.cpp | 4 +- src/Storages/Hive/StorageHive.h | 2 +- .../HDFS/AsynchronousReadBufferFromHDFS.cpp | 2 +- .../HDFS/AsynchronousReadBufferFromHDFS.h | 2 +- .../ObjectStorage/HDFS/Configuration.cpp | 2 +- .../{ => ObjectStorage}/HDFS/HDFSCommon.cpp | 2 +- .../{ => ObjectStorage}/HDFS/HDFSCommon.h | 0 .../HDFS/ReadBufferFromHDFS.cpp | 2 +- .../HDFS/ReadBufferFromHDFS.h | 0 .../HDFS/WriteBufferFromHDFS.cpp | 4 +- .../HDFS/WriteBufferFromHDFS.h | 0 .../examples/async_read_buffer_from_hdfs.cpp | 2 +- 21 files changed, 21 insertions(+), 1221 deletions(-) delete mode 100644 src/Storages/HDFS/StorageHDFS.cpp rename src/Storages/{ => ObjectStorage}/HDFS/AsynchronousReadBufferFromHDFS.cpp (99%) rename src/Storages/{ => ObjectStorage}/HDFS/AsynchronousReadBufferFromHDFS.h (96%) rename src/Storages/{ => ObjectStorage}/HDFS/HDFSCommon.cpp (99%) rename src/Storages/{ => ObjectStorage}/HDFS/HDFSCommon.h (100%) rename src/Storages/{ => ObjectStorage}/HDFS/ReadBufferFromHDFS.cpp (99%) rename src/Storages/{ => ObjectStorage}/HDFS/ReadBufferFromHDFS.h (100%) rename src/Storages/{ => ObjectStorage}/HDFS/WriteBufferFromHDFS.cpp (97%) rename src/Storages/{ => ObjectStorage}/HDFS/WriteBufferFromHDFS.h (100%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1cf0e4e2b98..3cb64b56c46 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -150,7 +150,7 @@ if (TARGET ch_contrib::azure_sdk) endif() if (TARGET ch_contrib::hdfs) - add_headers_and_sources(dbms Storages/HDFS) + add_headers_and_sources(dbms Storages/ObjectStorage/HDFS) add_headers_and_sources(dbms Disks/ObjectStorages/HDFS) endif() diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 3a1e6b16ccf..cda38a69c9a 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 8bff687b915..2d03de60c3c 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -1,10 +1,10 @@ #include #include -#include -#include +#include +#include -#include +#include #include #include diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 66095eb9f8f..4072d21ed7c 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ 
b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 02b6816d673..d1841c92a6b 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -7,7 +7,7 @@ #endif #if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include -#include +#include #endif #if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) #include diff --git a/src/IO/examples/read_buffer_from_hdfs.cpp b/src/IO/examples/read_buffer_from_hdfs.cpp index 977dd2ae227..a5cf43b3e79 100644 --- a/src/IO/examples/read_buffer_from_hdfs.cpp +++ b/src/IO/examples/read_buffer_from_hdfs.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp deleted file mode 100644 index cd935fa3100..00000000000 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ /dev/null @@ -1,1200 +0,0 @@ -#include "config.h" - -#if USE_HDFS - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include - -#include - -namespace fs = std::filesystem; - -namespace ProfileEvents -{ - extern const Event EngineFileLikeReadFiles; -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ACCESS_DENIED; - extern const int DATABASE_ACCESS_DENIED; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int BAD_ARGUMENTS; - extern const int LOGICAL_ERROR; - extern const int CANNOT_COMPILE_REGEXP; - extern const int CANNOT_DETECT_FORMAT; -} -namespace -{ - struct HDFSFileInfoDeleter - { - /// Can have only one entry (see hdfsGetPathInfo()) - void operator()(hdfsFileInfo * info) { hdfsFreeFileInfo(info, 1); } - }; - using HDFSFileInfoPtr = std::unique_ptr; - - /* Recursive directory listing with matched paths as a result. - * Have the same method in StorageFile. 
- */ - std::vector LSWithRegexpMatching( - const String & path_for_ls, - const HDFSFSPtr & fs, - const String & for_match) - { - std::vector result; - - const size_t first_glob_pos = for_match.find_first_of("*?{"); - - if (first_glob_pos == std::string::npos) - { - const String path = fs::path(path_for_ls + for_match.substr(1)).lexically_normal(); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path.c_str())); - if (hdfs_info) // NOLINT - { - result.push_back(StorageHDFS::PathWithInfo{ - String(path), - StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}}); - } - return result; - } - - const size_t end_of_path_without_globs = for_match.substr(0, first_glob_pos).rfind('/'); - const String suffix_with_globs = for_match.substr(end_of_path_without_globs); /// begin with '/' - const String prefix_without_globs = path_for_ls + for_match.substr(1, end_of_path_without_globs); /// ends with '/' - - const size_t next_slash_after_glob_pos = suffix_with_globs.find('/', 1); - - const std::string current_glob = suffix_with_globs.substr(0, next_slash_after_glob_pos); - - re2::RE2 matcher(makeRegexpPatternFromGlobs(current_glob)); - if (!matcher.ok()) - throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, - "Cannot compile regex from glob ({}): {}", for_match, matcher.error()); - - HDFSFileInfo ls; - ls.file_info = hdfsListDirectory(fs.get(), prefix_without_globs.data(), &ls.length); - if (ls.file_info == nullptr && errno != ENOENT) // NOLINT - { - // ignore file not found exception, keep throw other exception, libhdfs3 doesn't have function to get exception type, so use errno. - throw Exception( - ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError())); - } - - if (!ls.file_info && ls.length > 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); - for (int i = 0; i < ls.length; ++i) - { - const String full_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = full_path.rfind('/'); - const String file_name = full_path.substr(last_slash); - const bool looking_for_directory = next_slash_after_glob_pos != std::string::npos; - const bool is_directory = ls.file_info[i].mKind == 'D'; - /// Condition with type of current file_info means what kind of path is it in current iteration of ls - if (!is_directory && !looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - result.push_back(StorageHDFS::PathWithInfo{ - String(full_path), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); - } - else if (is_directory && looking_for_directory) - { - if (re2::RE2::FullMatch(file_name, matcher)) - { - std::vector result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, - suffix_with_globs.substr(next_slash_after_glob_pos)); - /// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check. 
- std::move(result_part.begin(), result_part.end(), std::back_inserter(result)); - } - } - } - - return result; - } - - std::pair getPathFromUriAndUriWithoutPath(const String & uri) - { - auto pos = uri.find("//"); - if (pos != std::string::npos && pos + 2 < uri.length()) - { - pos = uri.find('/', pos + 2); - if (pos != std::string::npos) - return {uri.substr(pos), uri.substr(0, pos)}; - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); - } - - std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context) - { - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - Strings paths = expandSelectionGlob(path_from_uri); - - std::vector res; - - for (const auto & path : paths) - { - auto part_of_res = LSWithRegexpMatching("/", fs, path); - res.insert(res.end(), part_of_res.begin(), part_of_res.end()); - } - return res; - } -} - -StorageHDFS::StorageHDFS( - const String & uri_, - const StorageID & table_id_, - const String & format_name_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment, - const ContextPtr & context_, - const String & compression_method_, - const bool distributed_processing_, - ASTPtr partition_by_) - : IStorage(table_id_) - , WithContext(context_) - , uris({uri_}) - , format_name(format_name_) - , compression_method(compression_method_) - , distributed_processing(distributed_processing_) - , partition_by(partition_by_) -{ - if (format_name != "auto") - FormatFactory::instance().checkFormatName(format_name); - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); - checkHDFSURL(uri_); - - String path = uri_.substr(uri_.find('/', uri_.find("//") + 2)); - is_path_with_globs = path.find_first_of("*?{") != std::string::npos; - - StorageInMemoryMetadata storage_metadata; - - if (columns_.empty()) - { - ColumnsDescription columns; - if (format_name == "auto") - std::tie(columns, format_name) = getTableStructureAndFormatFromData(uri_, compression_method_, context_); - else - columns = getTableStructureFromData(format_name, uri_, compression_method, context_); - - storage_metadata.setColumns(columns); - } - else - { - if (format_name == "auto") - format_name = getTableStructureAndFormatFromData(uri_, compression_method_, context_).second; - - /// We don't allow special columns in HDFS storage. 
- if (!columns_.hasOnlyOrdinary()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine HDFS doesn't support special columns like MATERIALIZED, ALIAS or EPHEMERAL"); - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); - - virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList()); -} - -namespace -{ - class ReadBufferIterator : public IReadBufferIterator, WithContext - { - public: - ReadBufferIterator( - const std::vector & paths_with_info_, - const String & uri_without_path_, - std::optional format_, - const String & compression_method_, - const ContextPtr & context_) - : WithContext(context_) - , paths_with_info(paths_with_info_) - , uri_without_path(uri_without_path_) - , format(std::move(format_)) - , compression_method(compression_method_) - { - } - - Data next() override - { - bool is_first = current_index == 0; - /// For default mode check cached columns for all paths on first iteration. - if (is_first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) - { - if (auto cached_columns = tryGetColumnsFromCache(paths_with_info)) - return {nullptr, cached_columns, format}; - } - - StorageHDFS::PathWithInfo path_with_info; - - while (true) - { - if (current_index == paths_with_info.size()) - { - if (is_first) - { - if (format) - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because all files are empty. " - "You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because all files are empty. 
You can specify table structure manually"); - } - return {nullptr, std::nullopt, format}; - } - - path_with_info = paths_with_info[current_index++]; - if (getContext()->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0) - continue; - - if (getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::UNION) - { - std::vector paths = {path_with_info}; - if (auto cached_columns = tryGetColumnsFromCache(paths)) - return {nullptr, cached_columns, format}; - } - - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - if (!getContext()->getSettingsRef().hdfs_skip_empty_files || !impl->eof()) - { - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return {wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)), std::nullopt, format}; - } - } - } - - void setNumRowsToLastFile(size_t num_rows) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(key, num_rows); - } - - void setSchemaToLastFile(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::UNION) - return; - - String source = uri_without_path + paths_with_info[current_index - 1].path; - auto key = getKeyForSchemaCache(source, *format, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addColumns(key, columns); - } - - void setResultingSchema(const ColumnsDescription & columns) override - { - if (!getContext()->getSettingsRef().schema_inference_use_cache_for_hdfs - || getContext()->getSettingsRef().schema_inference_mode != SchemaInferenceMode::DEFAULT) - return; - - Strings sources; - sources.reserve(paths_with_info.size()); - std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const StorageHDFS::PathWithInfo & path_with_info){ return uri_without_path + path_with_info.path; }); - auto cache_keys = getKeysForSchemaCache(sources, *format, {}, getContext()); - StorageHDFS::getSchemaCache(getContext()).addManyColumns(cache_keys, columns); - } - - void setFormatName(const String & format_name) override - { - format = format_name; - } - - String getLastFileName() const override - { - if (current_index != 0) - return paths_with_info[current_index - 1].path; - - return ""; - } - - bool supportsLastReadBufferRecreation() const override { return true; } - - std::unique_ptr recreateLastReadBuffer() override - { - chassert(current_index > 0 && current_index <= paths_with_info.size()); - auto path_with_info = paths_with_info[current_index - 1]; - auto compression = chooseCompressionMethod(path_with_info.path, compression_method); - auto impl = std::make_unique(uri_without_path, path_with_info.path, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - return wrapReadBufferWithCompressionMethod(std::move(impl), compression, 
static_cast(zstd_window_log_max)); - } - - private: - std::optional tryGetColumnsFromCache(const std::vector & paths_with_info_) - { - auto context = getContext(); - - if (!context->getSettingsRef().schema_inference_use_cache_for_hdfs) - return std::nullopt; - - auto & schema_cache = StorageHDFS::getSchemaCache(context); - for (const auto & path_with_info : paths_with_info_) - { - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - - auto builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_with_info.path.c_str())); - if (hdfs_info) - return hdfs_info->mLastMod; - - return std::nullopt; - }; - - String url = uri_without_path + path_with_info.path; - if (format) - { - auto cache_key = getKeyForSchemaCache(url, *format, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - return columns; - } - else - { - /// If format is unknown, we can iterate through all possible input formats - /// and check if we have an entry with this format and this file in schema cache. - /// If we have such entry for some format, we can use this format to read the file. - for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) - { - auto cache_key = getKeyForSchemaCache(url, format_name, {}, context); - if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) - { - /// Now format is known. It should be the same for all files. - format = format_name; - return columns; - } - } - } - } - - return std::nullopt; - } - - const std::vector & paths_with_info; - const String & uri_without_path; - std::optional format; - const String & compression_method; - size_t current_index = 0; - }; -} - -std::pair StorageHDFS::getTableStructureAndFormatFromDataImpl( - std::optional format, - const String & uri, - const String & compression_method, - const ContextPtr & ctx) -{ - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - auto paths_with_info = getPathsList(path_from_uri, uri, ctx); - - if (paths_with_info.empty() && (!format || !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(*format))) - { - if (format) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files in HDFS with provided path." - " You can specify table structure manually", *format); - - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The data format cannot be detected by the contents of the files, because there are no files in HDFS with provided path." 
- " You can specify the format manually"); - } - - ReadBufferIterator read_buffer_iterator(paths_with_info, uri_without_path, format, compression_method, ctx); - if (format) - return {readSchemaFromFormat(*format, std::nullopt, read_buffer_iterator, ctx), *format}; - return detectFormatAndReadSchema(std::nullopt, read_buffer_iterator, ctx); -} - -std::pair StorageHDFS::getTableStructureAndFormatFromData(const String & uri, const String & compression_method, const ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(std::nullopt, uri, compression_method, ctx); -} - -ColumnsDescription StorageHDFS::getTableStructureFromData(const String & format, const String & uri, const String & compression_method, const DB::ContextPtr & ctx) -{ - return getTableStructureAndFormatFromDataImpl(format, uri, compression_method, ctx).first; -} - -class HDFSSource::DisclosedGlobIterator::Impl -{ -public: - Impl(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - { - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); - uris = getPathsList(path_from_uri, uri_without_path, context); - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & path_with_info : uris) - paths.push_back(path_with_info.path); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, context); - } - auto file_progress_callback = context->getFileProgressCallback(); - - for (auto & elem : uris) - { - elem.path = uri_without_path + elem.path; - if (file_progress_callback && elem.info) - file_progress_callback(FileProgress(0, elem.info->size)); - } - uris_iter = uris.begin(); - } - - StorageHDFS::PathWithInfo next() - { - std::lock_guard lock(mutex); - if (uris_iter != uris.end()) - { - auto answer = *uris_iter; - ++uris_iter; - return answer; - } - return {}; - } -private: - std::mutex mutex; - std::vector uris; - std::vector::iterator uris_iter; -}; - -class HDFSSource::URISIterator::Impl : WithContext -{ -public: - explicit Impl(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context_) - : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) - { - ActionsDAGPtr filter_dag; - if (!uris.empty()) - filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); - - if (filter_dag) - { - std::vector paths; - paths.reserve(uris.size()); - for (const auto & uri : uris) - paths.push_back(getPathFromUriAndUriWithoutPath(uri).first); - - VirtualColumnUtils::filterByPathOrFile(uris, paths, filter_dag, virtual_columns, getContext()); - } - - if (!uris.empty()) - { - auto path_and_uri = getPathFromUriAndUriWithoutPath(uris[0]); - builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); - fs = createHDFSFS(builder.get()); - } - } - - StorageHDFS::PathWithInfo next() - { - String uri; - HDFSFileInfoPtr hdfs_info; - do - { - size_t current_index = index.fetch_add(1); - if (current_index >= uris.size()) - return {"", {}}; - - uri = uris[current_index]; - auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); - hdfs_info.reset(hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str())); - } - /// Skip non-existed files. 
- while (!hdfs_info && String(hdfsGetLastError()).find("FileNotFoundException") != std::string::npos); - - std::optional info; - if (hdfs_info) - { - info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - if (file_progress_callback) - file_progress_callback(FileProgress(0, hdfs_info->mSize)); - } - - return {uri, info}; - } - -private: - std::atomic_size_t index = 0; - Strings uris; - HDFSBuilderWrapper builder; - HDFSFSPtr fs; - std::function file_progress_callback; -}; - -HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uri, predicate, virtual_columns, context)) {} - -StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::URISIterator::URISIterator(const std::vector & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context) - : pimpl(std::make_shared(uris_, predicate, virtual_columns, context)) -{ -} - -StorageHDFS::PathWithInfo HDFSSource::URISIterator::next() -{ - return pimpl->next(); -} - -HDFSSource::HDFSSource( - const ReadFromFormatInfo & info, - StorageHDFSPtr storage_, - const ContextPtr & context_, - UInt64 max_block_size_, - std::shared_ptr file_iterator_, - bool need_only_count_) - : ISource(info.source_header, false) - , WithContext(context_) - , storage(std::move(storage_)) - , block_for_format(info.format_header) - , requested_columns(info.requested_columns) - , requested_virtual_columns(info.requested_virtual_columns) - , max_block_size(max_block_size_) - , file_iterator(file_iterator_) - , columns_description(info.columns_description) - , need_only_count(need_only_count_) -{ - initialize(); -} - -bool HDFSSource::initialize() -{ - bool skip_empty_files = getContext()->getSettingsRef().hdfs_skip_empty_files; - StorageHDFS::PathWithInfo path_with_info; - while (true) - { - path_with_info = (*file_iterator)(); - if (path_with_info.path.empty()) - return false; - - if (path_with_info.info && skip_empty_files && path_with_info.info->size == 0) - continue; - - current_path = path_with_info.path; - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); - - std::optional file_size; - if (!path_with_info.info) - { - auto builder = createHDFSBuilder(uri_without_path + "/", getContext()->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - HDFSFileInfoPtr hdfs_info(hdfsGetPathInfo(fs.get(), path_from_uri.c_str())); - if (hdfs_info) - path_with_info.info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; - } - - if (path_with_info.info) - file_size = path_with_info.info->size; - - auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); - auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); - if (!skip_empty_files || !impl->eof()) - { - impl->setProgressCallback(getContext()); - const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; - read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); - break; - } - } - - current_path = path_with_info.path; - current_file_size = path_with_info.info ? 
std::optional(path_with_info.info->size) : std::nullopt; - - QueryPipelineBuilder builder; - std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files ? tryGetNumRowsFromCache(path_with_info) : std::nullopt; - if (num_rows_from_cache) - { - /// We should not return single chunk with all number of rows, - /// because there is a chance that this chunk will be materialized later - /// (it can cause memory problems even with default values in columns or when virtual columns are requested). - /// Instead, we use a special ConstChunkGenerator that will generate chunks - /// with max_block_size rows until total number of rows is reached. - auto source = std::make_shared(block_for_format, *num_rows_from_cache, max_block_size); - builder.init(Pipe(source)); - } - else - { - std::optional max_parsing_threads; - if (need_only_count) - max_parsing_threads = 1; - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, std::nullopt, max_parsing_threads); - - if (need_only_count) - input_format->needOnlyCount(); - - builder.init(Pipe(input_format)); - if (columns_description.hasDefaults()) - { - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, columns_description, *input_format, getContext()); - }); - } - } - - /// Add ExtractColumnsTransform to extract requested columns/subcolumns - /// from the chunk read by IInputFormat. - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, requested_columns); - }); - - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - - ProfileEvents::increment(ProfileEvents::EngineFileLikeReadFiles); - return true; -} - -String HDFSSource::getName() const -{ - return "HDFSSource"; -} - -Chunk HDFSSource::generate() -{ - while (true) - { - if (isCancelled() || !reader) - { - if (reader) - reader->cancel(); - break; - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - total_rows_in_file += num_rows; - size_t chunk_size = 0; - if (input_format) - chunk_size = input_format->getApproxBytesReadForChunk(); - progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, current_path, current_file_size); - return chunk; - } - - if (input_format && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(current_path, total_rows_in_file); - - total_rows_in_file = 0; - - reader.reset(); - pipeline.reset(); - input_format.reset(); - read_buf.reset(); - - if (!initialize()) - break; - } - return {}; -} - -void HDFSSource::addNumRowsToCache(const String & path, size_t num_rows) -{ - auto cache_key = getKeyForSchemaCache(path, storage->format_name, std::nullopt, getContext()); - StorageHDFS::getSchemaCache(getContext()).addNumRows(cache_key, num_rows); -} - -std::optional HDFSSource::tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info) -{ - auto cache_key = getKeyForSchemaCache(path_with_info.path, storage->format_name, std::nullopt, getContext()); - auto get_last_mod_time = [&]() -> std::optional - { - if (path_with_info.info) - return path_with_info.info->last_mod_time; - return std::nullopt; - }; - - return StorageHDFS::getSchemaCache(getContext()).tryGetNumRows(cache_key, get_last_mod_time); -} - -class HDFSSink : public SinkToStorage -{ -public: - HDFSSink(const String & uri, - const String & format, - const Block & sample_block, - const ContextPtr & context, - const CompressionMethod compression_method) - : SinkToStorage(sample_block) - { - const auto & settings = context->getSettingsRef(); - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique( - uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication, context->getWriteSettings()), - compression_method, - static_cast(settings.output_format_compression_level), - static_cast(settings.output_format_compression_zstd_window_log)); - writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); - } - - String getName() const override { return "HDFSSink"; } - - void consume(Chunk chunk) override - { - std::lock_guard lock(cancel_mutex); - if (cancelled) - return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); - } - - void onCancel() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - cancelled = true; - } - - void onException(std::exception_ptr exception) override - { - std::lock_guard lock(cancel_mutex); - try - { - std::rethrow_exception(exception); - } - catch (...) - { - /// An exception context is needed to proper delete write buffers without finalization - release(); - } - } - - void onFinish() override - { - std::lock_guard lock(cancel_mutex); - finalize(); - } - -private: - void finalize() - { - if (!writer) - return; - - try - { - writer->finalize(); - writer->flush(); - write_buf->sync(); - write_buf->finalize(); - } - catch (...) - { - /// Stop ParallelFormattingOutputFormat correctly. 
- release(); - throw; - } - } - - void release() - { - writer.reset(); - write_buf->finalize(); - } - - std::unique_ptr write_buf; - OutputFormatPtr writer; - std::mutex cancel_mutex; - bool cancelled = false; -}; - -class PartitionedHDFSSink : public PartitionedSink -{ -public: - PartitionedHDFSSink( - const ASTPtr & partition_by, - const String & uri_, - const String & format_, - const Block & sample_block_, - ContextPtr context_, - const CompressionMethod compression_method_) - : PartitionedSink(partition_by, context_, sample_block_) - , uri(uri_) - , format(format_) - , sample_block(sample_block_) - , context(context_) - , compression_method(compression_method_) - { - } - - SinkPtr createSinkForPartition(const String & partition_id) override - { - auto path = PartitionedSink::replaceWildcards(uri, partition_id); - PartitionedSink::validatePartitionKey(path, true); - return std::make_shared(path, format, sample_block, context, compression_method); - } - -private: - const String uri; - const String format; - const Block sample_block; - ContextPtr context; - const CompressionMethod compression_method; -}; - - -bool StorageHDFS::supportsSubsetOfColumns(const ContextPtr & context_) const -{ - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context_); -} - -class ReadFromHDFS : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromHDFS"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - ReadFromHDFS( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - ReadFromFormatInfo info_, - bool need_only_count_, - std::shared_ptr storage_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter( - DataStream{.header = std::move(sample_block)}, - column_names_, - query_info_, - storage_snapshot_, - context_) - , info(std::move(info_)) - , need_only_count(need_only_count_) - , storage(std::move(storage_)) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - { - } - -private: - ReadFromFormatInfo info; - const bool need_only_count; - std::shared_ptr storage; - - size_t max_block_size; - size_t num_streams; - - std::shared_ptr iterator_wrapper; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -void ReadFromHDFS::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void StorageHDFS::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context_, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), virtual_columns); - bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) - && context_->getSettingsRef().optimize_count_from_files; - - auto this_ptr = std::static_pointer_cast(shared_from_this()); - - auto reading = std::make_unique( - column_names, - query_info, - 
storage_snapshot, - context_, - read_from_format_info.source_header, - std::move(read_from_format_info), - need_only_count, - std::move(this_ptr), - max_block_size, - num_streams); - - query_plan.addStep(std::move(reading)); -} - -void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate) -{ - if (iterator_wrapper) - return; - - if (storage->distributed_processing) - { - iterator_wrapper = std::make_shared( - [callback = context->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo { - return StorageHDFS::PathWithInfo{callback(), std::nullopt}; - }); - } - else if (storage->is_path_with_globs) - { - /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared(storage->uris[0], predicate, storage->virtual_columns, context); - iterator_wrapper = std::make_shared([glob_iterator]() - { - return glob_iterator->next(); - }); - } - else - { - auto uris_iterator = std::make_shared(storage->uris, predicate, storage->virtual_columns, context); - iterator_wrapper = std::make_shared([uris_iterator]() - { - return uris_iterator->next(); - }); - } -} - -void ReadFromHDFS::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - pipes.emplace_back(std::make_shared( - info, - storage, - context, - max_block_size, - iterator_wrapper, - need_only_count)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -SinkToStoragePtr StorageHDFS::write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context_, bool /*async_insert*/) -{ - String current_uri = uris.back(); - - bool has_wildcards = current_uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; - const auto * insert_query = dynamic_cast(query.get()); - auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; - bool is_partitioned_implementation = partition_by_ast && has_wildcards; - - if (is_partitioned_implementation) - { - return std::make_shared( - partition_by_ast, - current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } - else - { - if (is_path_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "URI '{}' contains globs, so the table is in readonly mode", uris.back()); - - const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_uri); - - HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context_->getGlobalContext()->getConfigRef()); - HDFSFSPtr fs = createHDFSFS(builder.get()); - - bool truncate_on_insert = context_->getSettingsRef().hdfs_truncate_on_insert; - if (!truncate_on_insert && !hdfsExists(fs.get(), path_from_uri.c_str())) - { - if (context_->getSettingsRef().hdfs_create_new_file_on_insert) - { - auto pos = uris[0].find_first_of('.', uris[0].find_last_of('/')); - size_t index = uris.size(); - String new_uri; - do - { - new_uri = uris[0].substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? 
"" : uris[0].substr(pos)); - ++index; - } - while (!hdfsExists(fs.get(), new_uri.c_str())); - uris.push_back(new_uri); - current_uri = new_uri; - } - else - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "File with path {} already exists. If you want to overwrite it, enable setting hdfs_truncate_on_insert, " - "if you want to create new file on each insert, enable setting hdfs_create_new_file_on_insert", - path_from_uri); - } - - return std::make_shared(current_uri, - format_name, - metadata_snapshot->getSampleBlock(), - context_, - chooseCompressionMethod(current_uri, compression_method)); - } -} - -void StorageHDFS::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) -{ - const size_t begin_of_path = uris[0].find('/', uris[0].find("//") + 2); - const String url = uris[0].substr(0, begin_of_path); - - HDFSBuilderWrapper builder = createHDFSBuilder(url + "/", local_context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - - for (const auto & uri : uris) - { - const String path = uri.substr(begin_of_path); - int ret = hdfsDelete(fs.get(), path.data(), 0); - if (ret) - throw Exception(ErrorCodes::ACCESS_DENIED, "Unable to truncate hdfs table: {}", std::string(hdfsGetLastError())); - } -} - - -void registerStorageHDFS(StorageFactory & factory) -{ - factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage HDFS requires 1, 2 or 3 arguments: " - "url, name of used format (taken from file extension by default) and optional compression method."); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); - - String url = checkAndGetLiteralArgument(engine_args[0], "url"); - - String format_name = "auto"; - if (engine_args.size() > 1) - { - engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext()); - format_name = checkAndGetLiteralArgument(engine_args[1], "format_name"); - } - - if (format_name == "auto") - format_name = FormatFactory::instance().tryGetFormatFromFileName(url).value_or("auto"); - - String compression_method; - if (engine_args.size() == 3) - { - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.getLocalContext()); - compression_method = checkAndGetLiteralArgument(engine_args[2], "compression_method"); - } else compression_method = "auto"; - - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); - - return std::make_shared( - url, args.table_id, format_name, args.columns, args.constraints, args.comment, args.getContext(), compression_method, false, partition_by); - }, - { - .supports_sort_order = true, // for partition by - .supports_schema_inference = true, - .source_access_type = AccessType::HDFS, - }); -} - -NamesAndTypesList StorageHDFS::getVirtuals() const -{ - return virtual_columns; -} - -Names StorageHDFS::getVirtualColumnNames() -{ - return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames(); -} - -SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx) -{ - static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -} - -#endif diff --git 
a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index 0f9d3364ffd..81c167165d3 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 1f5e31f1d54..affb72fe09b 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace orc { diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 183a4532281..a76cef2d45d 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -38,8 +38,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 07440097f7a..43a22a886a8 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp similarity index 99% rename from src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp index 6b6151f5474..21df7e35284 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -1,9 +1,9 @@ #include "AsynchronousReadBufferFromHDFS.h" #if USE_HDFS +#include "ReadBufferFromHDFS.h" #include #include -#include #include #include diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h similarity index 96% rename from src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h index 10e2749fd4a..5aef92315a4 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index a64faafd53d..6c7fe1cef7e 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -1,7 +1,7 @@ #include #if USE_HDFS -#include +#include #include #include #include diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp similarity index 99% rename from src/Storages/HDFS/HDFSCommon.cpp rename to src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp index f9a55a1285a..5d14cec14bd 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/ObjectStorage/HDFS/HDFSCommon.cpp @@ -1,4 +1,4 @@ -#include +#include "HDFSCommon.h" #include #include #include diff --git a/src/Storages/HDFS/HDFSCommon.h b/src/Storages/ObjectStorage/HDFS/HDFSCommon.h similarity index 100% rename from src/Storages/HDFS/HDFSCommon.h rename to src/Storages/ObjectStorage/HDFS/HDFSCommon.h diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp similarity index 99% rename from src/Storages/HDFS/ReadBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index 4df05d47003..18b22805dfc 100644 --- 
a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -1,7 +1,7 @@ #include "ReadBufferFromHDFS.h" #if USE_HDFS -#include +#include "HDFSCommon.h" #include #include #include diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h similarity index 100% rename from src/Storages/HDFS/ReadBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp similarity index 97% rename from src/Storages/HDFS/WriteBufferFromHDFS.cpp rename to src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp index 9d383aa8245..2c14b38ce01 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.cpp @@ -2,8 +2,8 @@ #if USE_HDFS -#include -#include +#include "WriteBufferFromHDFS.h" +#include "HDFSCommon.h" #include #include #include diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h similarity index 100% rename from src/Storages/HDFS/WriteBufferFromHDFS.h rename to src/Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h diff --git a/src/Storages/examples/async_read_buffer_from_hdfs.cpp b/src/Storages/examples/async_read_buffer_from_hdfs.cpp index 4f6aed8ef65..1c47a07ba58 100644 --- a/src/Storages/examples/async_read_buffer_from_hdfs.cpp +++ b/src/Storages/examples/async_read_buffer_from_hdfs.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include int main() { From cfb73dd30781c95261a02dfb3443f6a18273612b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 18 Mar 2024 13:54:23 +0100 Subject: [PATCH 050/392] Move input_format_tsv_crlf_end_of_line to 24.3 settings changes --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index b14953fd706..5ce98a92003 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -86,6 +86,7 @@ namespace SettingsChangesHistory static std::map settings_changes_history = { {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, + {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, @@ -109,7 +110,6 @@ static std::map sett {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"}, {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"}, {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"format_template_row_format", "", "", "Template row format string can be set directly in query"}, {"format_template_resultset_format", "", "", 
"Template result set format string can be set in query"}, {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, From e019b3a391bb8e3bbfa991e083e65e76438a2a9e Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 25 Mar 2024 16:12:39 +0100 Subject: [PATCH 051/392] Fix build after merge --- src/Backups/BackupIO_AzureBlobStorage.cpp | 2 +- .../IO/WriteBufferFromAzureBlobStorage.h | 2 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 12 +-- .../ObjectStorage/AzureBlob/Configuration.cpp | 73 +++++++++++-------- .../ObjectStorage/AzureBlob/Configuration.h | 2 +- .../DataLakes/DeltaLakeMetadata.cpp | 1 + .../ObjectStorage/HDFS/Configuration.cpp | 8 +- .../ObjectStorage/S3/Configuration.cpp | 2 + .../StorageObjectStorageConfiguration.cpp | 10 +++ .../StorageObjectStorageConfiguration.h | 4 + src/Storages/S3Queue/S3QueueTableMetadata.cpp | 1 - src/Storages/S3Queue/S3QueueTableMetadata.h | 1 + 12 files changed, 77 insertions(+), 41 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index dc4a825189f..8a3ff1c3b5e 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -36,7 +36,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { - auto client_ptr = configuration.createClient(/* is_read_only */ false); + auto client_ptr = configuration.createClient(/* is_read_only */ false, /* attempt_to_create_container */true); object_storage = std::make_unique("BackupReaderAzureBlobStorage", std::move(client_ptr), configuration.createSettings(context_), diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 6e10c07b255..dbf0b2a3052 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -13,7 +13,7 @@ #include #include #include -#include +#include namespace Poco { diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 13d4c2a551b..872f7eec07b 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -69,10 +69,6 @@ std::unique_ptr getClient( { String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - if (S3::isS3ExpressEndpoint(endpoint) && !config.has(config_prefix + ".region")) - throw Exception( - ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix); - url = S3::URI(endpoint); if (!url.key.ends_with('/')) url.key.push_back('/'); @@ -83,6 +79,12 @@ std::unique_ptr getClient( throw Exception(ErrorCodes::LOGICAL_ERROR, "URL not passed"); url = *url_; } + const bool is_s3_express_bucket = S3::isS3ExpressEndpoint(url.endpoint); + if (is_s3_express_bucket && !config.has(config_prefix + ".region")) + { + throw Exception( + ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix); + } S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( auth_settings.region, @@ -130,7 +132,7 @@ std::unique_ptr getClient( .use_virtual_addressing = 
url.is_virtual_hosted_style, .disable_checksum = local_settings.s3_disable_checksum, .gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false), - .is_s3express_bucket = S3::isS3ExpressEndpoint(endpoint), + .is_s3express_bucket = is_s3_express_bucket, }; auto credentials_configuration = S3::CredentialsConfiguration diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 7a670441e72..018cec51e7c 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -47,7 +48,8 @@ namespace return !candidate.starts_with("http"); } - bool containerExists(Azure::Storage::Blobs::BlobServiceClient & blob_service_client, std::string container_name) + template + bool containerExists(T & blob_service_client, const std::string & container_name) { Azure::Storage::Blobs::ListBlobContainersOptions options; options.Prefix = container_name; @@ -101,12 +103,13 @@ AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(Co ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { - auto client = createClient(is_readonly); + assertInitialized(); + auto client = createClient(is_readonly, /* attempt_to_create_container */true); auto settings = createSettings(context); return std::make_unique("AzureBlobStorage", std::move(client), std::move(settings), container); } -AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) +AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) { using namespace Azure::Storage::Blobs; @@ -114,28 +117,32 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) if (is_connection_string) { - auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); + std::shared_ptr managed_identity_credential = std::make_shared(); + std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); - bool container_exists = containerExists(*blob_service_client, container); - if (!container_exists) + if (attempt_to_create_container) { - if (is_read_only) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "AzureBlobStorage container does not exist '{}'", - container); + bool container_exists = containerExists(*blob_service_client, container); + if (!container_exists) + { + if (is_read_only) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "AzureBlobStorage container does not exist '{}'", + container); - try - { - result->CreateIfNotExists(); - } - catch (const Azure::Storage::StorageException & e) - { - if (e.StatusCode != Azure::Core::Http::HttpStatusCode::Conflict - || e.ReasonPhrase != "The specified container already exists.") + try { - throw; + result->CreateIfNotExists(); + } + catch (const Azure::Storage::StorageException & e) + { + if (!(e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict + && e.ReasonPhrase == "The specified container already exists.")) + { + throw; + } } } } @@ -145,22 +152,22 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) std::shared_ptr storage_shared_key_credential; if (account_name.has_value() && 
account_key.has_value()) { - storage_shared_key_credential = - std::make_shared(*account_name, *account_key); + storage_shared_key_credential + = std::make_shared(*account_name, *account_key); } std::unique_ptr blob_service_client; + std::shared_ptr managed_identity_credential; if (storage_shared_key_credential) { blob_service_client = std::make_unique(connection_url, storage_shared_key_credential); } else { - blob_service_client = std::make_unique(connection_url); + managed_identity_credential = std::make_shared(); + blob_service_client = std::make_unique(connection_url, managed_identity_credential); } - bool container_exists = containerExists(*blob_service_client, container); - std::string final_url; size_t pos = connection_url.find('?'); if (pos != std::string::npos) @@ -173,12 +180,21 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) final_url = connection_url + (connection_url.back() == '/' ? "" : "/") + container; + if (!attempt_to_create_container) + { + if (storage_shared_key_credential) + return std::make_unique(final_url, storage_shared_key_credential); + else + return std::make_unique(final_url, managed_identity_credential); + } + + bool container_exists = containerExists(*blob_service_client, container); if (container_exists) { if (storage_shared_key_credential) result = std::make_unique(final_url, storage_shared_key_credential); else - result = std::make_unique(final_url); + result = std::make_unique(final_url, managed_identity_credential); } else { @@ -190,8 +206,7 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) try { result = std::make_unique(blob_service_client->CreateBlobContainer(container).Value); - } - catch (const Azure::Storage::StorageException & e) + } catch (const Azure::Storage::StorageException & e) { if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict && e.ReasonPhrase == "The specified container already exists.") @@ -199,7 +214,7 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only) if (storage_shared_key_credential) result = std::make_unique(final_url, storage_shared_key_credential); else - result = std::make_unique(final_url); + result = std::make_unique(final_url, managed_identity_credential); } else { diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.h b/src/Storages/ObjectStorage/AzureBlob/Configuration.h index 3d701e72cb4..8040d433d99 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.h +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.h @@ -52,7 +52,7 @@ protected: std::string blob_path; std::vector blobs_paths; - AzureClientPtr createClient(bool is_read_only); + AzureClientPtr createClient(bool is_read_only, bool attempt_to_create_container); AzureObjectStorage::SettingsPtr createSettings(ContextPtr local_context); }; diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 903558b73ab..1caa2c000d6 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 848fdb292e8..03a0a1a5e69 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -29,12 +29,14 @@ void 
StorageHDFSConfiguration::check(ContextPtr context) const checkHDFSURL(fs::path(url) / path); } -ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT { - UNUSED(is_readonly); - auto settings = std::make_unique(); + assertInitialized(); + if (!url.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS url is empty"); + + auto settings = std::make_unique(); return std::make_shared(url, std::move(settings), context->getConfigRef()); } diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 0c05f77541b..4e6d8980aa7 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -66,6 +66,8 @@ StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & ot ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT { + assertInitialized(); + const auto & config = context->getConfigRef(); const std::string config_prefix = "s3."; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 8a4dee2c31b..6172f8934af 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -18,6 +18,8 @@ void StorageObjectStorageConfiguration::initialize( // FIXME: it should be - if (format == "auto" && get_format_from_file) if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); + + configuration.initialized = true; } StorageObjectStorageConfiguration::StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other) @@ -48,4 +50,12 @@ std::string StorageObjectStorageConfiguration::getPathWithoutGlob() const return getPath().substr(0, getPath().find_first_of("*?{")); } +void StorageObjectStorageConfiguration::assertInitialized() const +{ + if (!initialized) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Configuration was not initialized before usage"); + } +} + } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 8134bd07806..66fe6a68d76 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -54,6 +54,10 @@ public: protected: virtual void fromNamedCollection(const NamedCollection & collection) = 0; virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; + + void assertInitialized() const; + + bool initialized = false; }; using StorageObjectStorageConfigurationPtr = std::shared_ptr; diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp index e1978259230..8354e6aa2ae 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -7,7 +7,6 @@ #include #include #include -#include namespace DB diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index a649f211abc..2158b189070 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -3,6 +3,7 @@ #if 
USE_AWS_S3 #include +#include #include #include From f5982fdb1ff30280dfebd89afb9274fca33c56b6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 25 Mar 2024 19:19:54 +0100 Subject: [PATCH 052/392] Fix some tests --- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 16 ++----- .../ObjectStorages/ObjectStorageFactory.cpp | 3 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 2 - .../ObjectStorage/HDFS/Configuration.cpp | 48 +++++++++++++------ .../ObjectStorage/HDFS/Configuration.h | 6 +-- .../ObjectStorage/ReadBufferIterator.cpp | 4 +- .../ObjectStorage/S3/Configuration.cpp | 6 +++ src/Storages/ObjectStorage/S3/Configuration.h | 2 + .../ObjectStorage/StorageObjectStorage.cpp | 2 + .../ObjectStorage/StorageObjectStorage.h | 2 + .../StorageObjectStorageConfiguration.cpp | 4 ++ .../StorageObjectStorageConfiguration.h | 2 + .../StorageObjectStorageSink.cpp | 40 ++++++++++++++-- .../ObjectStorage/StorageObjectStorageSink.h | 3 ++ src/Storages/StorageS3Settings.cpp | 2 +- .../queries/0_stateless/02114_hdfs_bad_url.sh | 1 - .../0_stateless/02700_s3_part_INT_MAX.sh | 2 +- ...ed_url_and_url_with_special_characters.sql | 3 +- 18 files changed, 104 insertions(+), 44 deletions(-) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 4072d21ed7c..f92e160fd4d 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -16,21 +16,13 @@ namespace DB struct HDFSObjectStorageSettings { - - HDFSObjectStorageSettings() = default; - - size_t min_bytes_for_seek; - int objects_chunk_size_to_delete; - int replication; - - HDFSObjectStorageSettings( - int min_bytes_for_seek_, - int objects_chunk_size_to_delete_, - int replication_) + HDFSObjectStorageSettings(int min_bytes_for_seek_, int replication_) : min_bytes_for_seek(min_bytes_for_seek_) - , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) , replication(replication_) {} + + size_t min_bytes_for_seek; + int replication; }; diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index f30a552f8dd..67e38d6389a 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -227,9 +227,8 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) if (uri.back() != '/') throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); - std::unique_ptr settings = std::make_unique( + auto settings = std::make_unique( config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), context->getSettingsRef().hdfs_replication ); diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 872f7eec07b..1aecb590526 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -143,8 +143,6 @@ std::unique_ptr getClient( auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), }; - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: {} - {}", auth_settings.access_key_id, auth_settings.secret_access_key); - return S3::ClientFactory::instance().create( client_configuration, client_settings, diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 03a0a1a5e69..5edc660d717 100644 --- 
a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include namespace DB @@ -13,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) @@ -29,37 +31,53 @@ void StorageHDFSConfiguration::check(ContextPtr context) const checkHDFSURL(fs::path(url) / path); } -ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT +ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT + ContextPtr context, + bool /* is_readonly */) { assertInitialized(); - - if (!url.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS url is empty"); - - auto settings = std::make_unique(); - return std::make_shared(url, std::move(settings), context->getConfigRef()); + const auto & settings = context->getSettingsRef(); + auto hdfs_settings = std::make_unique( + settings.remote_read_min_bytes_for_seek, + settings.hdfs_replication + ); + return std::make_shared(url, std::move(hdfs_settings), context->getConfigRef()); } -void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr, bool /* with_structure */) +void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool /* with_structure */) { url = checkAndGetLiteralArgument(args[0], "url"); if (args.size() > 1) + { + args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); format = checkAndGetLiteralArgument(args[1], "format_name"); - else - format = "auto"; + } if (args.size() == 3) + { + args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); - else - compression_method = "auto"; + } - const size_t begin_of_path = url.find('/', url.find("//") + 2); - path = url.substr(begin_of_path + 1); - url = url.substr(0, begin_of_path); + auto pos = url.find("//"); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid url: {}", url); + + pos = url.find('/', pos + 2); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid url: {}", url); + + path = url.substr(pos + 1); + url = url.substr(0, pos); paths = {path}; } +void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method fromNamedColection() is not implemented"); +} + } #endif diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 1013c2e00c2..5765edbf36c 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -29,12 +29,12 @@ public: ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } - void fromNamedCollection(const NamedCollection &) override {} - void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; - static void addStructureToArgs(ASTs &, const String &, ContextPtr) {} private: + void fromNamedCollection(const NamedCollection &) override; + void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; + String url; String path; std::vector paths; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp 
b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index dd4bfe79b06..0b6e34fb831 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -31,7 +31,7 @@ ReadBufferIterator::ReadBufferIterator( , query_settings(query_settings_) , schema_cache(schema_cache_) , read_keys(read_keys_) - , format(configuration->format.empty() || configuration->format == "auto" ? std::nullopt : std::optional(configuration->format)) + , format(configuration->format == "auto" ? std::nullopt : std::optional(configuration->format)) , prev_read_keys_size(read_keys_.size()) { } @@ -191,7 +191,7 @@ ReadBufferIterator::Data ReadBufferIterator::next() { if (first) { - if (format) + if (format.has_value()) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "The table structure cannot be extracted from a {} format file, because there are no files with provided path " diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 4e6d8980aa7..132a5045d8a 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -51,10 +51,16 @@ String StorageS3Configuration::getDataSourceDescription() void StorageS3Configuration::check(ContextPtr context) const { + validateNamespace(url.bucket); context->getGlobalContext()->getRemoteHostFilter().checkURL(url.uri); context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); } +void StorageS3Configuration::validateNamespace(const String & name) const +{ + S3::URI::validateBucket(name, {}); +} + StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) : StorageObjectStorageConfiguration(other) { diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index 88a084f29b3..f9614da4b95 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -27,6 +27,8 @@ public: String getDataSourceDescription() override; void check(ContextPtr context) const override; + void validateNamespace(const String & name) const override; + StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } bool isStaticConfiguration() const override { return static_configuration; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index f1d3635514f..3a894af3e01 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -87,6 +87,7 @@ StorageObjectStorage::StorageObjectStorage( , format_settings(format_settings_) , partition_by(partition_by_) , distributed_processing(distributed_processing_) + , log(getLogger("Storage" + engine_name_)) , object_storage(object_storage_) , configuration(configuration_) { @@ -204,6 +205,7 @@ SinkToStoragePtr StorageObjectStorage::write( if (partition_by_ast) { + LOG_TEST(log, "Using PartitionedSink for {}", configuration->getPath()); return std::make_shared( object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 743b725a88a..ebaf504f532 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ 
-113,6 +114,7 @@ protected: const ASTPtr partition_by; const bool distributed_processing; + LoggerPtr log; ObjectStoragePtr object_storage; ConfigurationPtr configuration; std::mutex configuration_update_mutex; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 6172f8934af..9a8b8191907 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -18,7 +19,10 @@ void StorageObjectStorageConfiguration::initialize( // FIXME: it should be - if (format == "auto" && get_format_from_file) if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); + else + FormatFactory::instance().checkFormatName(configuration.format); + configuration.check(local_context); configuration.initialized = true; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 66fe6a68d76..0beed91b128 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -43,6 +43,8 @@ public: std::string getPathWithoutGlob() const; virtual void check(ContextPtr context) const = 0; + virtual void validateNamespace(const String & /* name */) const {} + virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT virtual StorageObjectStorageConfigurationPtr clone() = 0; virtual bool isStaticConfiguration() const { return true; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 37f93a2b82f..2dd8516ebe8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -1,9 +1,14 @@ #include "StorageObjectStorageSink.h" #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_TEXT; +} StorageObjectStorageSink::StorageObjectStorageSink( ObjectStoragePtr object_storage, @@ -93,6 +98,7 @@ void StorageObjectStorageSink::release() write_buf->finalize(); } + PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, @@ -111,9 +117,12 @@ PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( SinkPtr PartitionedStorageObjectStorageSink::createSinkForPartition(const String & partition_id) { - auto blob = configuration->getPaths().back(); - auto partition_key = replaceWildcards(blob, partition_id); - validatePartitionKey(partition_key, true); + auto partition_bucket = replaceWildcards(configuration->getNamespace(), partition_id); + validateNamespace(partition_bucket); + + auto partition_key = replaceWildcards(configuration->getPath(), partition_id); + validateKey(partition_key); + return std::make_shared( object_storage, configuration, @@ -124,4 +133,29 @@ SinkPtr PartitionedStorageObjectStorageSink::createSinkForPartition(const String ); } +void PartitionedStorageObjectStorageSink::validateKey(const String & str) +{ + /// See: + /// - https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html + /// - https://cloud.ibm.com/apidocs/cos/cos-compatibility#putobject + + if 
(str.empty() || str.size() > 1024) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect key length (not empty, max 1023 characters), got: {}", str.size()); + + if (!UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in key"); + + validatePartitionKey(str, true); +} + +void PartitionedStorageObjectStorageSink::validateNamespace(const String & str) +{ + configuration->validateNamespace(str); + + if (!UTF8::isValidUTF8(reinterpret_cast(str.data()), str.size())) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Incorrect non-UTF8 sequence in bucket name"); + + validatePartitionKey(str, false); +} + } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 14298376d0e..a352e2c66a3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -54,6 +54,9 @@ public: SinkPtr createSinkForPartition(const String & partition_id) override; private: + void validateKey(const String & str); + void validateNamespace(const String & str); + ObjectStoragePtr object_storage; StorageObjectStorageConfigurationPtr configuration; const std::optional format_settings; diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 3eff6e0f6c9..e8f32388b1b 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -108,7 +108,7 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() if (max_upload_part_size > max_upload_part_size_limit) throw Exception( ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_upload_part_size has invalid value {} which is grater than the s3 API limit {}", + "Setting max_upload_part_size has invalid value {} which is greater than the s3 API limit {}", ReadableSize(max_upload_part_size), ReadableSize(max_upload_part_size_limit)); if (max_single_part_upload_size > max_upload_part_size_limit) diff --git a/tests/queries/0_stateless/02114_hdfs_bad_url.sh b/tests/queries/0_stateless/02114_hdfs_bad_url.sh index 22975dddf6f..5bd5610a9f0 100755 --- a/tests/queries/0_stateless/02114_hdfs_bad_url.sh +++ b/tests/queries/0_stateless/02114_hdfs_bad_url.sh @@ -23,4 +23,3 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs1:9000/data', 'CSV', 'x UInt32')" $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "HDFS_ERROR" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('http://hdfs1:9000/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL'; $CLICKHOUSE_CLIENT -q "SELECT * FROM hdfs('hdfs://hdfs1@nameservice/abcd/data', 'CSV', 'x UInt32')" 2>&1 | grep -F -q "HDFS_ERROR" && echo 'OK' || echo 'FAIL'; - diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh index d831c7d9806..a34a480a078 100755 --- a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -13,7 +13,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -nm -q " INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) - SETTINGS s3_max_single_part_upload_size = '10Gi'; + SETTINGS s3_max_single_part_upload_size = '5Gi'; SELECT count() FROM 
s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv'); " diff --git a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql index da76a5cb88f..1e99eb8b83d 100644 --- a/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql +++ b/tests/queries/0_stateless/02873_s3_presigned_url_and_url_with_special_characters.sql @@ -2,5 +2,4 @@ select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/BU%20-%20UNIT%20-%201/*.parquet'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } -select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD'); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } - +select * from s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/MyPrefix/*.parquet?some_tocken=ABCD'); -- { serverError CANNOT_DETECT_FORMAT } From cb97f8dab52aeaf492530d66a8553c422ffbcebd Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 25 Mar 2024 19:22:20 +0100 Subject: [PATCH 053/392] Fix style check --- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 1 - .../ObjectStorage/StorageObjectStorageConfiguration.cpp | 4 ++++ src/Storages/ObjectStorage/StorageObjectStorageSink.cpp | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 3a894af3e01..8d85224cff0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -27,7 +27,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int DATABASE_ACCESS_DENIED; extern const int NOT_IMPLEMENTED; - } template diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 9a8b8191907..1d5c0cd3a39 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -4,6 +4,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} void StorageObjectStorageConfiguration::initialize( StorageObjectStorageConfiguration & configuration, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 2dd8516ebe8..cf1c583ca62 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_PARSE_TEXT; + extern const int BAD_ARGUMENTS; } StorageObjectStorageSink::StorageObjectStorageSink( From 7a991de488567a255086a14faa830e1ba1610924 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 27 Mar 2024 19:06:19 +0100 Subject: [PATCH 054/392] Fix tests --- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 68 ++++++++++++++-- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 2 + .../ObjectStorages/ObjectStorageIterator.h | 24 +++--- .../ObjectStorageIteratorAsync.cpp | 12 +++ .../ObjectStorageIteratorAsync.h | 15 ++-- .../ObjectStorages/S3/S3ObjectStorage.cpp | 23 ++++-- .../ObjectStorage/HDFS/Configuration.cpp | 81 +++++++++++++++---- .../ObjectStorage/HDFS/Configuration.h | 3 + .../ObjectStorage/HDFS/ReadBufferFromHDFS.cpp | 17 ++-- .../ReadFromStorageObjectStorage.cpp | 4 +- .../ObjectStorage/StorageObjectStorage.cpp | 4 +- 
.../StorageObjectStorageCluster.cpp | 3 +- .../StorageObjectStorageConfiguration.cpp | 3 +- .../StorageObjectStorageConfiguration.h | 2 +- .../StorageObjectStorageQuerySettings.h | 4 + .../StorageObjectStorageSource.cpp | 56 ++++++++++--- .../StorageObjectStorageSource.h | 31 ++++--- src/Storages/S3Queue/S3QueueSource.cpp | 5 +- src/Storages/S3Queue/S3QueueSource.h | 2 +- src/Storages/S3Queue/StorageS3Queue.cpp | 2 +- tests/integration/test_storage_hdfs/test.py | 4 +- 21 files changed, 279 insertions(+), 86 deletions(-) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 2d03de60c3c..db79ff365aa 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #if USE_HDFS @@ -18,6 +19,7 @@ namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; extern const int HDFS_ERROR; + extern const int ACCESS_DENIED; } void HDFSObjectStorage::shutdown() @@ -48,7 +50,7 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLIN std::optional, std::optional) const { - return std::make_unique(object.remote_path, object.remote_path, config, patchSettings(read_settings)); + return std::make_unique(hdfs_root_path, object.remote_path, config, patchSettings(read_settings)); } std::unique_ptr HDFSObjectStorage::readObjects( /// NOLINT @@ -62,12 +64,12 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI [this, disk_read_settings] (bool /* restricted_seek */, const std::string & path) -> std::unique_ptr { - size_t begin_of_path = path.find('/', path.find("//") + 2); - auto hdfs_path = path.substr(begin_of_path); - auto hdfs_uri = path.substr(0, begin_of_path); + // size_t begin_of_path = path.find('/', path.find("//") + 2); + // auto hdfs_path = path.substr(begin_of_path); + // auto hdfs_uri = path.substr(0, begin_of_path); return std::make_unique( - hdfs_uri, hdfs_path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); + hdfs_root_path, path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); }; return std::make_unique( @@ -131,7 +133,8 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co { auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data()); if (!file_info) - throw Exception(ErrorCodes::HDFS_ERROR, "Cannot get file info for: {}. Error: {}", path, hdfsGetLastError()); + throw Exception(ErrorCodes::HDFS_ERROR, + "Cannot get file info for: {}. Error: {}", path, hdfsGetLastError()); ObjectMetadata metadata; metadata.size_bytes = static_cast(file_info->mSize); @@ -141,6 +144,54 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co return metadata; } +void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const +{ + auto * log = &Poco::Logger::get("HDFSObjectStorage"); + LOG_TRACE(log, "Trying to list files for {}", path); + + HDFSFileInfo ls; + ls.file_info = hdfsListDirectory(hdfs_fs.get(), path.data(), &ls.length); + + if (ls.file_info == nullptr && errno != ENOENT) // NOLINT + { + // ignore file not found exception, keep throw other exception, + // libhdfs3 doesn't have function to get exception type, so use errno. 
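        // In other words: a null listing together with errno == ENOENT simply means the
        // directory does not exist and is treated as an empty result, while any other errno
        // from hdfsListDirectory() is surfaced as the ACCESS_DENIED exception below.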
+ throw Exception(ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", + path, String(hdfsGetLastError())); + } + + if (!ls.file_info && ls.length > 0) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); + } + + LOG_TRACE(log, "Listed {} files for {}", ls.length, path); + + for (int i = 0; i < ls.length; ++i) + { + const String file_path = fs::path(ls.file_info[i].mName).lexically_normal(); + const size_t last_slash = file_path.rfind('/'); + const String file_name = file_path.substr(last_slash); + + const bool is_directory = ls.file_info[i].mKind == 'D'; + if (is_directory) + { + listObjects(fs::path(file_path) / "", children, max_keys); + } + else + { + LOG_TEST(log, "Found file: {}", file_path); + + children.emplace_back(std::make_shared( + String(file_path), + ObjectMetadata{ + static_cast(ls.file_info[i].mSize), + Poco::Timestamp::fromEpochTime(ls.file_info[i].mLastMod), + {}})); + } + } +} + void HDFSObjectStorage::copyObject( /// NOLINT const StoredObject & object_from, const StoredObject & object_to, @@ -160,7 +211,10 @@ void HDFSObjectStorage::copyObject( /// NOLINT } -std::unique_ptr HDFSObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration &, const std::string &, ContextPtr) +std::unique_ptr HDFSObjectStorage::cloneObjectStorage( + const std::string &, + const Poco::Util::AbstractConfiguration &, + const std::string &, ContextPtr) { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS object storage doesn't support cloning"); } diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index f92e160fd4d..24642ec635a 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -92,6 +92,8 @@ public: const WriteSettings & write_settings, std::optional object_to_attributes = {}) override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; + void shutdown() override; void startup() override; diff --git a/src/Disks/ObjectStorages/ObjectStorageIterator.h b/src/Disks/ObjectStorages/ObjectStorageIterator.h index e934fc2056d..26c3c690ba5 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIterator.h +++ b/src/Disks/ObjectStorages/ObjectStorageIterator.h @@ -27,9 +27,7 @@ class ObjectStorageIteratorFromList : public IObjectStorageIterator public: explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_) : batch(std::move(batch_)) - , batch_iterator(batch.begin()) - { - } + , batch_iterator(batch.begin()) {} void next() override { @@ -37,21 +35,23 @@ public: ++batch_iterator; } - void nextBatch() override - { - batch_iterator = batch.end(); - } + void nextBatch() override { batch_iterator = batch.end(); } - bool isValid() override - { - return batch_iterator != batch.end(); - } + bool isValid() override { return batch_iterator != batch.end(); } RelativePathWithMetadataPtr current() override; RelativePathsWithMetadata currentBatch() override { return batch; } - std::optional getCurrentBatchAndScheduleNext() override { return std::nullopt; } + std::optional getCurrentBatchAndScheduleNext() override + { + if (batch.empty()) + return {}; + + auto current_batch = std::move(batch); + batch = {}; + return current_batch; + } size_t getAccumulatedSize() const override { return batch.size(); } diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index 
f441b18d59d..94a0751dcc8 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -21,6 +21,18 @@ IObjectStorageIteratorAsync::IObjectStorageIteratorAsync( { } +IObjectStorageIteratorAsync::~IObjectStorageIteratorAsync() +{ + if (!deactivated) + deactivate(); +} + +void IObjectStorageIteratorAsync::deactivate() +{ + list_objects_pool.wait(); + deactivated = true; +} + void IObjectStorageIteratorAsync::nextBatch() { std::lock_guard lock(mutex); diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index c4bde91f415..3e3269fb550 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -19,18 +19,20 @@ public: CurrentMetrics::Metric threads_scheduled_metric, const std::string & thread_name); - void next() override; - void nextBatch() override; + ~IObjectStorageIteratorAsync() override; + bool isValid() override; + RelativePathWithMetadataPtr current() override; RelativePathsWithMetadata currentBatch() override; + + void next() override; + void nextBatch() override; + size_t getAccumulatedSize() const override; std::optional getCurrentBatchAndScheduleNext() override; - ~IObjectStorageIteratorAsync() override - { - list_objects_pool.wait(); - } + void deactivate(); protected: @@ -46,6 +48,7 @@ protected: bool is_initialized{false}; bool is_finished{false}; + bool deactivated{false}; mutable std::recursive_mutex mutex; ThreadPool list_objects_pool; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 33c0afda4c1..d902a33ae4a 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -110,10 +110,19 @@ public: CurrentMetrics::ObjectStorageS3ThreadsScheduled, "ListObjectS3") , client(client_) + , request(std::make_unique()) { - request.SetBucket(bucket_); - request.SetPrefix(path_prefix); - request.SetMaxKeys(static_cast(max_list_size)); + request->SetBucket(bucket_); + request->SetPrefix(path_prefix); + request->SetMaxKeys(static_cast(max_list_size)); + } + + ~S3IteratorAsync() override + { + /// Deactivate background threads before resetting the request to avoid data race. + deactivate(); + request.reset(); + client.reset(); } private: @@ -121,12 +130,12 @@ private: { ProfileEvents::increment(ProfileEvents::S3ListObjects); - auto outcome = client->ListObjectsV2(request); + auto outcome = client->ListObjectsV2(*request); /// Outcome failure will be handled on the caller side. 
         if (outcome.IsSuccess())
         {
-            request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
+            request->SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
 
             auto objects = outcome.GetResult().GetContents();
             for (const auto & object : objects)
@@ -141,12 +150,12 @@ private:
 
         throw S3Exception(outcome.GetError().GetErrorType(),
                           "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}",
-                          quoteString(request.GetBucket()), quoteString(request.GetPrefix()),
+                          quoteString(request->GetBucket()), quoteString(request->GetPrefix()),
                           backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage()));
     }
 
     std::shared_ptr client;
-    S3::ListObjectsV2Request request;
+    std::unique_ptr request;
 };
 
 }
diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp
index 5edc660d717..50e8918a12e 100644
--- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp
+++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp
@@ -8,6 +8,8 @@
 #include
 #include
 #include
+#include
+
 namespace DB
 {
@@ -28,7 +30,7 @@ StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguratio
 void StorageHDFSConfiguration::check(ContextPtr context) const
 {
     context->getRemoteHostFilter().checkURL(Poco::URI(url));
-    checkHDFSURL(fs::path(url) / path);
+    checkHDFSURL(fs::path(url) / path.substr(1));
 }
 
 ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT
@@ -44,9 +46,22 @@ ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT
     return std::make_shared(url, std::move(hdfs_settings), context->getConfigRef());
 }
 
-void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool /* with_structure */)
+std::string StorageHDFSConfiguration::getPathWithoutGlob() const
 {
-    url = checkAndGetLiteralArgument(args[0], "url");
+    /// Unlike s3 and azure, which are object storages,
+    /// hdfs is a filesystem, so it cannot list files by partial prefix,
+    /// only by directory.
+ auto first_glob_pos = path.find_first_of("*?{"); + auto end_of_path_without_globs = path.substr(0, first_glob_pos).rfind('/'); + if (end_of_path_without_globs == std::string::npos || end_of_path_without_globs == 0) + return "/"; + return path.substr(0, end_of_path_without_globs); +} + +void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) +{ + std::string url_str; + url_str = checkAndGetLiteralArgument(args[0], "url"); if (args.size() > 1) { @@ -54,28 +69,60 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool /* format = checkAndGetLiteralArgument(args[1], "format_name"); } - if (args.size() == 3) + if (with_structure) + { + if (args.size() > 2) + { + structure = checkAndGetLiteralArgument(args[2], "structure"); + } + if (args.size() > 3) + { + args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(args[3], context); + compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); + } + } + else if (args.size() > 2) { args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); compression_method = checkAndGetLiteralArgument(args[2], "compression_method"); } - auto pos = url.find("//"); - if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid url: {}", url); - - pos = url.find('/', pos + 2); - if (pos == std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid url: {}", url); - - path = url.substr(pos + 1); - url = url.substr(0, pos); - paths = {path}; + setURL(url_str); } -void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection &) +void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection & collection) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method fromNamedColection() is not implemented"); + std::string url_str; + + auto filename = collection.getOrDefault("filename", ""); + if (!filename.empty()) + url_str = std::filesystem::path(collection.get("url")) / filename; + else + url_str = collection.get("url"); + + format = collection.getOrDefault("format", "auto"); + compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + structure = collection.getOrDefault("structure", "auto"); + + setURL(url_str); +} + +void StorageHDFSConfiguration::setURL(const std::string url_) +{ + auto pos = url_.find("//"); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}", url_); + + pos = url_.find('/', pos + 2); + if (pos == std::string::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}", url_); + + path = url_.substr(pos + 1); + url = url_.substr(0, pos); + path = '/' + path; + paths = {path}; + + LOG_TRACE(getLogger("StorageHDFSConfiguration"), "Using url: {}, path: {}", url, path); } } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 5765edbf36c..8506c7c9700 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -31,9 +31,12 @@ public: static void addStructureToArgs(ASTs &, const String &, ContextPtr) {} + std::string getPathWithoutGlob() const override; + private: void fromNamedCollection(const NamedCollection &) override; void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; + void setURL(const std::string url_); String url; String path; diff --git a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp 
b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index 18b22805dfc..c29189804e6 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -55,10 +56,10 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory(use_external_buffer_ ? 0 : read_settings_.remote_fs_buffer_size) , hdfs_uri(hdfs_uri_) , hdfs_file_path(hdfs_file_path_) - , builder(createHDFSBuilder(hdfs_uri_, config_)) , read_settings(read_settings_) , read_until_position(read_until_position_) { + builder = createHDFSBuilder(hdfs_uri_, config_); fs = createHDFSFS(builder.get()); fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0); @@ -96,11 +97,14 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory {})", file_offset, read_until_position - 1); @@ -111,10 +115,11 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory= file_size) - { - return false; - } + // if (file_size != 0 && file_offset >= file_size) + // { + // LOG_TEST(log, "KSSENII 1 2"); + // return false; + // } ResourceGuard rlock(read_settings.resource_link, num_bytes_to_read); int bytes_read; @@ -145,6 +150,8 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemoryadd(bytes_read, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); + + LOG_TEST(log, "KSSENII SIZE: {}", bytes_read); return true; } diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp index ce157972161..f2595299430 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp @@ -49,8 +49,8 @@ void ReadFromStorageObejctStorage::createIterator(const ActionsDAG::Node * predi { auto context = getContext(); iterator_wrapper = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, distributed_processing, context, predicate, - virtual_columns, nullptr, query_settings.list_object_keys_size, metric_threads_count, + configuration, object_storage, query_settings, distributed_processing, + context, predicate, virtual_columns, nullptr, metric_threads_count, metric_threads_active, metric_threads_scheduled, context->getFileProgressCallback()); } } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 8d85224cff0..0276ff62778 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -288,8 +288,8 @@ std::unique_ptr StorageObjectStorage::creat { const auto settings = StorageSettings::create(context->getSettingsRef()); auto file_iterator = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, /* distributed_processing */false, - context, /* predicate */{}, /* virtual_columns */{}, &read_keys, settings.list_object_keys_size, + configuration, object_storage, settings, /* distributed_processing */false, + context, /* predicate */{}, /* virtual_columns */{}, &read_keys, StorageSettings::ObjectStorageThreads(), StorageSettings::ObjectStorageThreadsActive(), StorageSettings::ObjectStorageThreadsScheduled()); return std::make_unique( diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 
c5421f1d319..f023bb068d4 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -92,7 +92,8 @@ StorageObjectStorageCluster::getTask const auto settings = StorageSettings::create(local_context->getSettingsRef()); auto iterator = std::make_shared( object_storage, configuration, predicate, virtual_columns, local_context, - nullptr, settings.list_object_keys_size, local_context->getFileProgressCallback()); + nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match, + local_context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 1d5c0cd3a39..61e569cee05 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -40,7 +40,8 @@ StorageObjectStorageConfiguration::StorageObjectStorageConfiguration(const Stora bool StorageObjectStorageConfiguration::withWildcard() const { static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return getPath().find(PARTITION_ID_WILDCARD) != String::npos; + return getPath().find(PARTITION_ID_WILDCARD) != String::npos + || getNamespace().find(PARTITION_ID_WILDCARD) != String::npos; } bool StorageObjectStorageConfiguration::isPathWithGlobs() const diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 0beed91b128..48825c6a012 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -40,7 +40,7 @@ public: bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } bool isPathWithGlobs() const; bool isNamespaceWithGlobs() const; - std::string getPathWithoutGlob() const; + virtual std::string getPathWithoutGlob() const; virtual void check(ContextPtr context) const = 0; virtual void validateNamespace(const String & /* name */) const {} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h index 454da7c355f..8bcc2ad3b37 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h @@ -25,6 +25,7 @@ struct StorageObjectStorageSettings SchemaInferenceMode schema_inference_mode; bool skip_empty_files; size_t list_object_keys_size; + bool throw_on_zero_files_match; }; struct S3StorageSettings @@ -38,6 +39,7 @@ struct S3StorageSettings .schema_inference_mode = settings.schema_inference_mode, .skip_empty_files = settings.s3_skip_empty_files, .list_object_keys_size = settings.s3_list_object_keys_size, + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, }; } @@ -59,6 +61,7 @@ struct AzureStorageSettings .schema_inference_mode = settings.schema_inference_mode, .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure .list_object_keys_size = settings.azure_list_object_keys_size, + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, }; } @@ -80,6 +83,7 @@ struct HDFSStorageSettings .schema_inference_mode = settings.schema_inference_mode, .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for hdfs .list_object_keys_size = 
settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 3c8484194c9..5a88f1436c1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes extern const int CANNOT_COMPILE_REGEXP; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int FILE_DOESNT_EXIST; } StorageObjectStorageSource::StorageObjectStorageSource( @@ -75,12 +76,12 @@ StorageObjectStorageSource::~StorageObjectStorageSource() std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, + const StorageObjectStorageSettings & settings, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - size_t list_object_keys_size, CurrentMetrics::Metric metric_threads_, CurrentMetrics::Metric metric_threads_active_, CurrentMetrics::Metric metric_threads_scheduled_, @@ -99,12 +100,14 @@ std::shared_ptr StorageObjectStorageSourc { /// Iterate through disclosed globs and make a source for each file return std::make_shared( - object_storage, configuration, predicate, virtual_columns, local_context, read_keys, list_object_keys_size, file_progress_callback); + object_storage, configuration, predicate, virtual_columns, local_context, + read_keys, settings.list_object_keys_size, settings.throw_on_zero_files_match, file_progress_callback); } else { return std::make_shared( - object_storage, configuration, virtual_columns, read_keys, file_progress_callback); + object_storage, configuration, virtual_columns, read_keys, + settings.throw_on_zero_files_match, file_progress_callback); } } @@ -209,6 +212,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade do { object_info = file_iterator->next(processor); + if (!object_info || object_info->relative_path.empty()) return {}; @@ -226,8 +230,11 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade ? tryGetNumRowsFromCache(object_info) : std::nullopt; + LOG_TRACE(&Poco::Logger::get("kssenii"), "HAS NUM ROWS FROM CACHE: {}", num_rows_from_cache.has_value()); if (num_rows_from_cache) { + LOG_TRACE(&Poco::Logger::get("kssenii"), "NUM ROWS FROM CACHE: {}", num_rows_from_cache.value()); + /// We should not return single chunk with all number of rows, /// because there is a chance that this chunk will be materialized later /// (it can cause memory problems even with default values in columns or when virtual columns are requested). 
@@ -324,6 +331,29 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const S } } +StorageObjectStorageSource::IIterator::IIterator(bool throw_on_zero_files_match_, const std::string & logger_name_) + : throw_on_zero_files_match(throw_on_zero_files_match_) + , logger(getLogger(logger_name_)) +{ +} + +ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) +{ + auto object_info = nextImpl(processor); + + if (object_info) + { + first_iteration = false; + LOG_TEST(&Poco::Logger::get("KeysIterator"), "Next key: {}", object_info->relative_path); + } + else if (first_iteration && throw_on_zero_files_match) + { + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files"); + } + + return object_info; +} + StorageObjectStorageSource::GlobIterator::GlobIterator( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, @@ -332,8 +362,10 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( ContextPtr context_, ObjectInfos * read_keys_, size_t list_object_keys_size, + bool throw_on_zero_files_match_, std::function file_progress_callback_) - : WithContext(context_) + : IIterator(throw_on_zero_files_match_, "GlobIterator") + , WithContext(context_) , object_storage(object_storage_) , configuration(configuration_) , virtual_columns(virtual_columns_) @@ -380,7 +412,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } } -ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor */) +ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t /* processor */) { std::lock_guard lock(next_mutex); @@ -401,9 +433,10 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::next(size_t /* processor } new_batch = std::move(result.value()); + LOG_TEST(logger, "Batch size: {}", new_batch.size()); + for (auto it = new_batch.begin(); it != new_batch.end();) { - chassert(*it); if (!recursive && !re2::RE2::FullMatch((*it)->relative_path, *matcher)) it = new_batch.erase(it); else @@ -452,8 +485,10 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, + bool throw_on_zero_files_match_, std::function file_progress_callback_) - : object_storage(object_storage_) + : IIterator(throw_on_zero_files_match_, "KeysIterator") + , object_storage(object_storage_) , configuration(configuration_) , virtual_columns(virtual_columns_) , file_progress_callback(file_progress_callback_) @@ -470,7 +505,7 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( } } -ObjectInfoPtr StorageObjectStorageSource::KeysIterator::next(size_t /* processor */) +ObjectInfoPtr StorageObjectStorageSource::KeysIterator::nextImpl(size_t /* processor */) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) @@ -520,7 +555,8 @@ StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( CurrentMetrics::Metric metric_threads_, CurrentMetrics::Metric metric_threads_active_, CurrentMetrics::Metric metric_threads_scheduled_) - : callback(callback_) + : IIterator(false, "ReadTaskIterator") + , callback(callback_) { ThreadPool pool(metric_threads_, metric_threads_active_, metric_threads_scheduled_, max_threads_count); auto pool_scheduler = threadPoolCallbackRunner(pool, "ReadTaskIter"); @@ -540,7 +576,7 @@ StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( } } -ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::next(size_t) +ObjectInfoPtr 
StorageObjectStorageSource::ReadTaskIterator::nextImpl(size_t) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= buffer.size()) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index d02cb4a3a90..7c5497a6eaa 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -53,12 +53,12 @@ public: static std::shared_ptr createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, + const StorageObjectStorageSettings & settings, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - size_t list_object_keys_size, CurrentMetrics::Metric metric_threads_, CurrentMetrics::Metric metric_threads_active_, CurrentMetrics::Metric metric_threads_scheduled_, @@ -133,10 +133,21 @@ protected: class StorageObjectStorageSource::IIterator { public: + IIterator(bool throw_on_zero_files_match_, const std::string & logger_name_); + virtual ~IIterator() = default; virtual size_t estimatedKeysCount() = 0; - virtual ObjectInfoPtr next(size_t processor) = 0; + + ObjectInfoPtr next(size_t processor); + +protected: + virtual ObjectInfoPtr nextImpl(size_t processor) = 0; + +protected: + const bool throw_on_zero_files_match; + bool first_iteration = true; + LoggerPtr logger; }; class StorageObjectStorageSource::ReadTaskIterator : public IIterator @@ -151,9 +162,9 @@ public: size_t estimatedKeysCount() override { return buffer.size(); } - ObjectInfoPtr next(size_t) override; - private: + ObjectInfoPtr nextImpl(size_t) override; + ReadTaskCallback callback; ObjectInfos buffer; std::atomic_size_t index = 0; @@ -170,15 +181,17 @@ public: ContextPtr context_, ObjectInfos * read_keys_, size_t list_object_keys_size, + bool throw_on_zero_files_match_, std::function file_progress_callback_ = {}); ~GlobIterator() override = default; size_t estimatedKeysCount() override { return object_infos.size(); } - ObjectInfoPtr next(size_t processor) override; - private: + ObjectInfoPtr nextImpl(size_t processor) override; + void createFilterAST(const String & any_key); + ObjectStoragePtr object_storage; ConfigurationPtr configuration; ActionsDAGPtr filter_dag; @@ -193,7 +206,6 @@ private: std::unique_ptr matcher; - void createFilterAST(const String & any_key); bool is_finished = false; std::mutex next_mutex; @@ -208,15 +220,16 @@ public: ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, + bool throw_on_zero_files_match_, std::function file_progress_callback = {}); ~KeysIterator() override = default; size_t estimatedKeysCount() override { return keys.size(); } - ObjectInfoPtr next(size_t processor) override; - private: + ObjectInfoPtr nextImpl(size_t processor) override; + const ObjectStoragePtr object_storage; const ConfigurationPtr configuration; const NamesAndTypesList virtual_columns; diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 42cd210018a..ee3071ea71f 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -45,7 +45,8 @@ StorageS3QueueSource::FileIterator::FileIterator( std::unique_ptr glob_iterator_, size_t current_shard_, std::atomic & shutdown_called_) - : metadata(metadata_) + : StorageObjectStorageSource::IIterator(false, "S3QueueIterator") + , 
metadata(metadata_) , glob_iterator(std::move(glob_iterator_)) , shutdown_called(shutdown_called_) , log(&Poco::Logger::get("StorageS3QueueSource")) @@ -59,7 +60,7 @@ StorageS3QueueSource::FileIterator::FileIterator( } } -StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::next(size_t processor) +StorageS3QueueSource::ObjectInfoPtr StorageS3QueueSource::FileIterator::nextImpl(size_t processor) { while (!shutdown_called) { diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 2bdac7f2311..8c785e683c2 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -56,7 +56,7 @@ public: /// Note: /// List results in s3 are always returned in UTF-8 binary order. /// (https://docs.aws.amazon.com/AmazonS3/latest/userguide/ListingKeysUsingAPIs.html) - ObjectInfoPtr next(size_t processor) override; + ObjectInfoPtr nextImpl(size_t processor) override; size_t estimatedKeysCount() override; diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index aafcdc39f9e..c5799d23abd 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -598,7 +598,7 @@ std::shared_ptr StorageS3Queue::createFileIterator { auto settings = S3StorageSettings::create(local_context->getSettingsRef()); auto glob_iterator = std::make_unique( - object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size); + object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match); return std::make_shared(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called); } diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 9dec1954406..5632c7ae060 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -61,7 +61,7 @@ def test_read_write_storage_with_globs(started_cluster): hdfs_api.write_data("/storage" + i, i + "\tMark\t72.53\n") assert hdfs_api.read_data("/storage" + i) == i + "\tMark\t72.53\n" - assert node1.query("select count(*) from HDFSStorageWithRange") == "3\n" + assert node1.query("select count(*) from HDFSStorageWithRange settings s3_throw_on_zero_files_match=1") == "3\n" assert node1.query("select count(*) from HDFSStorageWithEnum") == "3\n" assert node1.query("select count(*) from HDFSStorageWithQuestionMark") == "3\n" assert node1.query("select count(*) from HDFSStorageWithAsterisk") == "3\n" @@ -159,7 +159,7 @@ def test_bad_hdfs_uri(started_cluster): ) except Exception as ex: print(ex) - assert "Unable to create builder to connect to HDFS" in str(ex) + assert "Unable to connect to HDFS" in str(ex) try: node1.query( From 480251e5932f2d15891a403887b5afc96f40ee89 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 27 Mar 2024 19:28:11 +0100 Subject: [PATCH 055/392] Fix style check --- tests/integration/test_storage_hdfs/test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 5632c7ae060..f6e486d6594 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -61,7 +61,12 @@ def test_read_write_storage_with_globs(started_cluster): hdfs_api.write_data("/storage" + i, i + "\tMark\t72.53\n") assert 
hdfs_api.read_data("/storage" + i) == i + "\tMark\t72.53\n" - assert node1.query("select count(*) from HDFSStorageWithRange settings s3_throw_on_zero_files_match=1") == "3\n" + assert ( + node1.query( + "select count(*) from HDFSStorageWithRange settings s3_throw_on_zero_files_match=1" + ) + == "3\n" + ) assert node1.query("select count(*) from HDFSStorageWithEnum") == "3\n" assert node1.query("select count(*) from HDFSStorageWithQuestionMark") == "3\n" assert node1.query("select count(*) from HDFSStorageWithAsterisk") == "3\n" From a2e210462d7d78212c32408ea3d276ef366b57c4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 27 Mar 2024 22:31:22 +0100 Subject: [PATCH 056/392] Fix style check --- src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp | 1 + src/Storages/ObjectStorage/HDFS/Configuration.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index db79ff365aa..9bc75b740e5 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; extern const int HDFS_ERROR; extern const int ACCESS_DENIED; + extern const int LOGICAL_ERROR; } void HDFSObjectStorage::shutdown() diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 50e8918a12e..3828afc0bea 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -16,7 +16,6 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int NOT_IMPLEMENTED; } StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) From 5c63d09c5bb91f7dc159befeb505a74e4c0257a5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 28 Mar 2024 14:15:14 +0100 Subject: [PATCH 057/392] More tests fixes --- src/Core/Settings.h | 3 + .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 4 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 8 +- .../ObjectStorage/HDFS/Configuration.cpp | 18 ++++- .../ObjectStorage/HDFS/Configuration.h | 2 +- .../StorageObjectStorageQuerySettings.h | 4 + .../StorageObjectStorageSource.cpp | 76 ++++++++++++------- .../StorageObjectStorageSource.h | 20 ++--- src/Storages/S3Queue/S3QueueSource.cpp | 2 +- tests/integration/test_storage_hdfs/test.py | 12 +-- .../0_stateless/02725_database_hdfs.sh | 3 +- 11 files changed, 98 insertions(+), 54 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f8f3595094c..2fae390c35b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -113,6 +113,9 @@ class IColumn; M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. 
It may lead to slightly higher memory usage", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ + M(Bool, s3_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageS3", 0) \ + M(Bool, hdfs_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageHDFS", 0) \ + M(Bool, azure_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageAzure", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \ diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 85d3e921f22..8bfba6fcfad 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -103,10 +103,10 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL void HDFSObjectStorage::removeObject(const StoredObject & object) { const auto & path = object.remote_path; - const size_t begin_of_path = path.find('/', path.find("//") + 2); + // const size_t begin_of_path = path.find('/', path.find("//") + 2); /// Add path from root to file name - int res = hdfsDelete(hdfs_fs.get(), path.substr(begin_of_path).c_str(), 0); + int res = hdfsDelete(hdfs_fs.get(), path.c_str(), 0); if (res == -1) throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: {}", path); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index b9995620c0f..9085fddfd08 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -432,7 +432,9 @@ void S3ObjectStorage::removeObjectsIfExist(const StoredObjects & objects) std::optional S3ObjectStorage::tryGetObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true, /* throw_on_error= */ false); + auto object_info = S3::getObjectInfo( + *client.get(), uri.bucket, path, {}, settings_ptr->request_settings, + /* with_metadata= */ true, /* for_disk_s3= */ true, /* throw_on_error= */ false); if (object_info.size == 0 && object_info.last_modification_time == 0 && object_info.metadata.empty()) return {}; @@ -448,7 +450,9 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true); + auto object_info = S3::getObjectInfo( + *client.get(), uri.bucket, path, {}, 
settings_ptr->request_settings, + /* with_metadata= */ true, /* for_disk_s3= */ true); ObjectMetadata result; result.size_bytes = object_info.size; diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 3828afc0bea..594f0b89454 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -16,6 +16,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) @@ -62,6 +63,13 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit std::string url_str; url_str = checkAndGetLiteralArgument(args[0], "url"); + const size_t max_args_num = with_structure ? 4 : 3; + if (args.size() > max_args_num) + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Expected not more than {} arguments", max_args_num); + } + if (args.size() > 1) { args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); @@ -72,6 +80,7 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit { if (args.size() > 2) { + args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); structure = checkAndGetLiteralArgument(args[2], "structure"); } if (args.size() > 3) @@ -100,13 +109,14 @@ void StorageHDFSConfiguration::fromNamedCollection(const NamedCollection & colle url_str = collection.get("url"); format = collection.getOrDefault("format", "auto"); - compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); + compression_method = collection.getOrDefault("compression_method", + collection.getOrDefault("compression", "auto")); structure = collection.getOrDefault("structure", "auto"); setURL(url_str); } -void StorageHDFSConfiguration::setURL(const std::string url_) +void StorageHDFSConfiguration::setURL(const std::string & url_) { auto pos = url_.find("//"); if (pos == std::string::npos) @@ -117,8 +127,10 @@ void StorageHDFSConfiguration::setURL(const std::string url_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad hdfs url: {}", url_); path = url_.substr(pos + 1); + if (!path.starts_with('/')) + path = '/' + path; + url = url_.substr(0, pos); - path = '/' + path; paths = {path}; LOG_TRACE(getLogger("StorageHDFSConfiguration"), "Using url: {}, path: {}", url, path); diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 8506c7c9700..7154f790665 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -36,7 +36,7 @@ public: private: void fromNamedCollection(const NamedCollection &) override; void fromAST(ASTs & args, ContextPtr, bool /* with_structure */) override; - void setURL(const std::string url_); + void setURL(const std::string & url_); String url; String path; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h index 8bcc2ad3b37..f0687776aa7 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h @@ -26,6 +26,7 @@ struct StorageObjectStorageSettings bool skip_empty_files; size_t list_object_keys_size; bool throw_on_zero_files_match; + bool ignore_non_existent_file; }; struct S3StorageSettings 
@@ -40,6 +41,7 @@ struct S3StorageSettings .skip_empty_files = settings.s3_skip_empty_files, .list_object_keys_size = settings.s3_list_object_keys_size, .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.s3_ignore_file_doesnt_exist, }; } @@ -62,6 +64,7 @@ struct AzureStorageSettings .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure .list_object_keys_size = settings.azure_list_object_keys_size, .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist, }; } @@ -84,6 +87,7 @@ struct HDFSStorageSettings .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for hdfs .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, }; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 5a88f1436c1..80aa0c210e9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -100,14 +100,15 @@ std::shared_ptr StorageObjectStorageSourc { /// Iterate through disclosed globs and make a source for each file return std::make_shared( - object_storage, configuration, predicate, virtual_columns, local_context, - read_keys, settings.list_object_keys_size, settings.throw_on_zero_files_match, file_progress_callback); + object_storage, configuration, predicate, virtual_columns, + local_context, read_keys, settings.list_object_keys_size, + settings.throw_on_zero_files_match, file_progress_callback); } else { return std::make_shared( object_storage, configuration, virtual_columns, read_keys, - settings.throw_on_zero_files_match, file_progress_callback); + settings.ignore_non_existent_file, file_progress_callback); } } @@ -331,9 +332,8 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const S } } -StorageObjectStorageSource::IIterator::IIterator(bool throw_on_zero_files_match_, const std::string & logger_name_) - : throw_on_zero_files_match(throw_on_zero_files_match_) - , logger(getLogger(logger_name_)) +StorageObjectStorageSource::IIterator::IIterator(const std::string & logger_name_) + : logger(getLogger(logger_name_)) { } @@ -343,13 +343,8 @@ ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) if (object_info) { - first_iteration = false; LOG_TEST(&Poco::Logger::get("KeysIterator"), "Next key: {}", object_info->relative_path); } - else if (first_iteration && throw_on_zero_files_match) - { - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files"); - } return object_info; } @@ -364,11 +359,12 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( size_t list_object_keys_size, bool throw_on_zero_files_match_, std::function file_progress_callback_) - : IIterator(throw_on_zero_files_match_, "GlobIterator") + : IIterator("GlobIterator") , WithContext(context_) , object_storage(object_storage_) , configuration(configuration_) , virtual_columns(virtual_columns_) + , throw_on_zero_files_match(throw_on_zero_files_match_) , read_keys(read_keys_) , file_progress_callback(file_progress_callback_) { @@ -412,10 +408,24 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } } -ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t /* 
processor */) +ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processor) { std::lock_guard lock(next_mutex); + auto object_info = nextImplUnlocked(processor); + if (object_info) + { + if (first_iteration) + first_iteration = false; + } + else if (first_iteration && throw_on_zero_files_match) + { + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files"); + } + return object_info; +} +ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImplUnlocked(size_t /* processor */) +{ bool current_batch_processed = object_infos.empty() || index >= object_infos.size(); if (is_finished && current_batch_processed) return {}; @@ -485,14 +495,15 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, - bool throw_on_zero_files_match_, + bool ignore_non_existent_files_, std::function file_progress_callback_) - : IIterator(throw_on_zero_files_match_, "KeysIterator") + : IIterator("KeysIterator") , object_storage(object_storage_) , configuration(configuration_) , virtual_columns(virtual_columns_) , file_progress_callback(file_progress_callback_) , keys(configuration->getPaths()) + , ignore_non_existent_files(ignore_non_existent_files_) { if (read_keys_) { @@ -507,20 +518,29 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( ObjectInfoPtr StorageObjectStorageSource::KeysIterator::nextImpl(size_t /* processor */) { - size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= keys.size()) - return {}; - - auto key = keys[current_index]; - - ObjectMetadata metadata{}; - if (file_progress_callback) + while (true) { - metadata = object_storage->getObjectMetadata(key); - file_progress_callback(FileProgress(0, metadata.size_bytes)); - } + size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= keys.size()) + return {}; - return std::make_shared(key, metadata); + auto key = keys[current_index]; + + ObjectMetadata object_metadata{}; + if (ignore_non_existent_files) + { + auto metadata = object_storage->tryGetObjectMetadata(key); + if (!metadata) + continue; + } + else + object_metadata = object_storage->getObjectMetadata(key); + + if (file_progress_callback) + file_progress_callback(FileProgress(0, object_metadata.size_bytes)); + + return std::make_shared(key, object_metadata); + } } StorageObjectStorageSource::ReaderHolder::ReaderHolder( @@ -555,7 +575,7 @@ StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( CurrentMetrics::Metric metric_threads_, CurrentMetrics::Metric metric_threads_active_, CurrentMetrics::Metric metric_threads_scheduled_) - : IIterator(false, "ReadTaskIterator") + : IIterator("ReadTaskIterator") , callback(callback_) { ThreadPool pool(metric_threads_, metric_threads_active_, metric_threads_scheduled_, max_threads_count); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 7c5497a6eaa..3d4cc4fbd20 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -133,7 +133,7 @@ protected: class StorageObjectStorageSource::IIterator { public: - IIterator(bool throw_on_zero_files_match_, const std::string & logger_name_); + explicit IIterator(const std::string & logger_name_); virtual ~IIterator() = default; @@ -143,10 +143,6 @@ public: protected: virtual ObjectInfoPtr nextImpl(size_t processor) = 0; - 
-protected: - const bool throw_on_zero_files_match; - bool first_iteration = true; LoggerPtr logger; }; @@ -190,23 +186,26 @@ public: private: ObjectInfoPtr nextImpl(size_t processor) override; + ObjectInfoPtr nextImplUnlocked(size_t processor); void createFilterAST(const String & any_key); - ObjectStoragePtr object_storage; - ConfigurationPtr configuration; - ActionsDAGPtr filter_dag; - NamesAndTypesList virtual_columns; + const ObjectStoragePtr object_storage; + const ConfigurationPtr configuration; + const NamesAndTypesList virtual_columns; + const bool throw_on_zero_files_match; size_t index = 0; ObjectInfos object_infos; ObjectInfos * read_keys; + ActionsDAGPtr filter_dag; ObjectStorageIteratorPtr object_storage_iterator; bool recursive{false}; std::unique_ptr matcher; bool is_finished = false; + bool first_iteration = true; std::mutex next_mutex; std::function file_progress_callback; @@ -220,7 +219,7 @@ public: ConfigurationPtr configuration_, const NamesAndTypesList & virtual_columns_, ObjectInfos * read_keys_, - bool throw_on_zero_files_match_, + bool ignore_non_existent_files_, std::function file_progress_callback = {}); ~KeysIterator() override = default; @@ -236,5 +235,6 @@ private: const std::function file_progress_callback; const std::vector keys; std::atomic index = 0; + bool ignore_non_existent_files; }; } diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index ee3071ea71f..8e7155205c4 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -45,7 +45,7 @@ StorageS3QueueSource::FileIterator::FileIterator( std::unique_ptr glob_iterator_, size_t current_shard_, std::atomic & shutdown_called_) - : StorageObjectStorageSource::IIterator(false, "S3QueueIterator") + : StorageObjectStorageSource::IIterator("S3QueueIterator") , metadata(metadata_) , glob_iterator(std::move(glob_iterator_)) , shutdown_called(shutdown_called_) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index f6e486d6594..fbf97adcee0 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -326,7 +326,7 @@ def test_virtual_columns(started_cluster): hdfs_api.write_data("/file1", "1\n") hdfs_api.write_data("/file2", "2\n") hdfs_api.write_data("/file3", "3\n") - expected = "1\tfile1\thdfs://hdfs1:9000/file1\n2\tfile2\thdfs://hdfs1:9000/file2\n3\tfile3\thdfs://hdfs1:9000/file3\n" + expected = "1\tfile1\t/file1\n2\tfile2\t/file2\n3\tfile3\t/file3\n" assert ( node1.query( "select id, _file as file_name, _path as file_path from virtual_cols order by id" @@ -365,7 +365,7 @@ def test_truncate_table(started_cluster): assert hdfs_api.read_data("/tr") == "1\tMark\t72.53\n" assert node1.query("select * from test_truncate") == "1\tMark\t72.53\n" node1.query("truncate table test_truncate") - assert node1.query("select * from test_truncate") == "" + assert node1.query("select * from test_truncate settings hdfs_ignore_file_doesnt_exist=1") == "" node1.query("drop table test_truncate") @@ -488,13 +488,13 @@ def test_hdfsCluster(started_cluster): actual = node1.query( "select id, _file as file_name, _path as file_path from hdfs('hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id" ) - expected = "1\tfile1\thdfs://hdfs1:9000/test_hdfsCluster/file1\n2\tfile2\thdfs://hdfs1:9000/test_hdfsCluster/file2\n3\tfile3\thdfs://hdfs1:9000/test_hdfsCluster/file3\n" + expected = 
"1\tfile1\t/test_hdfsCluster/file1\n2\tfile2\t/test_hdfsCluster/file2\n3\tfile3\t/test_hdfsCluster/file3\n" assert actual == expected actual = node1.query( "select id, _file as file_name, _path as file_path from hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id" ) - expected = "1\tfile1\thdfs://hdfs1:9000/test_hdfsCluster/file1\n2\tfile2\thdfs://hdfs1:9000/test_hdfsCluster/file2\n3\tfile3\thdfs://hdfs1:9000/test_hdfsCluster/file3\n" + expected = "1\tfile1\t/test_hdfsCluster/file1\n2\tfile2\t/test_hdfsCluster/file2\n3\tfile3\t/test_hdfsCluster/file3\n" assert actual == expected fs.delete(dir, recursive=True) @@ -502,7 +502,7 @@ def test_hdfsCluster(started_cluster): def test_hdfs_directory_not_exist(started_cluster): ddl = "create table HDFSStorageWithNotExistDir (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/data/not_eixst', 'TSV')" node1.query(ddl) - assert "" == node1.query("select * from HDFSStorageWithNotExistDir") + assert "" == node1.query("select * from HDFSStorageWithNotExistDir settings hdfs_ignore_file_doesnt_exist=1") def test_overwrite(started_cluster): @@ -658,7 +658,7 @@ def test_virtual_columns_2(started_cluster): node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") result = node1.query(f"SELECT _path FROM {table_function}") - assert result.strip() == "hdfs://hdfs1:9000/parquet_2" + assert result.strip() == "/parquet_2" table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index d62f928e947..623af707542 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -60,7 +60,8 @@ SELECT * FROM \"abacaba/file.tsv\" ${CLICKHOUSE_CLIENT} -q "SELECT * FROM test_hdfs_4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: - +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: From 961704ba173bef199735c52e5296b371a5168f15 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 28 Mar 2024 15:00:49 +0100 Subject: [PATCH 058/392] Style check --- tests/integration/test_storage_hdfs/test.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index fbf97adcee0..77a55ced5c8 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -365,7 +365,12 @@ def test_truncate_table(started_cluster): assert hdfs_api.read_data("/tr") == 
"1\tMark\t72.53\n" assert node1.query("select * from test_truncate") == "1\tMark\t72.53\n" node1.query("truncate table test_truncate") - assert node1.query("select * from test_truncate settings hdfs_ignore_file_doesnt_exist=1") == "" + assert ( + node1.query( + "select * from test_truncate settings hdfs_ignore_file_doesnt_exist=1" + ) + == "" + ) node1.query("drop table test_truncate") @@ -502,7 +507,9 @@ def test_hdfsCluster(started_cluster): def test_hdfs_directory_not_exist(started_cluster): ddl = "create table HDFSStorageWithNotExistDir (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/data/not_eixst', 'TSV')" node1.query(ddl) - assert "" == node1.query("select * from HDFSStorageWithNotExistDir settings hdfs_ignore_file_doesnt_exist=1") + assert "" == node1.query( + "select * from HDFSStorageWithNotExistDir settings hdfs_ignore_file_doesnt_exist=1" + ) def test_overwrite(started_cluster): From 34a87666ebe932fbedef68ac7fef05f2a6e5880a Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 28 Mar 2024 16:55:39 +0100 Subject: [PATCH 059/392] Update settings changes history --- src/Core/SettingsChangesHistory.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index db6fb2f1c0e..8cde00fcc14 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -124,6 +124,9 @@ static std::map sett {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, + {"hdfs_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageHDFS"}, + {"azure_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageAzureBlob"}, + {"s3_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageS3"}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, From 422a3bd672d8c3f7f5bc050eaeca14415a013a60 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 3 Apr 2024 17:16:51 +0200 Subject: [PATCH 060/392] Update version in SettingsChangesHistory.h --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 9fa1a71f58e..0b90d0216bf 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,8 +85,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.4", {{"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}}}, {"24.3", 
{{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, From b24a2afd5fb6c44fd1ecd2435963f3433c61f2af Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Apr 2024 13:21:22 +0200 Subject: [PATCH 061/392] A few more test fixes --- src/TableFunctions/TableFunctionObjectStorageCluster.cpp | 5 +++-- src/TableFunctions/TableFunctionObjectStorageCluster.h | 8 ++++++++ tests/queries/0_stateless/02725_database_hdfs.sh | 6 ++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 4ec94cfaf7c..909ace788eb 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -17,9 +17,8 @@ namespace DB template StoragePtr TableFunctionObjectStorageCluster::executeImpl( const ASTPtr & /*function*/, ContextPtr context, - const std::string & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const + const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const { - using Base = TableFunctionObjectStorage; auto configuration = Base::getConfiguration(); ColumnsDescription columns; @@ -27,6 +26,8 @@ StoragePtr TableFunctionObjectStorageClusterstructure, context); else if (!Base::structure_hint.empty()) columns = Base::structure_hint; + else if (!cached_columns.empty()) + columns = cached_columns; auto object_storage = Base::getObjectStorage(context, !is_insert_query); StoragePtr storage; diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h index 461456e37df..21c2f8995dc 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.h +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h @@ -67,6 +67,8 @@ public: String getSignature() const override { return signature; } protected: + using Base = TableFunctionObjectStorage; + StoragePtr executeImpl( const ASTPtr & ast_function, ContextPtr context, @@ -75,6 +77,12 @@ protected: bool is_insert_query) const override; const char * getStorageTypeName() const override { return Definition::storage_type_name; } + + bool hasStaticStructure() const override { return Base::getConfiguration()->structure != "auto"; } + + bool needStructureHint() const override { return Base::getConfiguration()->structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { Base::structure_hint = structure_hint_; } }; #if USE_AWS_S3 diff --git a/tests/queries/0_stateless/02725_database_hdfs.sh b/tests/queries/0_stateless/02725_database_hdfs.sh index 623af707542..1eb22976b84 100755 --- a/tests/queries/0_stateless/02725_database_hdfs.sh +++ b/tests/queries/0_stateless/02725_database_hdfs.sh @@ -58,10 +58,8 @@ SELECT * FROM \"abacaba/file.tsv\" """ 2>&1 | tr '\n' ' ' | grep -oF "CANNOT_EXTRACT_TABLE_STRUCTURE" ${CLICKHOUSE_CLIENT} -q "SELECT * FROM test_hdfs_4.\`http://localhost:11111/test/a.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || 
echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "CANNOT_EXTRACT_TABLE_STRUCTURE" > /dev/null && echo "OK" || echo 'FAIL' ||: -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/file.myext\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "The data format cannot be detected" > /dev/null && echo "OK" || echo 'FAIL' ||: +${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222/test_02725_3.tsv\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "The table structure cannot be extracted" > /dev/null && echo "OK" || echo 'FAIL' ||: ${CLICKHOUSE_CLIENT} --query "SELECT * FROM test_hdfs_4.\`hdfs://localhost:12222\`" 2>&1 | tr '\n' ' ' | grep -oF -e "UNKNOWN_TABLE" -e "BAD_ARGUMENTS" > /dev/null && echo "OK" || echo 'FAIL' ||: From aa804e744b1f1c233ef7158431feb4c016d0026c Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Apr 2024 14:05:50 +0200 Subject: [PATCH 062/392] Fix style check --- src/Storages/ObjectStorage/HDFS/Configuration.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 5a4fb322692..0a49ba5e251 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -153,7 +153,7 @@ void StorageHDFSConfiguration::addStructureToArgs(ASTs & args, const String & st { size_t count = args.size(); if (count == 0 || count > 3) - throw Exception(ErrorCodes::LOGICAL_ERROR, + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Expected 1 to 3 arguments in table function, got {}", count); auto structure_literal = std::make_shared(structure_); From e5ffe3cf8d7362335ef6150e7864d5deb74c9479 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 5 Apr 2024 16:15:11 +0200 Subject: [PATCH 063/392] More tests fixes --- src/Storages/MergeTree/KeyCondition.cpp | 7 +++++ .../ObjectStorage/AzureBlob/Configuration.cpp | 3 +- .../ObjectStorage/AzureBlob/Configuration.h | 4 ++- .../ObjectStorage/HDFS/Configuration.cpp | 28 +++++++++++++------ .../ObjectStorage/HDFS/Configuration.h | 4 ++- .../ReadFromStorageObjectStorage.cpp | 3 +- .../ReadFromStorageObjectStorage.h | 1 + .../ObjectStorage/S3/Configuration.cpp | 4 ++- src/Storages/ObjectStorage/S3/Configuration.h | 4 ++- .../ObjectStorage/StorageObjectStorage.cpp | 3 ++ .../StorageObjectStorageConfiguration.h | 1 + .../StorageObjectStorageSource.cpp | 16 ++++++++++- .../StorageObjectStorageSource.h | 2 +- .../TableFunctionObjectStorage.cpp | 4 +-- 14 files changed, 65 insertions(+), 19 deletions(-) diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 2d57ea40c9c..a720e243fdb 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -2661,6 +2661,13 @@ BoolMask KeyCondition::checkInHyperrectangle( else if (element.function == RPNElement::FUNCTION_IN_RANGE || element.function == 
RPNElement::FUNCTION_NOT_IN_RANGE) { + if (element.key_column >= hyperrectangle.size()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Hyperrectangle size is {}, but requested element at posittion {} ({})", + hyperrectangle.size(), element.key_column, element.toString()); + } + const Range * key_range = &hyperrectangle[element.key_column]; /// The case when the column is wrapped in a chain of possibly monotonic functions. diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 018cec51e7c..fe01251e58a 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -379,7 +379,8 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte blobs_paths = {blob_path}; } -void StorageAzureBlobConfiguration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) +void StorageAzureBlobConfiguration::addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & /* format */, ContextPtr context) { if (tryGetNamedCollectionWithOverrides(args, context)) { diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.h b/src/Storages/ObjectStorage/AzureBlob/Configuration.h index 8040d433d99..c12ff81197d 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.h +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.h @@ -26,6 +26,7 @@ public: const Paths & getPaths() const override { return blobs_paths; } Paths & getPaths() override { return blobs_paths; } + void setPaths(const Paths & paths) override { blobs_paths = paths; } String getDataSourceDescription() override { return fs::path(connection_url) / container; } String getNamespace() const override { return container; } @@ -36,7 +37,8 @@ public: void fromNamedCollection(const NamedCollection & collection) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; - static void addStructureToArgs(ASTs & args, const String & structure, ContextPtr context); + static void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context); protected: using AzureClient = Azure::Storage::Blobs::BlobContainerClient; diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 0a49ba5e251..220857fead6 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -139,7 +139,11 @@ void StorageHDFSConfiguration::setURL(const std::string & url_) LOG_TRACE(getLogger("StorageHDFSConfiguration"), "Using url: {}, path: {}", url, path); } -void StorageHDFSConfiguration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) +void StorageHDFSConfiguration::addStructureAndFormatToArgs( + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) { if (tryGetNamedCollectionWithOverrides(args, context)) { @@ -152,10 +156,13 @@ void StorageHDFSConfiguration::addStructureToArgs(ASTs & args, const String & st else { size_t count = args.size(); - if (count == 0 || count > 3) + if (count == 0 || count > 4) + { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Expected 1 to 3 arguments in table function, got {}", count); + "Expected 1 to 4 arguments in table function, got {}", count); + } + auto format_literal = std::make_shared(format_); auto structure_literal = 
std::make_shared(structure_); /// hdfs(url) @@ -168,15 +175,18 @@ void StorageHDFSConfiguration::addStructureToArgs(ASTs & args, const String & st /// hdfs(url, format) else if (count == 2) { + if (checkAndGetLiteralArgument(args[1], "format") == "auto") + args.back() = format_literal; args.push_back(structure_literal); } - /// hdfs(url, format, compression_method) - else if (count == 3) + /// hdfs(url, format, structure) + /// hdfs(url, format, structure, compression_method) + else if (count >= 3) { - auto compression_method = args.back(); - args.pop_back(); - args.push_back(structure_literal); - args.push_back(compression_method); + if (checkAndGetLiteralArgument(args[1], "format") == "auto") + args[1] = format_literal; + if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + args[2] = structure_literal; } } } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 7dc1f8073c1..23a7e8e4549 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -21,6 +21,7 @@ public: const Paths & getPaths() const override { return paths; } Paths & getPaths() override { return paths; } + void setPaths(const Paths & paths_) override { paths = paths_; } String getNamespace() const override { return ""; } String getDataSourceDescription() override { return url; } @@ -29,7 +30,8 @@ public: ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } - static void addStructureToArgs(ASTs &, const String &, ContextPtr); + static void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context); std::string getPathWithoutGlob() const override; diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp index f2595299430..89d33191f41 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp @@ -9,6 +9,7 @@ ReadFromStorageObejctStorage::ReadFromStorageObejctStorage( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, const String & name_, + const Names & columns_to_read, const NamesAndTypesList & virtual_columns_, const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, @@ -24,7 +25,7 @@ ReadFromStorageObejctStorage::ReadFromStorageObejctStorage( CurrentMetrics::Metric metric_threads_count_, CurrentMetrics::Metric metric_threads_active_, CurrentMetrics::Metric metric_threads_scheduled_) - : SourceStepWithFilter(DataStream{.header = info_.source_header}, info_.requested_columns.getNames(), query_info_, storage_snapshot_, context_) + : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) , object_storage(object_storage_) , configuration(configuration_) , info(std::move(info_)) diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h index 44b992f8c12..c0dd02d75f8 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h +++ b/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h @@ -15,6 +15,7 @@ public: ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, const String & name_, + const Names & columns_to_read, const NamesAndTypesList & 
virtual_columns_, const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 132a5045d8a..f532af24017 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -330,7 +330,8 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_ keys = {url.key}; } -void StorageS3Configuration::addStructureToArgs(ASTs & args, const String & structure_, ContextPtr context) +void StorageS3Configuration::addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) { if (tryGetNamedCollectionWithOverrides(args, context)) { @@ -348,6 +349,7 @@ void StorageS3Configuration::addStructureToArgs(ASTs & args, const String & stru if (count == 0 || count > 6) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to 6 arguments in table function, got {}", count); + auto format_literal = std::make_shared(format_); auto structure_literal = std::make_shared(structure_); /// s3(s3_url) diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index f9614da4b95..ff5e8680e66 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -22,6 +22,7 @@ public: const Paths & getPaths() const override { return keys; } Paths & getPaths() override { return keys; } + void setPaths(const Paths & paths) override { keys = paths; } String getNamespace() const override { return url.bucket; } String getDataSourceDescription() override; @@ -33,7 +34,8 @@ public: bool isStaticConfiguration() const override { return static_configuration; } ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT - static void addStructureToArgs(ASTs & args, const String & structure, ContextPtr context); + static void addStructureAndFormatToArgs( + ASTs & args, const String & structure, const String & format, ContextPtr context); private: void fromNamedCollection(const NamedCollection & collection) override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 84810c117c9..8fc3de4de1b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -158,10 +158,13 @@ void StorageObjectStorage::read( const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII SOURCE HEADER: {}", read_from_format_info.source_header.dumpStructure()); + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII FORMAT HEADER: {}", read_from_format_info.format_header.dumpStructure()); auto read_step = std::make_unique( object_storage, configuration, getName(), + column_names, getVirtualsList(), query_info, storage_snapshot, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 48825c6a012..647575aaa90 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -32,6 +32,7 @@ public: virtual const Paths & getPaths() const = 0; virtual Paths & getPaths() = 0; + virtual void setPaths(const Paths 
& paths) = 0; virtual String getDataSourceDescription() = 0; virtual String getNamespace() const = 0; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index fd3ac58b1a2..30316af987c 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -106,8 +106,21 @@ std::shared_ptr StorageObjectStorageSourc } else { + ConfigurationPtr copy_configuration = configuration->clone(); + auto filter_dag = VirtualColumnUtils::createPathAndFileFilterDAG(predicate, virtual_columns); + if (filter_dag) + { + auto keys = configuration->getPaths(); + std::vector paths; + paths.reserve(keys.size()); + for (const auto & key : keys) + paths.push_back(fs::path(configuration->getNamespace()) / key); + VirtualColumnUtils::filterByPathOrFile(keys, paths, filter_dag, virtual_columns, local_context); + copy_configuration->setPaths(keys); + } + return std::make_shared( - object_storage, configuration, virtual_columns, read_keys, + object_storage, copy_configuration, virtual_columns, read_keys, settings.ignore_non_existent_file, file_progress_callback); } } @@ -247,6 +260,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade const auto max_parsing_threads = need_only_count ? std::optional(1) : std::nullopt; read_buf = createReadBuffer(object_info->relative_path, object_info->metadata->size_bytes); + LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII HEADER: {}", read_from_format_info.format_header.dumpStructure()); auto input_format = FormatFactory::instance().getInput( configuration->format, *read_buf, read_from_format_info.format_header, getContext(), max_block_size, format_settings, max_parsing_threads, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 3d4cc4fbd20..28962aadecd 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -45,7 +45,7 @@ public: void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override { - setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.source_header); + setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); } Chunk generate() override; diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index d407017d5f7..9223642a7e6 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -65,9 +65,9 @@ std::vector TableFunctionObjectStorage< template void TableFunctionObjectStorage::updateStructureAndFormatArgumentsIfNeeded( - ASTs & args, const String & structure, const String & /* format */, const ContextPtr & context) + ASTs & args, const String & structure, const String & format, const ContextPtr & context) { - Configuration::addStructureToArgs(args, structure, context); + Configuration::addStructureAndFormatToArgs(args, structure, format, context); } template From e8f02af78c418f7c0a521bd48d49fcfb91db455f Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Tue, 9 Apr 2024 09:49:32 +0000 Subject: [PATCH 064/392] fix part splitter wrongly add ranges with undefined end mark value to non-intersect part Signed-off-by: Duc Canh Le --- src/Processors/QueryPlan/PartsSplitter.cpp | 11 ++++------ .../03033_final_undefined_last_mark.reference | 1 + 
.../03033_final_undefined_last_mark.sql | 21 +++++++++++++++++++ 3 files changed, 26 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/03033_final_undefined_last_mark.reference create mode 100644 tests/queries/0_stateless/03033_final_undefined_last_mark.sql diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 2af1bcb0260..d3425bce2a3 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -609,14 +609,11 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts, } /// Process parts ranges with undefined value at end mark - bool is_intersecting = part_index_start_to_range.size() > 1; + /// The last parts ranges could be non-intersect only if: (1) there is only one part range left, (2) it belongs to a non-L0 part, + /// and (3) the begin value of this range is larger than the largest end value of all previous ranges. This is too complicated + /// to check, so we just add the last part ranges to the intersecting ranges. for (const auto & [part_range_index, mark_range] : part_index_start_to_range) - { - if (is_intersecting) - add_intersecting_range(part_range_index.part_index, mark_range); - else - add_non_intersecting_range(part_range_index.part_index, mark_range); - } + add_intersecting_range(part_range_index.part_index, mark_range); auto && non_intersecting_ranges_in_data_parts = std::move(non_intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts()); auto && intersecting_ranges_in_data_parts = std::move(intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts()); diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference new file mode 100644 index 00000000000..7b82946b108 --- /dev/null +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference @@ -0,0 +1 @@ +GOOD 11338881281426660955 14765404159170880511 diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql new file mode 100644 index 00000000000..183406f803c --- /dev/null +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql @@ -0,0 +1,21 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings + +CREATE TABLE account_test +( + `id` UInt64, + `row_ver` UInt64, +) +ENGINE = ReplacingMergeTree(row_ver) +PARTITION BY id % 64 +ORDER BY id +SETTINGS index_granularity = 512, index_granularity_bytes = 0, + min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, + min_rows_for_compact_part = 0, min_bytes_for_compact_part = 0; + +INSERT INTO account_test + SELECT * FROM generateRandom('id UInt64, row_ver UInt64',1234) LIMIT 50000; + +INSERT INTO account_test + SELECT * FROM (SELECT * FROM generateRandom('id UInt64, row_ver UInt64',1234) LIMIT 1000) WHERE row_ver > 14098131981223776000; + +SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1; From f41d88b990052e06ae7dd87826662d664c4f54e8 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 10 Apr 2024 05:43:50 +0000 Subject: [PATCH 065/392] add reference query to test Signed-off-by: Duc Canh Le --- .../0_stateless/03033_final_undefined_last_mark.reference | 1 + tests/queries/0_stateless/03033_final_undefined_last_mark.sql | 2 ++ 2 files changed, 3 insertions(+) diff --git 
a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference index 7b82946b108..bf0a25f24e4 100644 --- a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference @@ -1 +1,2 @@ GOOD 11338881281426660955 14765404159170880511 +GOOD 11338881281426660955 14765404159170880511 diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql index 183406f803c..2c13da42ca4 100644 --- a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql @@ -18,4 +18,6 @@ INSERT INTO account_test INSERT INTO account_test SELECT * FROM (SELECT * FROM generateRandom('id UInt64, row_ver UInt64',1234) LIMIT 1000) WHERE row_ver > 14098131981223776000; +SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 0; SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1; + From 14c461338b12719daa1dc044148f914fd6a5fac6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 10 Apr 2024 12:56:29 +0200 Subject: [PATCH 066/392] Replay ZK logs using keeper-bench --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 4 +- src/Common/ZooKeeper/ZooKeeperImpl.h | 3 +- utils/keeper-bench/CMakeLists.txt | 3 +- utils/keeper-bench/Generator.cpp | 194 +----- utils/keeper-bench/Generator.h | 18 - utils/keeper-bench/Runner.cpp | 821 ++++++++++++++++++++++++- utils/keeper-bench/Runner.h | 77 ++- utils/keeper-bench/main.cpp | 24 +- 8 files changed, 875 insertions(+), 269 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 2185d32e47a..ed7498b1ac9 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1259,11 +1259,13 @@ void ZooKeeper::initFeatureFlags() void ZooKeeper::executeGenericRequest( const ZooKeeperRequestPtr & request, - ResponseCallback callback) + ResponseCallback callback, + WatchCallbackPtr watch) { RequestInfo request_info; request_info.request = request; request_info.callback = callback; + request_info.watch = watch; pushRequest(std::move(request_info)); } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index cf331a03d06..8fdf0f97d9d 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -139,7 +139,8 @@ public: void executeGenericRequest( const ZooKeeperRequestPtr & request, - ResponseCallback callback); + ResponseCallback callback, + WatchCallbackPtr watch = nullptr); /// See the documentation about semantics of these methods in IKeeper class. 
diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index 5514c34f4ef..4fe0d852fd2 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -4,5 +4,4 @@ if (NOT TARGET ch_contrib::rapidjson) endif () clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -target_link_libraries(keeper-bench PRIVATE dbms) -target_link_libraries(keeper-bench PRIVATE ch_contrib::rapidjson) +target_link_libraries(keeper-bench PRIVATE dbms clickhouse_functions ch_contrib::rapidjson) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index 2212f7158ae..cbf1bcdae23 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -40,54 +40,6 @@ std::string generateRandomString(size_t length) } } -void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) -{ - namespace fs = std::filesystem; - - auto promise = std::make_shared>(); - auto future = promise->get_future(); - - Strings children; - auto list_callback = [promise, &children] (const ListResponse & response) - { - children = response.names; - - promise->set_value(); - }; - zookeeper.list(path, ListRequestType::ALL, list_callback, nullptr); - future.get(); - - while (!children.empty()) - { - Coordination::Requests ops; - for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i) - { - removeRecursive(zookeeper, fs::path(path) / children.back()); - ops.emplace_back(makeRemoveRequest(fs::path(path) / children.back(), -1)); - children.pop_back(); - } - auto multi_promise = std::make_shared>(); - auto multi_future = multi_promise->get_future(); - - auto multi_callback = [multi_promise] (const MultiResponse &) - { - multi_promise->set_value(); - }; - zookeeper.multi(ops, multi_callback); - multi_future.get(); - } - auto remove_promise = std::make_shared>(); - auto remove_future = remove_promise->get_future(); - - auto remove_callback = [remove_promise] (const RemoveResponse &) - { - remove_promise->set_value(); - }; - - zookeeper.remove(path, -1, remove_callback); - remove_future.get(); -} - NumberGetter NumberGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value) { @@ -603,148 +555,16 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) acl.id = "anyone"; default_acls.emplace_back(std::move(acl)); - static const std::string generator_key = "generator"; - - std::cerr << "---- Parsing setup ---- " << std::endl; - static const std::string setup_key = generator_key + ".setup"; - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(setup_key, keys); - for (const auto & key : keys) - { - if (key.starts_with("node")) - { - auto node_key = setup_key + "." 
+ key; - auto parsed_root_node = parseNode(node_key, config); - const auto node = root_nodes.emplace_back(parsed_root_node); - - if (config.has(node_key + ".repeat")) - { - if (!node->name.isRandom()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); - - auto repeat_count = config.getUInt64(node_key + ".repeat"); - node->repeat_count = repeat_count; - for (size_t i = 1; i < repeat_count; ++i) - root_nodes.emplace_back(node->clone()); - } - - std::cerr << "Tree to create:" << std::endl; - - node->dumpTree(); - std::cerr << std::endl; - } - } - std::cerr << "---- Done parsing data setup ----\n" << std::endl; - std::cerr << "---- Collecting request generators ----" << std::endl; - static const std::string requests_key = generator_key + ".requests"; + static const std::string requests_key = "generator.requests"; request_getter = RequestGetter::fromConfig(requests_key, config); std::cerr << request_getter.description() << std::endl; std::cerr << "---- Done collecting request generators ----\n" << std::endl; } -std::shared_ptr Generator::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) -{ - auto node = std::make_shared(); - node->name = StringGetter::fromConfig(key + ".name", config); - - if (config.has(key + ".data")) - node->data = StringGetter::fromConfig(key + ".data", config); - - Poco::Util::AbstractConfiguration::Keys node_keys; - config.keys(key, node_keys); - - for (const auto & node_key : node_keys) - { - if (!node_key.starts_with("node")) - continue; - - const auto node_key_string = key + "." + node_key; - auto child_node = parseNode(node_key_string, config); - node->children.push_back(child_node); - - if (config.has(node_key_string + ".repeat")) - { - if (!child_node->name.isRandom()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); - - auto repeat_count = config.getUInt64(node_key_string + ".repeat"); - child_node->repeat_count = repeat_count; - for (size_t i = 1; i < repeat_count; ++i) - node->children.push_back(child_node); - } - } - - return node; -} - -void Generator::Node::dumpTree(int level) const -{ - std::string data_string - = data.has_value() ? fmt::format("{}", data->description()) : "no data"; - - std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; - - std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; - - for (auto it = children.begin(); it != children.end();) - { - const auto & child = *it; - child->dumpTree(level + 1); - std::advance(it, child->repeat_count != 0 ? 
child->repeat_count : 1); - } -} - -std::shared_ptr Generator::Node::clone() const -{ - auto new_node = std::make_shared(); - new_node->name = name; - new_node->data = data; - new_node->repeat_count = repeat_count; - - // don't do deep copy of children because we will do clone only for root nodes - new_node->children = children; - - return new_node; -} - -void Generator::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const -{ - auto path = std::filesystem::path(parent_path) / name.getString(); - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path, data ? data->getString() : "", false, false, acls, create_callback); - future.get(); - - for (const auto & child : children) - child->createNode(zookeeper, path, acls); -} - void Generator::startup(Coordination::ZooKeeper & zookeeper) { - std::cerr << "---- Creating test data ----" << std::endl; - for (const auto & node : root_nodes) - { - auto node_name = node->name.getString(); - node->name.setString(node_name); - - std::string root_path = std::filesystem::path("/") / node_name; - std::cerr << "Cleaning up " << root_path << std::endl; - removeRecursive(zookeeper, root_path); - - node->createNode(zookeeper, "/", default_acls); - } - std::cerr << "---- Created test data ----\n" << std::endl; - std::cerr << "---- Initializing generators ----" << std::endl; - request_getter.startup(zookeeper); } @@ -752,15 +572,3 @@ Coordination::ZooKeeperRequestPtr Generator::generate() { return request_getter.getRequestGenerator()->generate(default_acls); } - -void Generator::cleanup(Coordination::ZooKeeper & zookeeper) -{ - std::cerr << "---- Cleaning up test data ----" << std::endl; - for (const auto & node : root_nodes) - { - auto node_name = node->name.getString(); - std::string root_path = std::filesystem::path("/") / node_name; - std::cerr << "Cleaning up " << root_path << std::endl; - removeRecursive(zookeeper, root_path); - } -} diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index 5b4c05b2d8b..35dce1a95d9 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -173,27 +173,9 @@ public: void startup(Coordination::ZooKeeper & zookeeper); Coordination::ZooKeeperRequestPtr generate(); - void cleanup(Coordination::ZooKeeper & zookeeper); private: - struct Node - { - StringGetter name; - std::optional data; - std::vector> children; - size_t repeat_count = 0; - - std::shared_ptr clone() const; - - void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; - void dumpTree(int level = 0) const; - }; - - static std::shared_ptr parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config); std::uniform_int_distribution request_picker; - std::vector> root_nodes; RequestGetter request_getter; Coordination::ACLs default_acls; }; - -std::optional getGenerator(const std::string & name); diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index a4b579f1f7b..8b111f5adb9 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1,14 +1,28 @@ #include "Runner.h" +#include +#include #include +#include 
"Common/ConcurrentBoundedQueue.h" +#include "Common/ZooKeeper/IKeeper.h" +#include "Common/ZooKeeper/ZooKeeperArgs.h" #include "Common/ZooKeeper/ZooKeeperCommon.h" #include "Common/ZooKeeper/ZooKeeperConstants.h" #include #include -#include "IO/ReadBufferFromString.h" +#include "Core/ColumnWithTypeAndName.h" +#include "Core/ColumnsWithTypeAndName.h" +#include "IO/ReadBuffer.h" +#include "IO/ReadBufferFromFile.h" +#include "base/Decimal.h" +#include "base/types.h" +#include #include #include #include +#include +#include +#include namespace CurrentMetrics @@ -22,23 +36,41 @@ namespace DB::ErrorCodes { extern const int CANNOT_BLOCK_SIGNAL; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } Runner::Runner( std::optional concurrency_, const std::string & config_path, + const std::string & input_request_log_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, std::optional continue_on_error_, std::optional max_iterations_) - : info(std::make_shared()) + : input_request_log(input_request_log_) + , info(std::make_shared()) { DB::ConfigProcessor config_processor(config_path, true, false); - auto config = config_processor.loadConfig().configuration; + DB::ConfigurationPtr config = nullptr; + + if (!config_path.empty()) + { + config = config_processor.loadConfig().configuration; + + if (config->has("generator")) + generator.emplace(*config); + } + else + { + if (input_request_log.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both --config and --input_request_log cannot be empty"); + + if (!std::filesystem::exists(input_request_log)) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "File on path {} does not exist", input_request_log); + } - generator.emplace(*config); if (!hosts_strings_.empty()) { @@ -57,6 +89,8 @@ Runner::Runner( static constexpr uint64_t DEFAULT_CONCURRENCY = 1; if (concurrency_) concurrency = *concurrency_; + else if (!config) + concurrency = DEFAULT_CONCURRENCY; else concurrency = config->getUInt64("concurrency", DEFAULT_CONCURRENCY); std::cerr << "Concurrency: " << concurrency << std::endl; @@ -64,6 +98,8 @@ Runner::Runner( static constexpr uint64_t DEFAULT_ITERATIONS = 0; if (max_iterations_) max_iterations = *max_iterations_; + else if (!config) + max_iterations = DEFAULT_ITERATIONS; else max_iterations = config->getUInt64("iterations", DEFAULT_ITERATIONS); std::cerr << "Iterations: " << max_iterations << std::endl; @@ -71,6 +107,8 @@ Runner::Runner( static constexpr double DEFAULT_DELAY = 1.0; if (delay_) delay = *delay_; + else if (!config) + delay = DEFAULT_DELAY; else delay = config->getDouble("report_delay", DEFAULT_DELAY); std::cerr << "Report delay: " << delay << std::endl; @@ -78,44 +116,48 @@ Runner::Runner( static constexpr double DEFAULT_TIME_LIMIT = 0.0; if (max_time_) max_time = *max_time_; + else if (!config) + max_time = DEFAULT_TIME_LIMIT; else max_time = config->getDouble("timelimit", DEFAULT_TIME_LIMIT); std::cerr << "Time limit: " << max_time << std::endl; if (continue_on_error_) continue_on_error = *continue_on_error_; + else if (!config) + continue_on_error_ = false; else continue_on_error = config->getBool("continue_on_error", false); std::cerr << "Continue on error: " << continue_on_error << std::endl; - static const std::string output_key = "output"; - print_to_stdout = config->getBool(output_key + ".stdout", false); - std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; - - static const std::string output_file_key = output_key + ".file"; - if 
(config->has(output_file_key)) + if (config) { - if (config->has(output_file_key + ".path")) - { - file_output = config->getString(output_file_key + ".path"); - output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); - } - else - file_output = config->getString(output_file_key); + benchmark_context.initializeFromConfig(*config); - std::cerr << "Result file path: " << file_output->string() << std::endl; + static const std::string output_key = "output"; + print_to_stdout = config->getBool(output_key + ".stdout", false); + std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; + + static const std::string output_file_key = output_key + ".file"; + if (config->has(output_file_key)) + { + if (config->has(output_file_key + ".path")) + { + file_output = config->getString(output_file_key + ".path"); + output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); + } + else + file_output = config->getString(output_file_key); + + std::cerr << "Result file path: " << file_output->string() << std::endl; + } } std::cerr << "---- Run options ----\n" << std::endl; - - pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency); - queue.emplace(concurrency); } void Runner::parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config) { - ConnectionInfo default_connection_info; - const auto fill_connection_details = [&](const std::string & key, auto & connection_info) { if (config.has(key + ".secure")) @@ -328,9 +370,519 @@ bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && re void Runner::runBenchmark() { + if (generator) + runBenchmarkWithGenerator(); + else + runBenchmarkFromLog(); +} + + +struct ZooKeeperRequestBlock +{ + explicit ZooKeeperRequestBlock(DB::Block block_) + : block(std::move(block_)) + , hostname_idx(block.getPositionByName("hostname")) // + , request_event_time_idx(block.getPositionByName("request_event_time")) // + , thread_id_idx(block.getPositionByName("thread_id")) // + , session_id_idx(block.getPositionByName("session_id")) // + , xid_idx(block.getPositionByName("xid")) // + , has_watch_idx(block.getPositionByName("has_watch")) + , op_num_idx(block.getPositionByName("op_num")) + , path_idx(block.getPositionByName("path")) + , data_idx(block.getPositionByName("data")) + , is_ephemeral_idx(block.getPositionByName("is_ephemeral")) + , is_sequential_idx(block.getPositionByName("is_sequential")) + , response_event_time_idx(block.getPositionByName("response_event_time")) // + , error_idx(block.getPositionByName("error")) + , requests_size_idx(block.getPositionByName("requests_size")) + , version_idx(block.getPositionByName("version")) + {} + + size_t rows() const + { + return block.rows(); + } + + UInt64 getExecutorId(size_t row) const + { + return getSessionId(row); + } + + std::string getHostname(size_t row) const + { + return getField(hostname_idx, row).safeGet(); + } + + UInt64 getThreadId(size_t row) const + { + return getField(thread_id_idx, row).safeGet(); + } + + DB::DateTime64 getRequestEventTime(size_t row) const + { + return getField(request_event_time_idx, row).safeGet(); + } + + DB::DateTime64 getResponseEventTime(size_t row) const + { + return getField(response_event_time_idx, row).safeGet(); + } + + Int64 getSessionId(size_t row) const + { + return getField(session_id_idx, row).safeGet(); + } + + Int64 getXid(size_t row) const + { + return getField(xid_idx, row).safeGet(); + } + + bool hasWatch(size_t 
row) const + { + return getField(has_watch_idx, row).safeGet(); + } + + Coordination::OpNum getOpNum(size_t row) const + { + return static_cast(getField(op_num_idx, row).safeGet()); + } + + bool isEphemeral(size_t row) const + { + return getField(is_ephemeral_idx, row).safeGet(); + } + + bool isSequential(size_t row) const + { + return getField(is_sequential_idx, row).safeGet(); + } + + std::string getPath(size_t row) const + { + return getField(path_idx, row).safeGet(); + } + + std::string getData(size_t row) const + { + return getField(data_idx, row).safeGet(); + } + + UInt64 getRequestsSize(size_t row) const + { + return getField(requests_size_idx, row).safeGet(); + } + + std::optional getVersion(size_t row) const + { + auto field = getField(version_idx, row); + if (field.isNull()) + return std::nullopt; + return static_cast(field.safeGet()); + } + + std::optional getError(size_t row) const + { + auto field = getField(error_idx, row); + if (field.isNull()) + return std::nullopt; + + return static_cast(field.safeGet()); + } +private: + DB::Field getField(size_t position, size_t row) const + { + DB::Field field; + block.getByPosition(position).column->get(row, field); + return field; + } + + DB::Block block; + size_t hostname_idx = 0; + size_t request_event_time_idx = 0; + size_t thread_id_idx = 0; + size_t session_id_idx = 0; + size_t xid_idx = 0; + size_t has_watch_idx = 0; + size_t op_num_idx = 0; + size_t path_idx = 0; + size_t data_idx = 0; + size_t is_ephemeral_idx = 0; + size_t is_sequential_idx = 0; + size_t response_event_time_idx = 0; + size_t error_idx = 0; + size_t requests_size_idx = 0; + size_t version_idx = 0; +}; + +struct RequestFromLog +{ + Coordination::ZooKeeperRequestPtr request; + std::optional expected_result; + int64_t session_id = 0; + size_t executor_id = 0; + bool has_watch = false; + DB::DateTime64 request_event_time; + DB::DateTime64 response_event_time; + std::shared_ptr connection; +}; + +struct ZooKeeperRequestFromLogReader +{ + ZooKeeperRequestFromLogReader(const std::string & input_request_log, DB::ContextPtr context) + { + std::optional format_settings; + + file_read_buf = std::make_unique(input_request_log); + auto compression_method = DB::chooseCompressionMethod(input_request_log, ""); + file_read_buf = DB::wrapReadBufferWithCompressionMethod(std::move(file_read_buf), compression_method); + + DB::SingleReadBufferIterator read_buffer_iterator(std::move(file_read_buf)); + auto [columns_description, format] = DB::detectFormatAndReadSchema(format_settings, read_buffer_iterator, context); + + DB::ColumnsWithTypeAndName columns; + columns.reserve(columns_description.size()); + + for (const auto & column_description : columns_description) + columns.push_back(DB::ColumnWithTypeAndName{column_description.type, column_description.name}); + + header_block = std::move(columns); + + file_read_buf + = DB::wrapReadBufferWithCompressionMethod(std::make_unique(input_request_log), compression_method); + + input_format = DB::FormatFactory::instance().getInput( + format, + *file_read_buf, + header_block, + context, + context->getSettingsRef().max_block_size, + format_settings, + 1, + std::nullopt, + /*is_remote_fs*/ false, + DB::CompressionMethod::None, + false); + + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + default_acls.emplace_back(std::move(acl)); + } + + std::optional getNextRequest(bool for_multi = false) + { + RequestFromLog request_from_log; + + if (!current_block) + { + auto chunk = 
input_format->generate(); + + if (chunk.empty()) + return std::nullopt; + + current_block.emplace(header_block.cloneWithColumns(chunk.detachColumns())); + idx_in_block = 0; + } + + + request_from_log.expected_result = current_block->getError(idx_in_block); + request_from_log.session_id = current_block->getSessionId(idx_in_block); + request_from_log.has_watch = current_block->hasWatch(idx_in_block); + request_from_log.executor_id = current_block->getExecutorId(idx_in_block); + request_from_log.request_event_time = current_block->getRequestEventTime(idx_in_block); + request_from_log.response_event_time = current_block->getResponseEventTime(idx_in_block); + + const auto move_row_iterator = [&] + { + if (idx_in_block == current_block->rows() - 1) + current_block.reset(); + else + ++idx_in_block; + }; + + auto op_num = current_block->getOpNum(idx_in_block); + switch (op_num) + { + case Coordination::OpNum::Create: + { + auto create_request = std::make_shared(); + create_request->path = current_block->getPath(idx_in_block); + create_request->data = current_block->getData(idx_in_block); + create_request->is_ephemeral = current_block->isEphemeral(idx_in_block); + create_request->is_sequential = current_block->isSequential(idx_in_block); + request_from_log.request = create_request; + break; + } + case Coordination::OpNum::Set: + { + auto set_request = std::make_shared(); + set_request->path = current_block->getPath(idx_in_block); + set_request->data = current_block->getData(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + set_request->version = *version; + request_from_log.request = set_request; + break; + } + case Coordination::OpNum::Remove: + { + auto remove_request = std::make_shared(); + remove_request->path = current_block->getPath(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + remove_request->version = *version; + request_from_log.request = remove_request; + break; + } + case Coordination::OpNum::Check: + { + auto check_request = std::make_shared(); + check_request->path = current_block->getPath(idx_in_block); + if (auto version = current_block->getVersion(idx_in_block)) + check_request->version = *version; + request_from_log.request = check_request; + break; + } + case Coordination::OpNum::Sync: + { + auto sync_request = std::make_shared(); + sync_request->path = current_block->getPath(idx_in_block); + request_from_log.request = sync_request; + break; + } + case Coordination::OpNum::Get: + { + auto get_request = std::make_shared(); + get_request->path = current_block->getPath(idx_in_block); + request_from_log.request = get_request; + break; + } + case Coordination::OpNum::SimpleList: + case Coordination::OpNum::FilteredList: + { + auto list_request = std::make_shared(); + list_request->path = current_block->getPath(idx_in_block); + request_from_log.request = list_request; + break; + } + case Coordination::OpNum::Exists: + { + auto exists_request = std::make_shared(); + exists_request->path = current_block->getPath(idx_in_block); + request_from_log.request = exists_request; + break; + } + case Coordination::OpNum::Multi: + case Coordination::OpNum::MultiRead: + { + if (for_multi) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Nested multi requests are not allowed"); + + auto requests_size = current_block->getRequestsSize(idx_in_block); + + Coordination::Requests requests; + requests.reserve(requests_size); + move_row_iterator(); + + for (size_t i = 0; i < requests_size; ++i) + { + auto subrequest_from_log = 
getNextRequest(/*for_multi=*/true); + if (!subrequest_from_log) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to fetch subrequest for {}, subrequest index {}", op_num, i); + + requests.push_back(std::move(subrequest_from_log->request)); + + if (subrequest_from_log->session_id != request_from_log.session_id) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Session id mismatch for subrequest in {}, subrequest index {}", op_num, i); + + if (subrequest_from_log->executor_id != request_from_log.executor_id) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Executor id mismatch for subrequest in {}, subrequest index {}", op_num, i); + } + + request_from_log.request = std::make_shared(requests, default_acls); + + return request_from_log; + } + default: + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unsupported operation {} ({})", op_num, static_cast(op_num)); + } + + move_row_iterator(); + + return request_from_log; + } + +private: + DB::Block header_block; + + std::unique_ptr file_read_buf; + DB::InputFormatPtr input_format; + + std::optional current_block; + size_t idx_in_block = 0; + + Coordination::ACLs default_acls; +}; + + +namespace +{ + + +struct RequestFromLogStats +{ + struct Stats + { + std::atomic total = 0; + std::atomic unexpected_results = 0; + }; + + Stats write_requests; + Stats read_requests; +}; + +void dumpStats(std::string_view type, const RequestFromLogStats::Stats & stats_for_type) +{ + std::cerr << fmt::format( + "{} requests: {} total, {} with unexpected results ({:.4}%)", + type, + stats_for_type.total, + stats_for_type.unexpected_results, + static_cast(stats_for_type.unexpected_results) / stats_for_type.total * 100) + << std::endl; +}; + +void requestFromLogExecutor(std::shared_ptr> queue, RequestFromLogStats & request_stats) +{ + RequestFromLog request_from_log; + std::optional> last_request; + while (queue->pop(request_from_log)) + { + auto request_promise = std::make_shared>(); + last_request = request_promise->get_future(); + Coordination::ResponseCallback callback + = [&, request_promise, request = request_from_log.request, expected_result = request_from_log.expected_result]( + const Coordination::Response & response) mutable + { + auto & stats = request->isReadRequest() ? 
request_stats.read_requests : request_stats.write_requests; + + stats.total.fetch_add(1, std::memory_order_relaxed); + + if (*expected_result != response.error) + stats.unexpected_results.fetch_add(1, std::memory_order_relaxed); + + //if (!expected_result) + // return; + + //if (*expected_result != response.error) + // std::cerr << fmt::format( + // "Unexpected result for {}, got {}, expected {}", request->getOpNum(), response.error, *expected_result) + // << std::endl; + + request_promise->set_value(); + }; + + Coordination::WatchCallbackPtr watch; + if (request_from_log.has_watch) + watch = std::make_shared([](const Coordination::WatchResponse &) {}); + + request_from_log.connection->executeGenericRequest(request_from_log.request, callback, watch); + } + + if (last_request) + last_request->wait(); +} + +} + +void Runner::runBenchmarkFromLog() +{ + std::cerr << fmt::format("Running benchmark using requests from {}", input_request_log) << std::endl; + + pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency); + + shared_context = DB::Context::createShared(); + global_context = DB::Context::createGlobal(shared_context.get()); + global_context->makeGlobalContext(); + DB::registerFormats(); + + /// Randomly choosing connection index + pcg64 rng(randomSeed()); + std::uniform_int_distribution connection_distribution(0, connection_infos.size() - 1); + + std::unordered_map> zookeeper_connections; + auto get_zookeeper_connection = [&](int64_t session_id) + { + if (auto it = zookeeper_connections.find(session_id); it != zookeeper_connections.end() && !it->second->isExpired()) + return it->second; + + auto connection_idx = connection_distribution(rng); + auto zk_connection = getConnection(connection_infos[connection_idx], connection_idx); + zookeeper_connections.insert_or_assign(session_id, zk_connection); + return zk_connection; + }; + + RequestFromLogStats stats; + + + std::unordered_map>> executor_id_to_queue; + + SCOPE_EXIT({ + for (const auto & [executor_id, executor_queue] : executor_id_to_queue) + executor_queue->finish(); + + pool->wait(); + + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + }); + + auto push_request = [&](RequestFromLog request) + { + if (auto it = executor_id_to_queue.find(request.executor_id); it != executor_id_to_queue.end()) + { + auto success = it->second->push(std::move(request)); + if (!success) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to push to the executor's queue"); + return; + } + + auto executor_queue = std::make_shared>(std::numeric_limits().max()); + executor_id_to_queue.emplace(request.executor_id, executor_queue); + auto scheduled = pool->trySchedule([&, executor_queue]() mutable + { + requestFromLogExecutor(std::move(executor_queue), stats); + }); + + if (!scheduled) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to schedule worker, try to increase concurrency parameter"); + + auto success = executor_queue->push(std::move(request)); + if (!success) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to push to the executor's queue"); + }; + + { + auto setup_connection = getConnection(connection_infos[0], 0); + benchmark_context.startup(*setup_connection); + } + + ZooKeeperRequestFromLogReader request_reader(input_request_log, global_context); + while (auto request_from_log = request_reader.getNextRequest()) + { + request_from_log->connection = get_zookeeper_connection(request_from_log->session_id); + 
push_request(std::move(*request_from_log)); + } +} + +void Runner::runBenchmarkWithGenerator() +{ + pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, concurrency); + queue.emplace(concurrency); createConnections(); std::cerr << "Preparing to run\n"; + benchmark_context.startup(*connections[0]); generator->startup(*connections[0]); std::cerr << "Prepared\n"; @@ -458,8 +1010,225 @@ std::vector> Runner::refreshConnections Runner::~Runner() { - queue->clearAndFinish(); + if (queue) + queue->clearAndFinish(); shutdown = true; - pool->wait(); - generator->cleanup(*connections[0]); + + if (pool) + pool->wait(); + + auto connection = getConnection(connection_infos[0], 0); + benchmark_context.cleanup(*connection); +} + +namespace +{ + +void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) +{ + namespace fs = std::filesystem; + + auto promise = std::make_shared>(); + auto future = promise->get_future(); + + Strings children; + auto list_callback = [promise, &children] (const Coordination::ListResponse & response) + { + children = response.names; + promise->set_value(); + }; + zookeeper.list(path, Coordination::ListRequestType::ALL, list_callback, nullptr); + future.get(); + + std::span children_span(children); + while (!children_span.empty()) + { + Coordination::Requests ops; + for (size_t i = 0; i < 1000 && !children.empty(); ++i) + { + removeRecursive(zookeeper, fs::path(path) / children.back()); + ops.emplace_back(zkutil::makeRemoveRequest(fs::path(path) / children_span.back(), -1)); + children_span = children_span.subspan(0, children_span.size() - 1); + } + auto multi_promise = std::make_shared>(); + auto multi_future = multi_promise->get_future(); + + auto multi_callback = [multi_promise] (const Coordination::MultiResponse &) + { + multi_promise->set_value(); + }; + zookeeper.multi(ops, multi_callback); + multi_future.get(); + } + auto remove_promise = std::make_shared>(); + auto remove_future = remove_promise->get_future(); + + auto remove_callback = [remove_promise] (const Coordination::RemoveResponse &) + { + remove_promise->set_value(); + }; + + zookeeper.remove(path, -1, remove_callback); + remove_future.get(); +} + +} + +void BenchmarkContext::initializeFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + default_acls.emplace_back(std::move(acl)); + + std::cerr << "---- Parsing setup ---- " << std::endl; + static const std::string setup_key = "setup"; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(setup_key, keys); + for (const auto & key : keys) + { + if (key.starts_with("node")) + { + auto node_key = setup_key + "." 
+ key; + auto parsed_root_node = parseNode(node_key, config); + const auto node = root_nodes.emplace_back(parsed_root_node); + + if (config.has(node_key + ".repeat")) + { + if (!node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); + + auto repeat_count = config.getUInt64(node_key + ".repeat"); + node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + root_nodes.emplace_back(node->clone()); + } + + std::cerr << "Tree to create:" << std::endl; + + node->dumpTree(); + std::cerr << std::endl; + } + } + std::cerr << "---- Done parsing data setup ----\n" << std::endl; +} + +std::shared_ptr BenchmarkContext::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + auto node = std::make_shared(); + node->name = StringGetter::fromConfig(key + ".name", config); + + if (config.has(key + ".data")) + node->data = StringGetter::fromConfig(key + ".data", config); + + Poco::Util::AbstractConfiguration::Keys node_keys; + config.keys(key, node_keys); + + for (const auto & node_key : node_keys) + { + if (!node_key.starts_with("node")) + continue; + + const auto node_key_string = key + "." + node_key; + auto child_node = parseNode(node_key_string, config); + node->children.push_back(child_node); + + if (config.has(node_key_string + ".repeat")) + { + if (!child_node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); + + auto repeat_count = config.getUInt64(node_key_string + ".repeat"); + child_node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + node->children.push_back(child_node); + } + } + + return node; +} + +void BenchmarkContext::Node::dumpTree(int level) const +{ + std::string data_string + = data.has_value() ? fmt::format("{}", data->description()) : "no data"; + + std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; + + std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; + + for (auto it = children.begin(); it != children.end();) + { + const auto & child = *it; + child->dumpTree(level + 1); + std::advance(it, child->repeat_count != 0 ? child->repeat_count : 1); + } +} + +std::shared_ptr BenchmarkContext::Node::clone() const +{ + auto new_node = std::make_shared(); + new_node->name = name; + new_node->data = data; + new_node->repeat_count = repeat_count; + + // don't do deep copy of children because we will do clone only for root nodes + new_node->children = children; + + return new_node; +} + +void BenchmarkContext::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const +{ + auto path = std::filesystem::path(parent_path) / name.getString(); + auto promise = std::make_shared>(); + auto future = promise->get_future(); + auto create_callback = [promise] (const Coordination::CreateResponse & response) + { + if (response.error != Coordination::Error::ZOK) + promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); + else + promise->set_value(); + }; + zookeeper.create(path, data ? 
data->getString() : "", false, false, acls, create_callback); + future.get(); + + for (const auto & child : children) + child->createNode(zookeeper, path, acls); +} + +void BenchmarkContext::startup(Coordination::ZooKeeper & zookeeper) +{ + if (root_nodes.empty()) + return; + + std::cerr << "---- Creating test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + node->name.setString(node_name); + + std::string root_path = std::filesystem::path("/") / node_name; + std::cerr << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + + node->createNode(zookeeper, "/", default_acls); + } + std::cerr << "---- Created test data ----\n" << std::endl; +} + +void BenchmarkContext::cleanup(Coordination::ZooKeeper & zookeeper) +{ + if (root_nodes.empty()) + return; + + std::cerr << "---- Cleaning up test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + std::string root_path = std::filesystem::path("/") / node_name; + std::cerr << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + } } diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index 4f4a75e6ecf..0c646eb2166 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -1,5 +1,5 @@ #pragma once -#include "Common/ZooKeeper/ZooKeeperConstants.h" +#include "Common/ZooKeeper/ZooKeeperArgs.h" #include #include "Generator.h" #include @@ -12,6 +12,7 @@ #include #include +#include "Interpreters/Context.h" #include "Stats.h" #include @@ -19,12 +20,40 @@ using Ports = std::vector; using Strings = std::vector; +struct BenchmarkContext +{ +public: + void initializeFromConfig(const Poco::Util::AbstractConfiguration & config); + + void startup(Coordination::ZooKeeper & zookeeper); + void cleanup(Coordination::ZooKeeper & zookeeper); +private: + struct Node + { + StringGetter name; + std::optional data; + std::vector> children; + size_t repeat_count = 0; + + std::shared_ptr clone() const; + + void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; + void dumpTree(int level = 0) const; + }; + + static std::shared_ptr parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + std::vector> root_nodes; + Coordination::ACLs default_acls; +}; + class Runner { public: Runner( std::optional concurrency_, const std::string & config_path, + const std::string & input_request_log_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, @@ -44,8 +73,30 @@ public: ~Runner(); private: + struct ConnectionInfo + { + std::string host; + + bool secure = false; + int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; + int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; + int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; + bool use_compression = false; + + size_t sessions = 1; + }; + void parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config); + void runBenchmarkWithGenerator(); + void runBenchmarkFromLog(); + + void createConnections(); + std::vector> refreshConnections(); + std::shared_ptr getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx); + + std::string input_request_log; + size_t concurrency = 1; std::optional pool; @@ -54,7 +105,8 @@ private: double max_time = 0; double delay = 1; bool continue_on_error = false; - 
std::atomic max_iterations = 0; + size_t max_iterations = 0; + std::atomic requests_executed = 0; std::atomic shutdown = false; @@ -71,25 +123,14 @@ private: using Queue = ConcurrentBoundedQueue; std::optional queue; - struct ConnectionInfo - { - std::string host; - - bool secure = false; - int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; - int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS; - int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; - bool use_compression = false; - - size_t sessions = 1; - }; - std::mutex connection_mutex; + ConnectionInfo default_connection_info; std::vector connection_infos; std::vector> connections; std::unordered_map connections_to_info_map; - void createConnections(); - std::shared_ptr getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx); - std::vector> refreshConnections(); + DB::SharedContextHolder shared_context; + DB::ContextMutablePtr global_context; + + BenchmarkContext benchmark_context; }; diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index 0753d66850f..45fc28f3bca 100644 --- a/utils/keeper-bench/main.cpp +++ b/utils/keeper-bench/main.cpp @@ -1,8 +1,6 @@ #include #include #include "Runner.h" -#include "Stats.h" -#include "Generator.h" #include "Common/Exception.h" #include #include @@ -27,6 +25,10 @@ int main(int argc, char *argv[]) bool print_stacktrace = true; + //Poco::AutoPtr channel(new Poco::ConsoleChannel(std::cerr)); + //Poco::Logger::root().setChannel(channel); + //Poco::Logger::root().setLevel("trace"); + try { using boost::program_options::value; @@ -34,12 +36,13 @@ int main(int argc, char *argv[]) boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); desc.add_options() ("help", "produce help message") - ("config", value()->default_value(""), "yaml/xml file containing configuration") - ("concurrency,c", value(), "number of parallel queries") - ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") - ("iterations,i", value(), "amount of queries to be executed") - ("time-limit,t", value(), "stop launch of queries after specified time limit") - ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") + ("config", value()->default_value(""), "yaml/xml file containing configuration") + ("input-request-log", value()->default_value(""), "log of requests that will be replayed") + ("concurrency,c", value(), "number of parallel queries") + ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") + ("iterations,i", value(), "amount of queries to be executed") + ("time-limit,t", value(), "stop launch of queries after specified time limit") + ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") ("continue_on_errors", "continue testing even if a query fails") ; @@ -56,6 +59,7 @@ int main(int argc, char *argv[]) Runner runner(valueToOptional(options["concurrency"]), options["config"].as(), + options["input-request-log"].as(), options["hosts"].as(), valueToOptional(options["time-limit"]), valueToOptional(options["report-delay"]), @@ -66,9 +70,9 @@ int main(int argc, char *argv[]) { runner.runBenchmark(); } - catch (const DB::Exception & e) + catch (...) 
{ - std::cout << "Got exception while trying to run benchmark: " << e.message() << std::endl; + std::cout << "Got exception while trying to run benchmark: " << DB::getCurrentExceptionMessage(true) << std::endl; } return 0; From 652796acd6a10515e862260d18e002bae27f3c85 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 15 Apr 2024 16:37:38 +0100 Subject: [PATCH 067/392] Fix MergeTree with HDFS --- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 38 +++++++++++++++---- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 16 ++++++-- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 8bfba6fcfad..82c9a6c6c21 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -34,15 +34,21 @@ void HDFSObjectStorage::startup() ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { /// what ever data_source_description.description value is, consider that key as relative key - return ObjectStorageKey::createAsRelative(hdfs_root_path, getRandomASCIIString(32)); + chassert(data_directory.starts_with("/")); + return ObjectStorageKey::createAsRelative( + fs::path(url_without_path) / data_directory.substr(1), getRandomASCIIString(32)); } bool HDFSObjectStorage::exists(const StoredObject & object) const { + std::string path = object.remote_path; + if (path.starts_with(url_without_path)) + path = path.substr(url_without_path.size()); + // const auto & path = object.remote_path; // const size_t begin_of_path = path.find('/', path.find("//") + 2); // const String remote_fs_object_path = path.substr(begin_of_path); - return (0 == hdfsExists(hdfs_fs.get(), object.remote_path.c_str())); + return (0 == hdfsExists(hdfs_fs.get(), path.c_str())); } std::unique_ptr HDFSObjectStorage::readObject( /// NOLINT @@ -51,7 +57,14 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLIN std::optional, std::optional) const { - return std::make_unique(hdfs_root_path, object.remote_path, config, patchSettings(read_settings)); + std::string path = object.remote_path; + if (path.starts_with(url)) + path = path.substr(url.size()); + if (path.starts_with("/")) + path.substr(1); + + return std::make_unique( + fs::path(url_without_path) / "", fs::path(data_directory) / path, config, patchSettings(read_settings)); } std::unique_ptr HDFSObjectStorage::readObjects( /// NOLINT @@ -69,8 +82,13 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI // auto hdfs_path = path.substr(begin_of_path); // auto hdfs_uri = path.substr(0, begin_of_path); + std::string path = object_.remote_path; + if (path.starts_with(url)) + path = path.substr(url.size()); + if (path.starts_with("/")) + path.substr(1); return std::make_unique( - hdfs_root_path, object_.remote_path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); + fs::path(url_without_path) / "", fs::path(data_directory) / path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); }; return std::make_unique( @@ -89,8 +107,11 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL ErrorCodes::UNSUPPORTED_METHOD, "HDFS API doesn't support custom attributes/metadata for stored objects"); - auto path = object.remote_path.starts_with('/') ? 
object.remote_path.substr(1) : object.remote_path; - path = fs::path(hdfs_root_path) / path; + std::string path = object.remote_path; + if (path.starts_with("/")) + path = path.substr(1); + if (!path.starts_with(url)) + path = fs::path(url) / path; /// Single O_WRONLY in libhdfs adds O_TRUNC return std::make_unique( @@ -102,8 +123,9 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL /// Remove file. Throws exception if file doesn't exists or it's a directory. void HDFSObjectStorage::removeObject(const StoredObject & object) { - const auto & path = object.remote_path; - // const size_t begin_of_path = path.find('/', path.find("//") + 2); + auto path = object.remote_path; + if (path.starts_with(url_without_path)) + path = path.substr(url_without_path.size()); /// Add path from root to file name int res = hdfsDelete(hdfs_fs.get(), path.c_str(), 0); diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 24642ec635a..8987fa5eaf1 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -40,15 +40,21 @@ public: , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) , hdfs_fs(createHDFSFS(hdfs_builder.get())) , settings(std::move(settings_)) - , hdfs_root_path(hdfs_root_path_) { + const size_t begin_of_path = hdfs_root_path_.find('/', hdfs_root_path_.find("//") + 2); + url = hdfs_root_path_; + url_without_path = url.substr(0, begin_of_path); + if (begin_of_path < url.size()) + data_directory = url.substr(begin_of_path); + else + data_directory = "/"; } std::string getName() const override { return "HDFSObjectStorage"; } - std::string getCommonKeyPrefix() const override { return hdfs_root_path; } + std::string getCommonKeyPrefix() const override { return url; } - std::string getDescription() const override { return hdfs_root_path; } + std::string getDescription() const override { return url; } ObjectStorageType getType() const override { return ObjectStorageType::HDFS; } @@ -116,7 +122,9 @@ private: HDFSBuilderWrapper hdfs_builder; HDFSFSPtr hdfs_fs; SettingsPtr settings; - const std::string hdfs_root_path; + std::string url; + std::string url_without_path; + std::string data_directory; }; } From ccee2d668793370c3f947a4be24d1edbabba1724 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 15 Apr 2024 23:28:14 +0100 Subject: [PATCH 068/392] Fix parsing --- src/Storages/ObjectStorage/HDFS/Configuration.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 220857fead6..e12c2f15b28 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -63,9 +63,6 @@ std::string StorageHDFSConfiguration::getPathWithoutGlob() const void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) { - std::string url_str; - url_str = checkAndGetLiteralArgument(args[0], "url"); - const size_t max_args_num = with_structure ? 
4 : 3; if (!args.size() || args.size() > max_args_num) { @@ -73,6 +70,9 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit "Expected not more than {} arguments", max_args_num); } + std::string url_str; + url_str = checkAndGetLiteralArgument(args[0], "url"); + if (args.size() > 1) { args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); From 11be538ac870d231a13a2648038ea1b469f73a08 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 16 Apr 2024 10:20:56 +0100 Subject: [PATCH 069/392] Fix several tests --- src/Disks/ObjectStorages/S3/diskSettings.cpp | 8 +++++-- src/Disks/ObjectStorages/S3/diskSettings.h | 3 ++- .../ObjectStorage/AzureBlob/Configuration.cpp | 7 +++--- .../ObjectStorage/HDFS/Configuration.cpp | 2 +- .../ObjectStorage/S3/Configuration.cpp | 2 +- .../StorageObjectStorageSink.cpp | 3 +-- src/Storages/S3Queue/S3QueueSource.cpp | 14 ++++++++++++ src/Storages/S3Queue/S3QueueSource.h | 1 + src/Storages/StorageS3Settings.cpp | 22 +++++++++++-------- src/Storages/StorageS3Settings.h | 10 +++++---- 10 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 9bd4bf699e8..2bca7df7db9 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -36,11 +36,15 @@ extern const int NO_ELEMENTS_IN_CONFIG; } std::unique_ptr getSettings( - const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context, + bool validate_settings) { const Settings & settings = context->getSettingsRef(); - auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_"); + auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_", validate_settings); auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); + return std::make_unique( request_settings, auth_settings, diff --git a/src/Disks/ObjectStorages/S3/diskSettings.h b/src/Disks/ObjectStorages/S3/diskSettings.h index 5b655f35508..11ac64ce913 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.h +++ b/src/Disks/ObjectStorages/S3/diskSettings.h @@ -17,7 +17,8 @@ struct S3ObjectStorageSettings; std::unique_ptr getSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, - ContextPtr context); + ContextPtr context, + bool validate_settings = true); std::unique_ptr getClient( const Poco::Util::AbstractConfiguration & config, diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index fe01251e58a..44ace9c3b65 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -282,12 +282,11 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte auto is_format_arg = [] (const std::string & s) -> bool { - return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); + return s == "auto" || FormatFactory::instance().getAllFormats().contains(Poco::toLower(s)); }; if (engine_args.size() == 4) { - //'c1 UInt64, c2 UInt64 auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); if (is_format_arg(fourth_arg)) { @@ -298,7 +297,9 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte if 
(with_structure) structure = fourth_arg; else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown format or account name specified without account key"); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Unknown format or account name specified without account key: {}", fourth_arg); } } else if (engine_args.size() == 5) diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index e12c2f15b28..af191070329 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -64,7 +64,7 @@ std::string StorageHDFSConfiguration::getPathWithoutGlob() const void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) { const size_t max_args_num = with_structure ? 4 : 3; - if (!args.size() || args.size() > max_args_num) + if (args.empty() || args.size() > max_args_num) { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Expected not more than {} arguments", max_args_num); diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index f532af24017..46be0a01862 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -77,7 +77,7 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, const auto & config = context->getConfigRef(); const std::string config_prefix = "s3."; - auto s3_settings = getSettings(config, config_prefix, context); + auto s3_settings = getSettings(config, config_prefix, context, false); /// FIXME: add a setting auth_settings.updateFrom(s3_settings->auth_settings); s3_settings->auth_settings = auth_settings; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index cf1c583ca62..8381737a4f5 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -96,10 +96,9 @@ void StorageObjectStorageSink::finalize() void StorageObjectStorageSink::release() { writer.reset(); - write_buf->finalize(); + write_buf.reset(); } - PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 8e7155205c4..7c6d952d181 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -197,8 +197,22 @@ String StorageS3QueueSource::getName() const return name; } +void StorageS3QueueSource::lazyInitialize() +{ + if (initialized) + return; + + internal_source->lazyInitialize(processing_id); + reader = std::move(internal_source->reader); + if (reader) + reader_future = std::move(internal_source->reader_future); + initialized = true; +} + Chunk StorageS3QueueSource::generate() { + lazyInitialize(); + while (true) { if (!reader) diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index 8c785e683c2..c1b45108b36 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -117,6 +117,7 @@ private: void applyActionAfterProcessing(const String & path); void appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed); + void lazyInitialize(); }; } diff --git a/src/Storages/StorageS3Settings.cpp 
b/src/Storages/StorageS3Settings.cpp index 2780249e3fd..b767805f637 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -18,18 +18,20 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; } -S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings) +S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings, bool validate_settings) { updateFromSettings(settings, false); - validate(); + if (validate_settings) + validate(); } S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix) - : PartUploadSettings(settings) + String setting_name_prefix, + bool validate_settings) + : PartUploadSettings(settings, validate_settings) { String key = config_prefix + "." + setting_name_prefix; strict_upload_part_size = config.getUInt64(key + "strict_upload_part_size", strict_upload_part_size); @@ -46,7 +48,8 @@ S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( storage_class_name = config.getString(config_prefix + ".s3_storage_class", storage_class_name); storage_class_name = Poco::toUpperInPlace(storage_class_name); - validate(); + if (validate_settings) + validate(); } S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedCollection & collection) @@ -170,8 +173,8 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() } -S3Settings::RequestSettings::RequestSettings(const Settings & settings) - : upload_settings(settings) +S3Settings::RequestSettings::RequestSettings(const Settings & settings, bool validate_settings) + : upload_settings(settings, validate_settings) { updateFromSettingsImpl(settings, false); } @@ -190,8 +193,9 @@ S3Settings::RequestSettings::RequestSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix) - : upload_settings(config, config_prefix, settings, setting_name_prefix) + String setting_name_prefix, + bool validate_settings) + : upload_settings(config, config_prefix, settings, setting_name_prefix, validate_settings) { String key = config_prefix + "." 
+ setting_name_prefix; max_single_read_retries = config.getUInt64(key + "max_single_read_retries", settings.s3_max_single_read_retries); diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index e09be8654e7..c3bc8aa6ed6 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -44,13 +44,14 @@ struct S3Settings private: PartUploadSettings() = default; - explicit PartUploadSettings(const Settings & settings); + explicit PartUploadSettings(const Settings & settings, bool validate_settings = true); explicit PartUploadSettings(const NamedCollection & collection); PartUploadSettings( const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix = {}); + String setting_name_prefix = {}, + bool validate_settings = true); friend struct RequestSettings; }; @@ -78,7 +79,7 @@ struct S3Settings void setStorageClassName(const String & storage_class_name) { upload_settings.storage_class_name = storage_class_name; } RequestSettings() = default; - explicit RequestSettings(const Settings & settings); + explicit RequestSettings(const Settings & settings, bool validate_settings = true); explicit RequestSettings(const NamedCollection & collection); /// What's the setting_name_prefix, and why do we need it? @@ -92,7 +93,8 @@ struct S3Settings const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings, - String setting_name_prefix = {}); + String setting_name_prefix = {}, + bool validate_settings = true); void updateFromSettingsIfChanged(const Settings & settings); From 4e1005bc43fabce6baf28f5f91b8a6db0315cc7d Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 17 Apr 2024 14:13:21 +0100 Subject: [PATCH 070/392] Fix s3 throttler --- src/Storages/ObjectStorage/S3/Configuration.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 46be0a01862..4c9e49d0705 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -79,7 +79,9 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, auto s3_settings = getSettings(config, config_prefix, context, false); /// FIXME: add a setting + request_settings.updateFromSettingsIfChanged(context->getSettingsRef()); auth_settings.updateFrom(s3_settings->auth_settings); + s3_settings->auth_settings = auth_settings; s3_settings->request_settings = request_settings; From 6bb3ad3133e3c7c767048bb32d85276bed726247 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 17 Apr 2024 13:15:07 +0000 Subject: [PATCH 071/392] Save the stacktrace of thread waiting on failing AsyncLoader job into exception --- src/Common/AsyncLoader.cpp | 3 ++- src/Common/ErrorCodes.cpp | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 3bec30893b9..29ea59b82ed 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int ASYNC_LOAD_CYCLE; extern const int ASYNC_LOAD_FAILED; extern const int ASYNC_LOAD_CANCELED; + extern const int ASYNC_LOAD_WAIT_FAILED; extern const int LOGICAL_ERROR; } @@ -433,7 +434,7 @@ void AsyncLoader::wait(const LoadJobPtr & job, bool no_throw) std::unique_lock job_lock{job->mutex}; wait(job_lock, job); if (!no_throw && job->load_exception) - std::rethrow_exception(job->load_exception); + 
throw Exception(ErrorCodes::ASYNC_LOAD_WAIT_FAILED, "Waited job failed: {}", getExceptionMessage(job->load_exception, /* with_stacktrace = */ false)); } void AsyncLoader::remove(const LoadJobSet & jobs) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 97a339b2bac..9fad2f1ff02 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -598,6 +598,7 @@ M(717, EXPERIMENTAL_FEATURE_ERROR) \ M(718, TOO_SLOW_PARSING) \ M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \ + M(720, ASYNC_LOAD_WAIT_FAILED) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ From 51c8dd133888964b50c2fa3db5cf6069ccca0252 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 17 Apr 2024 16:17:57 +0100 Subject: [PATCH 072/392] Fix delta lake tests --- .../DataLakes/IStorageDataLake.h | 24 +++++++++++++++---- src/TableFunctions/ITableFunctionDataLake.h | 6 +++-- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 0e83bb70a2f..21ebc32c8ae 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -57,8 +57,8 @@ public: } return std::make_shared>( - base_configuration, std::move(metadata), configuration, object_storage, engine_name_, context, - table_id_, + base_configuration, std::move(metadata), configuration, object_storage, + engine_name_, context, table_id_, columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment_, format_settings_); } @@ -68,11 +68,23 @@ public: static ColumnsDescription getTableStructureFromData( ObjectStoragePtr object_storage_, ConfigurationPtr base_configuration, - const std::optional &, + const std::optional & format_settings_, ContextPtr local_context) { auto metadata = DataLakeMetadata::create(object_storage_, base_configuration, local_context); - return ColumnsDescription(metadata->getTableSchema()); + + auto schema_from_metadata = metadata->getTableSchema(); + if (schema_from_metadata != NamesAndTypesList{}) + { + return ColumnsDescription(std::move(schema_from_metadata)); + } + else + { + ConfigurationPtr configuration = base_configuration->clone(); + configuration->getPaths() = metadata->getDataFiles(); + return Storage::getTableStructureFromData( + object_storage_, configuration, format_settings_, local_context); + } } void updateConfiguration(ContextPtr local_context) override @@ -102,6 +114,10 @@ public: , base_configuration(base_configuration_) , current_metadata(std::move(metadata_)) { + if (base_configuration->format == "auto") + { + base_configuration->format = Storage::configuration->format; + } } private: diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index c86970307c0..8cbd855bb96 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -57,8 +57,10 @@ protected: auto object_storage = TableFunction::getObjectStorage(context, !is_insert_query); return Storage::getTableStructureFromData(object_storage, configuration, std::nullopt, context); } - - return parseColumnsListFromString(configuration->structure, context); + else + { + return parseColumnsListFromString(configuration->structure, context); + } } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override From c8915a16a51719e6ba569806b377f01859971e87 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 18 Apr 
2024 17:22:51 +0100 Subject: [PATCH 073/392] Fix a few mote tests --- src/Backups/BackupIO_AzureBlobStorage.cpp | 3 ++- .../registerBackupEngineAzureBlobStorage.cpp | 6 ++++-- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 5 ++++- src/Disks/ObjectStorages/S3/diskSettings.cpp | 5 ++--- src/Storages/ObjectStorage/DataLakes/Common.cpp | 2 +- .../ObjectStorage/DataLakes/DeltaLakeMetadata.cpp | 12 ++++++------ .../ObjectStorage/DataLakes/DeltaLakeMetadata.h | 6 ++++-- .../ObjectStorage/DataLakes/HudiMetadata.h | 4 +++- .../ObjectStorage/DataLakes/IStorageDataLake.h | 14 +++++++++++--- 9 files changed, 37 insertions(+), 20 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 4dd54712e5e..673930b5976 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -193,7 +193,8 @@ void BackupWriterAzureBlobStorage::copyDataToFile( { copyDataToAzureBlobStorageFile( create_read_buffer, start_pos, length, client, configuration.container, - path_in_backup, settings, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); + fs::path(configuration.blob_path) / path_in_backup, settings, + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWRAzure")); } BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default; diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 700c8cb222f..049a4b1a338 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -117,8 +117,10 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled"); auto path = configuration.getPath(); - configuration.setPath(removeFileNameFromURL(path)); - archive_params.archive_name = configuration.getPath(); + auto filename = removeFileNameFromURL(path); + configuration.setPath(path); + + archive_params.archive_name = filename; archive_params.compression_method = params.compression_method; archive_params.compression_level = params.compression_level; archive_params.password = params.password; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index f97d6f937ef..a2522212f90 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -60,7 +60,10 @@ void throwIfError(const Aws::Utils::Outcome & response) if (!response.IsSuccess()) { const auto & err = response.GetError(); - throw S3Exception(fmt::format("{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())), err.GetErrorType()); + throw S3Exception( + fmt::format("{} (Code: {}, s3 exception: {})", + err.GetMessage(), static_cast(err.GetErrorType()), err.GetExceptionName()), + err.GetErrorType()); } } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 2bca7df7db9..66731e85d41 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -72,7 +72,6 @@ std::unique_ptr getClient( if (for_disk_s3) { String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); - url = S3::URI(endpoint); if (!url.key.ends_with('/')) url.key.push_back('/'); @@ -103,8 +102,8 @@ std::unique_ptr getClient( 
client_configuration.endpointOverride = url.endpoint; client_configuration.maxConnections = static_cast(request_settings.max_connections); - client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS); - client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS); + client_configuration.connectTimeoutMs = config.getUInt64(config_prefix + ".connect_timeout_ms", local_settings.s3_connect_timeout_ms.value); + client_configuration.requestTimeoutMs = config.getUInt64(config_prefix + ".request_timeout_ms", local_settings.s3_request_timeout_ms.value); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT); client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS); diff --git a/src/Storages/ObjectStorage/DataLakes/Common.cpp b/src/Storages/ObjectStorage/DataLakes/Common.cpp index 5f0138078d4..0c9237127b9 100644 --- a/src/Storages/ObjectStorage/DataLakes/Common.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Common.cpp @@ -21,7 +21,7 @@ std::vector listFiles( if (filename.ends_with(suffix)) res.push_back(filename); } - LOG_TRACE(getLogger("DataLakeCommon"), "Listed {} files", res.size()); + LOG_TRACE(getLogger("DataLakeCommon"), "Listed {} files ({})", res.size(), fmt::join(res, ", ")); return res; } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 123c63439b0..d0f203b32bd 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -27,10 +27,11 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -struct DeltaLakeMetadata::Impl final : private WithContext +struct DeltaLakeMetadata::Impl { ObjectStoragePtr object_storage; ConfigurationPtr configuration; + ContextPtr context; /** * Useful links: @@ -39,9 +40,9 @@ struct DeltaLakeMetadata::Impl final : private WithContext Impl(ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, ContextPtr context_) - : WithContext(context_) - , object_storage(object_storage_) + : object_storage(object_storage_) , configuration(configuration_) + , context(context_) { } @@ -137,7 +138,7 @@ struct DeltaLakeMetadata::Impl final : private WithContext */ void processMetadataFile(const String & key, std::set & result) { - auto read_settings = getContext()->getReadSettings(); + auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(key), read_settings); char c; @@ -190,7 +191,7 @@ struct DeltaLakeMetadata::Impl final : private WithContext return 0; String json_str; - auto read_settings = getContext()->getReadSettings(); + auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(last_checkpoint_file), read_settings); readJSONObjectPossiblyInvalid(json_str, *buf); @@ -252,7 +253,6 @@ struct DeltaLakeMetadata::Impl final : private WithContext LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string()); - auto context = getContext(); auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(checkpoint_path), read_settings); auto 
format_settings = getFormatSettings(context); diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index 1a5bb85586a..5050b88d809 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -9,7 +9,7 @@ namespace DB { -class DeltaLakeMetadata final : public IDataLakeMetadata, private WithContext +class DeltaLakeMetadata final : public IDataLakeMetadata { public: using ConfigurationPtr = StorageObjectStorageConfigurationPtr; @@ -28,7 +28,9 @@ public: bool operator ==(const IDataLakeMetadata & other) const override { const auto * deltalake_metadata = dynamic_cast(&other); - return deltalake_metadata && getDataFiles() == deltalake_metadata->getDataFiles(); + return deltalake_metadata + && !data_files.empty() && !deltalake_metadata->data_files.empty() + && data_files == deltalake_metadata->data_files; } static DataLakeMetadataPtr create( diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h index ee8b1ea4978..6054c3f15d6 100644 --- a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -29,7 +29,9 @@ public: bool operator ==(const IDataLakeMetadata & other) const override { const auto * hudi_metadata = dynamic_cast(&other); - return hudi_metadata && getDataFiles() == hudi_metadata->getDataFiles(); + return hudi_metadata + && !data_files.empty() && !hudi_metadata->data_files.empty() + && data_files == hudi_metadata->data_files; } static DataLakeMetadataPtr create( diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 21ebc32c8ae..64228e880f8 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -42,17 +42,25 @@ public: auto object_storage = base_configuration->createObjectStorage(context); DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; + + if (base_configuration->format == "auto") + base_configuration->format = "Parquet"; + ConfigurationPtr configuration = base_configuration->clone(); + try { metadata = DataLakeMetadata::create(object_storage, base_configuration, context); schema_from_metadata = metadata->getTableSchema(); - configuration->getPaths() = metadata->getDataFiles(); + configuration->setPaths(metadata->getDataFiles()); } catch (...) { if (mode <= LoadingStrictnessLevel::CREATE) throw; + + metadata.reset(); + configuration->setPaths({}); tryLogCurrentException(__PRETTY_FUNCTION__); } @@ -100,8 +108,8 @@ public: current_metadata = std::move(new_metadata); auto updated_configuration = base_configuration->clone(); - /// If metadata wasn't changed, we won't list data files again. 
- updated_configuration->getPaths() = current_metadata->getDataFiles(); + updated_configuration->setPaths(current_metadata->getDataFiles()); + Storage::configuration = updated_configuration; } From e2e6071063b4ce09530746c9ef49d12a36ccec37 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 19 Apr 2024 13:43:43 +0100 Subject: [PATCH 074/392] Fix a few more tests --- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 21 ++++ .../ObjectStorages/HDFS/HDFSObjectStorage.h | 8 +- src/Disks/ObjectStorages/S3/diskSettings.cpp | 3 +- .../ObjectStorage/AzureBlob/Configuration.cpp | 97 +++++++++++++++---- .../ObjectStorage/HDFS/Configuration.cpp | 10 +- .../ObjectStorage/StorageObjectStorage.cpp | 50 +++------- .../StorageObjectStorageQuerySettings.h | 2 +- .../StorageObjectStorageSink.cpp | 9 ++ .../ObjectStorage/StorageObjectStorageSink.h | 3 + src/Storages/ObjectStorage/Utils.cpp | 43 ++++++++ src/Storages/ObjectStorage/Utils.h | 17 ++++ tests/integration/test_storage_hdfs/test.py | 8 +- .../test_storage_kerberized_hdfs/test.py | 2 +- 13 files changed, 204 insertions(+), 69 deletions(-) create mode 100644 src/Storages/ObjectStorage/Utils.cpp create mode 100644 src/Storages/ObjectStorage/Utils.h diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 82c9a6c6c21..fc7d49324c7 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -31,8 +31,18 @@ void HDFSObjectStorage::startup() { } +void HDFSObjectStorage::initializeHDFS() const +{ + if (hdfs_fs) + return; + + hdfs_builder = createHDFSBuilder(url, config); + hdfs_fs = createHDFSFS(hdfs_builder.get()); +} + ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { + initializeHDFS(); /// what ever data_source_description.description value is, consider that key as relative key chassert(data_directory.starts_with("/")); return ObjectStorageKey::createAsRelative( @@ -41,6 +51,7 @@ ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & bool HDFSObjectStorage::exists(const StoredObject & object) const { + initializeHDFS(); std::string path = object.remote_path; if (path.starts_with(url_without_path)) path = path.substr(url_without_path.size()); @@ -57,6 +68,7 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLIN std::optional, std::optional) const { + initializeHDFS(); std::string path = object.remote_path; if (path.starts_with(url)) path = path.substr(url.size()); @@ -73,6 +85,7 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI std::optional, std::optional) const { + initializeHDFS(); auto disk_read_settings = patchSettings(read_settings); auto read_buffer_creator = [this, disk_read_settings] @@ -102,6 +115,7 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL size_t buf_size, const WriteSettings & write_settings) { + initializeHDFS(); if (attributes.has_value()) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, @@ -123,6 +137,7 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL /// Remove file. Throws exception if file doesn't exists or it's a directory. 
void HDFSObjectStorage::removeObject(const StoredObject & object) { + initializeHDFS(); auto path = object.remote_path; if (path.starts_with(url_without_path)) path = path.substr(url_without_path.size()); @@ -136,24 +151,28 @@ void HDFSObjectStorage::removeObject(const StoredObject & object) void HDFSObjectStorage::removeObjects(const StoredObjects & objects) { + initializeHDFS(); for (const auto & object : objects) removeObject(object); } void HDFSObjectStorage::removeObjectIfExists(const StoredObject & object) { + initializeHDFS(); if (exists(object)) removeObject(object); } void HDFSObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { + initializeHDFS(); for (const auto & object : objects) removeObjectIfExists(object); } ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) const { + initializeHDFS(); auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data()); if (!file_info) throw Exception(ErrorCodes::HDFS_ERROR, @@ -169,6 +188,7 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { + initializeHDFS(); auto * log = &Poco::Logger::get("HDFSObjectStorage"); LOG_TRACE(log, "Trying to list files for {}", path); @@ -222,6 +242,7 @@ void HDFSObjectStorage::copyObject( /// NOLINT const WriteSettings & write_settings, std::optional object_to_attributes) { + initializeHDFS(); if (object_to_attributes.has_value()) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index 8987fa5eaf1..f57b7e1fda8 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -37,8 +37,6 @@ public: SettingsPtr settings_, const Poco::Util::AbstractConfiguration & config_) : config(config_) - , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) - , hdfs_fs(createHDFSFS(hdfs_builder.get())) , settings(std::move(settings_)) { const size_t begin_of_path = hdfs_root_path_.find('/', hdfs_root_path_.find("//") + 2); @@ -117,10 +115,12 @@ public: bool isRemote() const override { return true; } private: + void initializeHDFS() const; + const Poco::Util::AbstractConfiguration & config; - HDFSBuilderWrapper hdfs_builder; - HDFSFSPtr hdfs_fs; + mutable HDFSBuilderWrapper hdfs_builder; + mutable HDFSFSPtr hdfs_fs; SettingsPtr settings; std::string url; std::string url_without_path; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 66731e85d41..49300a9cd89 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -157,7 +157,8 @@ std::unique_ptr getClient( auth_settings.server_side_encryption_customer_key_base64, std::move(sse_kms_config), auth_settings.headers, - credentials_configuration); + credentials_configuration, + auth_settings.session_token); } } diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 44ace9c3b65..4b826a0c721 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -381,7 +381,7 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte } void StorageAzureBlobConfiguration::addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const 
String & /* format */, ContextPtr context) + ASTs & args, const String & structure_, const String & format_, ContextPtr context) { if (tryGetNamedCollectionWithOverrides(args, context)) { @@ -397,66 +397,129 @@ void StorageAzureBlobConfiguration::addStructureAndFormatToArgs( { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage Azure requires 3 to 7 arguments: " - "StorageObjectStorage(connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])"); + "StorageObjectStorage(connection_string|storage_account_url, container_name, " + "blobpath, [account_name, account_key, format, compression, structure])"); } + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + auto structure_literal = std::make_shared(structure_); + auto format_literal = std::make_shared(format_); auto is_format_arg = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().getAllFormats().contains(s); }; + /// (connection_string, container_name, blobpath) if (args.size() == 3) { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); + args.push_back(format_literal); + /// Add compression = "auto" before structure argument. args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + /// (connection_string, container_name, blobpath, structure) or + /// (connection_string, container_name, blobpath, format) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. else if (args.size() == 4) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name/structure"); + /// (..., format) -> (..., format, compression, structure) if (is_format_arg(fourth_arg)) { + if (fourth_arg == "auto") + args[3] = format_literal; /// Add compression=auto before structure argument. args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + /// (..., structure) -> (..., format, compression, structure) else { - args.back() = structure_literal; + auto structure_arg = args.back(); + args[3] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + if (fourth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); } } + /// (connection_string, container_name, blobpath, format, compression) or + /// (storage_account_url, container_name, blobpath, account_name, account_key) + /// We can distinguish them by looking at the 4-th argument: check if it's format name or not. else if (args.size() == 5) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) + /// (..., format, compression) -> (..., format, compression, structure) + if (is_format_arg(fourth_arg)) { - /// Add format=auto & compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(std::make_shared("auto")); + if (fourth_arg == "auto") + args[3] = format_literal; + args.push_back(structure_literal); } - args.push_back(structure_literal); - } - else if (args.size() == 6) - { - auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); - if (!is_format_arg(fourth_arg)) + /// (..., account_name, account_key) -> (..., account_name, account_key, format, compression, structure) + else { + args.push_back(format_literal); /// Add compression=auto before structure argument. 
args.push_back(std::make_shared("auto")); args.push_back(structure_literal); } + } + /// (connection_string, container_name, blobpath, format, compression, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, structure) or + /// (storage_account_url, container_name, blobpath, account_name, account_key, format) + else if (args.size() == 6) + { + auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/account_name"); + auto sixth_arg = checkAndGetLiteralArgument(args[5], "format/structure"); + + /// (..., format, compression, structure) + if (is_format_arg(fourth_arg)) + { + if (fourth_arg == "auto") + args[3] = format_literal; + if (checkAndGetLiteralArgument(args[5], "structure") == "auto") + args[5] = structure_literal; + } + /// (..., account_name, account_key, format) -> (..., account_name, account_key, format, compression, structure) + else if (is_format_arg(sixth_arg)) + { + if (sixth_arg == "auto") + args[5] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// (..., account_name, account_key, structure) -> (..., account_name, account_key, format, compression, structure) else { - args.back() = structure_literal; + auto structure_arg = args.back(); + args[5] = format_literal; + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + if (sixth_arg == "auto") + args.push_back(structure_literal); + else + args.push_back(structure_arg); } } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression) else if (args.size() == 7) { + /// (..., format, compression) -> (..., format, compression, structure) + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; args.push_back(structure_literal); } + /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) else if (args.size() == 8) { - args.back() = structure_literal; + if (checkAndGetLiteralArgument(args[5], "format") == "auto") + args[5] = format_literal; + if (checkAndGetLiteralArgument(args[7], "structure") == "auto") + args[7] = structure_literal; } } } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index af191070329..84f0a7bfe9f 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -73,9 +73,11 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit std::string url_str; url_str = checkAndGetLiteralArgument(args[0], "url"); + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + if (args.size() > 1) { - args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(args[1], context); format = checkAndGetLiteralArgument(args[1], "format_name"); } @@ -83,18 +85,15 @@ void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool wit { if (args.size() > 2) { - args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); structure = checkAndGetLiteralArgument(args[2], "structure"); } if (args.size() > 3) { - args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(args[3], context); compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); } } else if (args.size() > 2) { - args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(args[2], context); compression_method = 
checkAndGetLiteralArgument(args[2], "compression_method"); } @@ -165,6 +164,9 @@ void StorageHDFSConfiguration::addStructureAndFormatToArgs( auto format_literal = std::make_shared(format_); auto structure_literal = std::make_shared(structure_); + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + /// hdfs(url) if (count == 1) { diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 8fc3de4de1b..13f3557d927 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -193,6 +194,7 @@ SinkToStoragePtr StorageObjectStorage::write( { updateConfiguration(local_context); const auto sample_block = metadata_snapshot->getSampleBlock(); + const auto & query_settings = StorageSettings::create(local_context->getSettingsRef()); if (configuration->withWildcard()) { @@ -209,7 +211,8 @@ SinkToStoragePtr StorageObjectStorage::write( { LOG_TEST(log, "Using PartitionedSink for {}", configuration->getPath()); return std::make_shared( - object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); + object_storage, configuration, query_settings, + format_settings, sample_block, local_context, partition_by_ast); } } @@ -220,46 +223,19 @@ SinkToStoragePtr StorageObjectStorage::write( getName(), configuration->getPath()); } - const auto storage_settings = StorageSettings::create(local_context->getSettingsRef()); - - auto configuration_copy = configuration->clone(); - if (!storage_settings.truncate_on_insert - && object_storage->exists(StoredObject(configuration->getPath()))) + auto & paths = configuration->getPaths(); + if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( + *object_storage, *configuration, query_settings, paths.front(), paths.size())) { - if (storage_settings.create_new_file_on_insert) - { - auto & paths = configuration_copy->getPaths(); - size_t index = paths.size(); - const auto & first_key = paths[0]; - auto pos = first_key.find_first_of('.'); - String new_key; - - do - { - new_key = first_key.substr(0, pos) - + "." - + std::to_string(index) - + (pos == std::string::npos ? "" : first_key.substr(pos)); - ++index; - } - while (object_storage->exists(StoredObject(new_key))); - - paths.push_back(new_key); - configuration->getPaths().push_back(new_key); - } - else - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. 
" - "If you want to overwrite it, enable setting [engine_name]_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting [engine_name]_create_new_file_on_insert", - configuration_copy->getNamespace(), configuration_copy->getPaths().back()); - } + paths.push_back(*new_key); } return std::make_shared( - object_storage, configuration_copy, format_settings, sample_block, local_context); + object_storage, + configuration->clone(), + format_settings, + sample_block, + local_context); } template diff --git a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h index f0687776aa7..606456011c3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h @@ -84,7 +84,7 @@ struct HDFSStorageSettings .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for hdfs + .skip_empty_files = settings.hdfs_skip_empty_files, /// TODO: add setting for hdfs .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 8381737a4f5..42371764920 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -102,6 +103,7 @@ void StorageObjectStorageSink::release() PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, + const StorageObjectStorageSettings & query_settings_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, @@ -109,6 +111,7 @@ PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( : PartitionedSink(partition_by, context_, sample_block_) , object_storage(object_storage_) , configuration(configuration_) + , query_settings(query_settings_) , format_settings(format_settings_) , sample_block(sample_block_) , context(context_) @@ -123,6 +126,12 @@ SinkPtr PartitionedStorageObjectStorageSink::createSinkForPartition(const String auto partition_key = replaceWildcards(configuration->getPath(), partition_id); validateKey(partition_key); + if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( + *object_storage, *configuration, query_settings, partition_key, /* sequence_number */1)) + { + partition_key = *new_key; + } + return std::make_shared( object_storage, configuration, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index a352e2c66a3..38805332a35 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include @@ -46,6 +47,7 @@ public: PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, + const StorageObjectStorageSettings & 
query_settings_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, @@ -59,6 +61,7 @@ private: ObjectStoragePtr object_storage; StorageObjectStorageConfigurationPtr configuration; + const StorageObjectStorageSettings query_settings; const std::optional format_settings; const Block sample_block; const ContextPtr context; diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp new file mode 100644 index 00000000000..6cc3962209f --- /dev/null +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include + + +namespace DB +{ + +std::optional checkAndGetNewFileOnInsertIfNeeded( + const IObjectStorage & object_storage, + const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorageSettings & query_settings, + const String & key, + size_t sequence_number) +{ + if (query_settings.truncate_on_insert + || !object_storage.exists(StoredObject(key))) + return std::nullopt; + + if (query_settings.create_new_file_on_insert) + { + auto pos = key.find_first_of('.'); + String new_key; + do + { + new_key = key.substr(0, pos) + "." + std::to_string(sequence_number) + (pos == std::string::npos ? "" : key.substr(pos)); + ++sequence_number; + } + while (object_storage.exists(StoredObject(new_key))); + + return new_key; + } + + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Object in bucket {} with key {} already exists. " + "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", + configuration.getNamespace(), key); +} + +} diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h new file mode 100644 index 00000000000..9291bb72615 --- /dev/null +++ b/src/Storages/ObjectStorage/Utils.h @@ -0,0 +1,17 @@ +#include + +namespace DB +{ + +class IObjectStorage; +class StorageObjectStorageConfiguration; +struct StorageObjectStorageSettings; + +std::optional checkAndGetNewFileOnInsertIfNeeded( + const IObjectStorage & object_storage, + const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorageSettings & query_settings, + const std::string & key, + size_t sequence_number); + +} diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index d8dab85ee6a..dc375b9ec36 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -980,7 +980,7 @@ def test_read_subcolumns(started_cluster): assert ( res - == "2\thdfs://hdfs1:9000/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + == "2\t/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" ) res = node.query( @@ -989,7 +989,7 @@ def test_read_subcolumns(started_cluster): assert ( res - == "2\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + == "2\t/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" ) res = node.query( @@ -998,7 +998,7 @@ def test_read_subcolumns(started_cluster): assert ( res - == "0\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + == "0\t/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" ) res = node.query( @@ -1007,7 +1007,7 @@ def test_read_subcolumns(started_cluster): assert ( res - == "42\thdfs://hdfs1:9000/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + == "42\t/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" ) diff --git 
a/tests/integration/test_storage_kerberized_hdfs/test.py b/tests/integration/test_storage_kerberized_hdfs/test.py index c72152fa376..ddfc1f6483d 100644 --- a/tests/integration/test_storage_kerberized_hdfs/test.py +++ b/tests/integration/test_storage_kerberized_hdfs/test.py @@ -130,7 +130,7 @@ def test_prohibited(started_cluster): assert False, "Exception have to be thrown" except Exception as ex: assert ( - "Unable to open HDFS file: /storage_user_two_prohibited error: Permission denied: user=specuser, access=WRITE" + "Unable to open HDFS file: /storage_user_two_prohibited (hdfs://suser@kerberizedhdfs1:9010/storage_user_two_prohibited) error: Permission denied: user=specuser, access=WRITE" in str(ex) ) From 191937c0c6c5e5a31c6045269026ca1a1e5171c7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 20 Apr 2024 10:19:55 +0100 Subject: [PATCH 075/392] Fix style check --- tests/integration/test_storage_hdfs/test.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index dc375b9ec36..820e3db6eb1 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -978,37 +978,25 @@ def test_read_subcolumns(started_cluster): f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert ( - res - == "2\t/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" - ) + assert res == "2\t/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" res = node.query( f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert ( - res - == "2\t/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" - ) + assert res == "2\t/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" res = node.query( f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert ( - res - == "0\t/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" - ) + assert res == "0\t/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" res = node.query( f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" ) - assert ( - res - == "42\t/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" - ) + assert res == "42\t/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" def test_union_schema_inference_mode(started_cluster): From c7f0cfc4c2df850cf97c81febd61b3411c4e7869 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 20 Apr 2024 11:47:41 +0100 Subject: [PATCH 076/392] Fix style check --- src/Storages/ObjectStorage/Utils.cpp | 5 +++++ src/Storages/ObjectStorage/Utils.h | 1 + 2 files changed, 6 insertions(+) diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index 6cc3962209f..9caab709081 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -7,6 +7,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, const StorageObjectStorageConfiguration & configuration, diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index 
9291bb72615..afc0f31a33f 100644
--- a/src/Storages/ObjectStorage/Utils.h
+++ b/src/Storages/ObjectStorage/Utils.h
@@ -1,3 +1,4 @@
+#pragma once
 #include
 
 namespace DB

From a4daf2b454c44e1891a61eaddf3a2fd965e5f880 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Sat, 20 Apr 2024 14:46:32 +0100
Subject: [PATCH 077/392] Fix hdfs race

---
 src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp | 7 ++++++-
 src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
index fc7d49324c7..ed63795cb05 100644
--- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
@@ -33,11 +33,16 @@ void HDFSObjectStorage::startup()
 
 void HDFSObjectStorage::initializeHDFS() const
 {
-    if (hdfs_fs)
+    if (initialized)
+        return;
+
+    std::lock_guard lock(init_mutex);
+    if (initialized)
         return;
 
     hdfs_builder = createHDFSBuilder(url, config);
     hdfs_fs = createHDFSFS(hdfs_builder.get());
+    initialized = true;
 }
 
 ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const
diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h
index f57b7e1fda8..b626d3dc779 100644
--- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h
+++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h
@@ -121,6 +121,10 @@ private:
 
     mutable HDFSBuilderWrapper hdfs_builder;
     mutable HDFSFSPtr hdfs_fs;
+
+    mutable std::mutex init_mutex;
+    mutable std::atomic_bool initialized{false};
+
     SettingsPtr settings;
     std::string url;
     std::string url_without_path;

From 399414bb40e517b315ab396669875af8e365ece0 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Sat, 20 Apr 2024 17:27:54 +0100
Subject: [PATCH 078/392] Better

---
 src/Common/CurrentMetrics.cpp | 3 +
 src/Interpreters/InterpreterSystemQuery.cpp | 9 +-
 .../ObjectStorage/AzureBlob/Configuration.cpp | 15 ++
 .../ObjectStorage/AzureBlob/Configuration.h | 11 +-
 .../DataLakes/DeltaLakeMetadata.cpp | 2 +-
 .../DataLakes/IStorageDataLake.h | 17 +-
 .../DataLakes/registerDataLakeStorages.cpp | 7 +-
 .../ObjectStorage/HDFS/Configuration.cpp | 14 ++
 .../ObjectStorage/HDFS/Configuration.h | 11 +-
 .../ObjectStorage/ReadBufferIterator.cpp | 4 +-
 .../ObjectStorage/ReadBufferIterator.h | 4 +-
 ...rage.cpp => ReadFromObjectStorageStep.cpp} | 32 +---
 ...tStorage.h => ReadFromObjectStorageStep.h} | 18 +-
 .../ObjectStorage/S3/Configuration.cpp | 15 ++
 src/Storages/ObjectStorage/S3/Configuration.h | 11 +-
 .../ObjectStorage/StorageObjectStorage.cpp | 181 +++++++-----------
 .../ObjectStorage/StorageObjectStorage.h | 69 +++----
 .../StorageObjectStorageCluster.cpp | 89 ++++-----
 .../StorageObjectStorageCluster.h | 28 +--
 .../StorageObjectStorageConfiguration.h | 7 +
 .../StorageObjectStorageQuerySettings.h | 102 ----------
 .../StorageObjectStorageSink.cpp | 3 +-
 .../ObjectStorage/StorageObjectStorageSink.h | 4 +-
 .../StorageObjectStorageSource.cpp | 49 ++---
 .../StorageObjectStorageSource.h | 26 +--
 src/Storages/ObjectStorage/Utils.cpp | 42 +++-
 src/Storages/ObjectStorage/Utils.h | 14 +-
 .../registerStorageObjectStorage.cpp | 20 +-
 src/Storages/S3Queue/S3QueueSource.h | 3 +-
 src/Storages/S3Queue/StorageS3Queue.cpp | 25 +--
 src/Storages/S3Queue/StorageS3Queue.h | 3 +-
 .../StorageSystemSchemaInferenceCache.cpp | 9 +-
 src/TableFunctions/ITableFunctionDataLake.h | 2 +-
 .../TableFunctionObjectStorage.cpp | 78 ++++----
.../TableFunctionObjectStorage.h | 8 +- .../TableFunctionObjectStorageCluster.cpp | 16 +- .../TableFunctionObjectStorageCluster.h | 12 +- 37 files changed, 427 insertions(+), 536 deletions(-) rename src/Storages/ObjectStorage/{ReadFromStorageObjectStorage.cpp => ReadFromObjectStorageStep.cpp} (62%) rename src/Storages/ObjectStorage/{ReadFromStorageObjectStorage.h => ReadFromObjectStorageStep.h} (70%) delete mode 100644 src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 0f25397a961..983e737991c 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -168,6 +168,9 @@ M(ObjectStorageS3Threads, "Number of threads in the S3ObjectStorage thread pool.") \ M(ObjectStorageS3ThreadsActive, "Number of threads in the S3ObjectStorage thread pool running a task.") \ M(ObjectStorageS3ThreadsScheduled, "Number of queued or active jobs in the S3ObjectStorage thread pool.") \ + M(StorageObjectStorageThreads, "Number of threads in the remote table engines thread pools.") \ + M(StorageObjectStorageThreadsActive, "Number of threads in the remote table engines thread pool running a task.") \ + M(StorageObjectStorageThreadsScheduled, "Number of queued or active jobs in remote table engines thread pool.") \ M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \ M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \ M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \ diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 27b2a9460b7..af9dc08e8c7 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -53,6 +53,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -489,17 +492,17 @@ BlockIO InterpreterSystemQuery::execute() StorageFile::getSchemaCache(getContext()).clear(); #if USE_AWS_S3 if (caches_to_drop.contains("S3")) - StorageS3::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageS3Configuration::type_name).clear(); #endif #if USE_HDFS if (caches_to_drop.contains("HDFS")) - StorageHDFS::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageHDFSConfiguration::type_name).clear(); #endif if (caches_to_drop.contains("URL")) StorageURL::getSchemaCache(getContext()).clear(); #if USE_AZURE_BLOB_STORAGE if (caches_to_drop.contains("AZURE")) - StorageAzureBlob::getSchemaCache(getContext()).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageAzureBlobConfiguration::type_name).clear(); #endif break; } diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index 4b826a0c721..c9bc59d62aa 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -101,6 +101,21 @@ AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(Co return settings_ptr; } +StorageObjectStorage::QuerySettings StorageAzureBlobConfiguration::getQuerySettings(const ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.azure_truncate_on_insert, + 
.create_new_file_on_insert = settings.azure_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure + .list_object_keys_size = settings.azure_list_object_keys_size, + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist, + }; +} + ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { assertInitialized(); diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.h b/src/Storages/ObjectStorage/AzureBlob/Configuration.h index c12ff81197d..7e105ea82b5 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.h +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.h @@ -18,9 +18,15 @@ class StorageAzureBlobConfiguration : public StorageObjectStorageConfiguration friend void registerBackupEngineAzureBlobStorage(BackupFactory & factory); public: + static constexpr auto type_name = "azure"; + static constexpr auto engine_name = "Azure"; + StorageAzureBlobConfiguration() = default; StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other); + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return engine_name; } + Path getPath() const override { return blob_path; } void setPath(const Path & path) override { blob_path = path; } @@ -30,6 +36,7 @@ public: String getDataSourceDescription() override { return fs::path(connection_url) / container; } String getNamespace() const override { return container; } + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT @@ -37,8 +44,8 @@ public: void fromNamedCollection(const NamedCollection & collection) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; - static void addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const String & format_, ContextPtr context); + void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) override; protected: using AzureClient = Azure::Storage::Blobs::BlobContainerClient; diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index d0f203b32bd..c6590ba8d43 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -184,7 +184,7 @@ struct DeltaLakeMetadata::Impl * * We need to get "version", which is the version of the checkpoint we need to read. 
*/ - size_t readLastCheckpointIfExists() + size_t readLastCheckpointIfExists() const { const auto last_checkpoint_file = fs::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; if (!object_storage->exists(StoredObject(last_checkpoint_file))) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 64228e880f8..e1851775925 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -21,17 +21,16 @@ namespace DB /// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/) /// Right now it's implemented on top of StorageS3 and right now it doesn't support /// many Iceberg features like schema evolution, partitioning, positional and equality deletes. -template -class IStorageDataLake final : public StorageObjectStorage +template +class IStorageDataLake final : public StorageObjectStorage { public: - using Storage = StorageObjectStorage; + using Storage = StorageObjectStorage; using ConfigurationPtr = Storage::ConfigurationPtr; static StoragePtr create( ConfigurationPtr base_configuration, ContextPtr context, - const String & engine_name_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, @@ -64,9 +63,9 @@ public: tryLogCurrentException(__PRETTY_FUNCTION__); } - return std::make_shared>( + return std::make_shared>( base_configuration, std::move(metadata), configuration, object_storage, - engine_name_, context, table_id_, + context, table_id_, columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_, constraints_, comment_, format_settings_); } @@ -133,9 +132,9 @@ private: DataLakeMetadataPtr current_metadata; }; -using StorageIceberg = IStorageDataLake; -using StorageDeltaLake = IStorageDataLake; -using StorageHudi = IStorageDataLake; +using StorageIceberg = IStorageDataLake; +using StorageDeltaLake = IStorageDataLake; +using StorageHudi = IStorageDataLake; } diff --git a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp index d11dd1ca836..a5170e5ed6b 100644 --- a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp +++ b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB @@ -24,7 +23,7 @@ void registerStorageIceberg(StorageFactory & factory) StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageIceberg::create( - configuration, args.getContext(), "Iceberg", args.table_id, args.columns, + configuration, args.getContext(), args.table_id, args.columns, args.constraints, args.comment, std::nullopt, args.mode); }, { @@ -47,7 +46,7 @@ void registerStorageDeltaLake(StorageFactory & factory) StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageDeltaLake::create( - configuration, args.getContext(), "DeltaLake", args.table_id, args.columns, + configuration, args.getContext(), args.table_id, args.columns, args.constraints, args.comment, std::nullopt, args.mode); }, { @@ -68,7 +67,7 @@ void registerStorageHudi(StorageFactory & factory) StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageHudi::create( - 
configuration, args.getContext(), "Hudi", args.table_id, args.columns, + configuration, args.getContext(), args.table_id, args.columns, args.constraints, args.comment, std::nullopt, args.mode); }, { diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 84f0a7bfe9f..0062ac969ac 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -60,6 +60,20 @@ std::string StorageHDFSConfiguration::getPathWithoutGlob() const return "/"; return path.substr(0, end_of_path_without_globs); } +StorageObjectStorage::QuerySettings StorageHDFSConfiguration::getQuerySettings(const ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.hdfs_truncate_on_insert, + .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.hdfs_skip_empty_files, /// TODO: add setting for hdfs + .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, + }; +} void StorageHDFSConfiguration::fromAST(ASTs & args, ContextPtr context, bool with_structure) { diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 23a7e8e4549..0a502857153 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -13,9 +13,15 @@ namespace DB class StorageHDFSConfiguration : public StorageObjectStorageConfiguration { public: + static constexpr auto type_name = "hdfs"; + static constexpr auto engine_name = "HDFS"; + StorageHDFSConfiguration() = default; StorageHDFSConfiguration(const StorageHDFSConfiguration & other); + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return engine_name; } + Path getPath() const override { return path; } void setPath(const Path & path_) override { path = path_; } @@ -25,13 +31,14 @@ public: String getNamespace() const override { return ""; } String getDataSourceDescription() override { return url; } + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } - static void addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const String & format_, ContextPtr context); + void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) override; std::string getPathWithoutGlob() const override; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 0b6e34fb831..f8ce90a2b1f 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -19,7 +18,6 @@ ReadBufferIterator::ReadBufferIterator( ConfigurationPtr configuration_, const FileIterator & 
file_iterator_, const std::optional & format_settings_, - const StorageObjectStorageSettings & query_settings_, SchemaCache & schema_cache_, ObjectInfos & read_keys_, const ContextPtr & context_) @@ -28,7 +26,7 @@ ReadBufferIterator::ReadBufferIterator( , configuration(configuration_) , file_iterator(file_iterator_) , format_settings(format_settings_) - , query_settings(query_settings_) + , query_settings(configuration->getQuerySettings(context_)) , schema_cache(schema_cache_) , read_keys(read_keys_) , format(configuration->format == "auto" ? std::nullopt : std::optional(configuration->format)) diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 053bcbf894f..2d58e1c789e 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -2,7 +2,6 @@ #include #include #include -#include #include @@ -19,7 +18,6 @@ public: ConfigurationPtr configuration_, const FileIterator & file_iterator_, const std::optional & format_settings_, - const StorageObjectStorageSettings & query_settings_, SchemaCache & schema_cache_, ObjectInfos & read_keys_, const ContextPtr & context_); @@ -50,7 +48,7 @@ private: const ConfigurationPtr configuration; const FileIterator file_iterator; const std::optional & format_settings; - const StorageObjectStorageSettings query_settings; + const StorageObjectStorage::QuerySettings query_settings; SchemaCache & schema_cache; ObjectInfos & read_keys; std::optional format; diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp similarity index 62% rename from src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp rename to src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp index 89d33191f41..f19e01cdc3e 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp @@ -1,11 +1,11 @@ -#include +#include #include #include namespace DB { -ReadFromStorageObejctStorage::ReadFromStorageObejctStorage( +ReadFromObjectStorageStep::ReadFromObjectStorageStep( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, const String & name_, @@ -14,49 +14,41 @@ ReadFromStorageObejctStorage::ReadFromStorageObejctStorage( const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, const std::optional & format_settings_, - const StorageObjectStorageSettings & query_settings_, bool distributed_processing_, ReadFromFormatInfo info_, SchemaCache & schema_cache_, const bool need_only_count_, ContextPtr context_, size_t max_block_size_, - size_t num_streams_, - CurrentMetrics::Metric metric_threads_count_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_) + size_t num_streams_) : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) , object_storage(object_storage_) , configuration(configuration_) , info(std::move(info_)) , virtual_columns(virtual_columns_) , format_settings(format_settings_) - , query_settings(query_settings_) + , query_settings(configuration->getQuerySettings(context_)) , schema_cache(schema_cache_) , name(name_ + "Source") , need_only_count(need_only_count_) , max_block_size(max_block_size_) , num_streams(num_streams_) , distributed_processing(distributed_processing_) - , metric_threads_count(metric_threads_count_) - , 
metric_threads_active(metric_threads_active_) - , metric_threads_scheduled(metric_threads_scheduled_) { } -void ReadFromStorageObejctStorage::createIterator(const ActionsDAG::Node * predicate) +void ReadFromObjectStorageStep::createIterator(const ActionsDAG::Node * predicate) { if (!iterator_wrapper) { auto context = getContext(); iterator_wrapper = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, query_settings, distributed_processing, - context, predicate, virtual_columns, nullptr, metric_threads_count, - metric_threads_active, metric_threads_scheduled, context->getFileProgressCallback()); + configuration, object_storage, distributed_processing, + context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); } } -void ReadFromStorageObejctStorage::applyFilters(ActionDAGNodes added_filter_nodes) +void ReadFromObjectStorageStep::applyFilters(ActionDAGNodes added_filter_nodes) { filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); const ActionsDAG::Node * predicate = nullptr; @@ -66,7 +58,7 @@ void ReadFromStorageObejctStorage::applyFilters(ActionDAGNodes added_filter_node createIterator(predicate); } -void ReadFromStorageObejctStorage::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +void ReadFromObjectStorageStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { createIterator(nullptr); auto context = getContext(); @@ -74,13 +66,9 @@ void ReadFromStorageObejctStorage::initializePipeline(QueryPipelineBuilder & pip Pipes pipes; for (size_t i = 0; i < num_streams; ++i) { - auto threadpool = std::make_shared( - metric_threads_count, metric_threads_active, metric_threads_scheduled, /* max_threads */1); - auto source = std::make_shared( getName(), object_storage, configuration, info, format_settings, query_settings, - context, max_block_size, iterator_wrapper, need_only_count, schema_cache, - std::move(threadpool), metric_threads_count, metric_threads_active, metric_threads_scheduled); + context, max_block_size, iterator_wrapper, need_only_count, schema_cache); source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); diff --git a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.h similarity index 70% rename from src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h rename to src/Storages/ObjectStorage/ReadFromObjectStorageStep.h index c0dd02d75f8..d98ebfef1f2 100644 --- a/src/Storages/ObjectStorage/ReadFromStorageObjectStorage.h +++ b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.h @@ -1,17 +1,16 @@ #pragma once -#include -#include #include +#include namespace DB { -class ReadFromStorageObejctStorage : public SourceStepWithFilter +class ReadFromObjectStorageStep : public SourceStepWithFilter { public: using ConfigurationPtr = StorageObjectStorageConfigurationPtr; - ReadFromStorageObejctStorage( + ReadFromObjectStorageStep( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, const String & name_, @@ -20,17 +19,13 @@ public: const SelectQueryInfo & query_info_, const StorageSnapshotPtr & storage_snapshot_, const std::optional & format_settings_, - const StorageObjectStorageSettings & query_settings_, bool distributed_processing_, ReadFromFormatInfo info_, SchemaCache & schema_cache_, bool need_only_count_, ContextPtr context_, size_t max_block_size_, - size_t num_streams_, - CurrentMetrics::Metric 
metric_threads_count_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_); + size_t num_streams_); std::string getName() const override { return name; } @@ -46,16 +41,13 @@ private: const ReadFromFormatInfo info; const NamesAndTypesList virtual_columns; const std::optional format_settings; - const StorageObjectStorageSettings query_settings; + const StorageObjectStorage::QuerySettings query_settings; SchemaCache & schema_cache; const String name; const bool need_only_count; const size_t max_block_size; const size_t num_streams; const bool distributed_processing; - const CurrentMetrics::Metric metric_threads_count; - const CurrentMetrics::Metric metric_threads_active; - const CurrentMetrics::Metric metric_threads_scheduled; void createIterator(const ActionsDAG::Node * predicate); }; diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 4c9e49d0705..139d9004f8e 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -70,6 +70,21 @@ StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & ot keys = other.keys; } +StorageObjectStorage::QuerySettings StorageS3Configuration::getQuerySettings(const ContextPtr & context) const +{ + const auto & settings = context->getSettingsRef(); + return StorageObjectStorage::QuerySettings{ + .truncate_on_insert = settings.s3_truncate_on_insert, + .create_new_file_on_insert = settings.s3_create_new_file_on_insert, + .schema_inference_use_cache = settings.schema_inference_use_cache_for_s3, + .schema_inference_mode = settings.schema_inference_mode, + .skip_empty_files = settings.s3_skip_empty_files, + .list_object_keys_size = settings.s3_list_object_keys_size, + .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .ignore_non_existent_file = settings.s3_ignore_file_doesnt_exist, + }; +} + ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, bool /* is_readonly */) /// NOLINT { assertInitialized(); diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index ff5e8680e66..de4a6d17579 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -14,9 +15,14 @@ namespace DB class StorageS3Configuration : public StorageObjectStorageConfiguration { public: + static constexpr auto type_name = "s3"; + StorageS3Configuration() = default; StorageS3Configuration(const StorageS3Configuration & other); + std::string getTypeName() const override { return type_name; } + std::string getEngineName() const override { return url.storage_name; } + Path getPath() const override { return url.key; } void setPath(const Path & path) override { url.key = path; } @@ -26,6 +32,7 @@ public: String getNamespace() const override { return url.bucket; } String getDataSourceDescription() override; + StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; void validateNamespace(const String & name) const override; @@ -34,8 +41,8 @@ public: bool isStaticConfiguration() const override { return static_configuration; } ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT - static void addStructureAndFormatToArgs( - ASTs & args, const String & structure, const String & 
format, ContextPtr context); + void addStructureAndFormatToArgs( + ASTs & args, const String & structure, const String & format, ContextPtr context) override; private: void fromNamedCollection(const NamedCollection & collection) override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 13f3557d927..441639629a3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -11,10 +11,9 @@ #include #include #include -#include #include #include -#include +#include #include #include #include @@ -25,53 +24,13 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int DATABASE_ACCESS_DENIED; extern const int NOT_IMPLEMENTED; } -template -std::unique_ptr getStorageMetadata( - ObjectStoragePtr object_storage, - const StorageObjectStorageConfigurationPtr & configuration, - const ColumnsDescription & columns, - const ConstraintsDescription & constraints, - std::optional format_settings, - const String & comment, - const std::string & engine_name, - const ContextPtr & context) -{ - using Storage = StorageObjectStorage; - - auto storage_metadata = std::make_unique(); - if (columns.empty()) - { - auto fetched_columns = Storage::getTableStructureFromData(object_storage, configuration, format_settings, context); - storage_metadata->setColumns(fetched_columns); - } - else if (!columns.hasOnlyOrdinary()) - { - /// We don't allow special columns. - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table engine {} doesn't support special columns " - "like MATERIALIZED, ALIAS or EPHEMERAL", engine_name); - } - else - { - if (configuration->format == "auto") - Storage::setFormatFromData(object_storage, configuration, format_settings, context); - - storage_metadata->setColumns(columns); - } - storage_metadata->setConstraints(constraints); - storage_metadata->setComment(comment); - return storage_metadata; -} - -template -StorageObjectStorage::StorageObjectStorage( +StorageObjectStorage::StorageObjectStorage( ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, - const String & engine_name_, ContextPtr context, const StorageID & table_id_, const ColumnsDescription & columns_, @@ -80,16 +39,13 @@ StorageObjectStorage::StorageObjectStorage( std::optional format_settings_, bool distributed_processing_, ASTPtr partition_by_) - : IStorage(table_id_, getStorageMetadata( - object_storage_, configuration_, columns_, constraints_, format_settings_, - comment, engine_name, context)) - , engine_name(engine_name_) + : IStorage(table_id_) + , configuration(configuration_) + , object_storage(object_storage_) , format_settings(format_settings_) , partition_by(partition_by_) , distributed_processing(distributed_processing_) - , log(getLogger("Storage" + engine_name_)) - , object_storage(object_storage_) - , configuration(configuration_) + , log(getLogger(fmt::format("Storage{}({})", configuration->getEngineName(), table_id_.getFullTableName()))) { FormatFactory::instance().checkFormatName(configuration->format); configuration->check(context); @@ -98,46 +54,41 @@ StorageObjectStorage::StorageObjectStorage( for (const auto & key : configuration->getPaths()) objects.emplace_back(key); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(getInMemoryMetadataPtr()->getColumns())); + auto metadata = getStorageMetadata( + object_storage_, configuration_, columns_, + constraints_, format_settings_, comment, context); + + 
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + setInMemoryMetadata(std::move(metadata)); } -template -bool StorageObjectStorage::prefersLargeBlocks() const +String StorageObjectStorage::getName() const +{ + return configuration->getEngineName(); +} + +bool StorageObjectStorage::prefersLargeBlocks() const { return FormatFactory::instance().checkIfOutputFormatPrefersLargeBlocks(configuration->format); } -template -bool StorageObjectStorage::parallelizeOutputAfterReading(ContextPtr context) const +bool StorageObjectStorage::parallelizeOutputAfterReading(ContextPtr context) const { return FormatFactory::instance().checkParallelizeOutputAfterReading(configuration->format, context); } -template -bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) const +bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) const { return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context, format_settings); } -template -void StorageObjectStorage::updateConfiguration(ContextPtr context) +void StorageObjectStorage::updateConfiguration(ContextPtr context) { if (!configuration->isStaticConfiguration()) object_storage->applyNewSettings(context->getConfigRef(), "s3.", context); } -template -SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) -{ - static SchemaCache schema_cache( - context->getConfigRef().getUInt( - StorageSettings::SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING, - DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; -} - -template -void StorageObjectStorage::read( +void StorageObjectStorage::read( QueryPlan & query_plan, const Names & column_names, const StorageSnapshotPtr & storage_snapshot, @@ -155,13 +106,12 @@ void StorageObjectStorage::read( getName()); } - const auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); + const auto read_from_format_info = prepareReadingFromFormat( + column_names, storage_snapshot, supportsSubsetOfColumns(local_context)); const bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty()) && local_context->getSettingsRef().optimize_count_from_files; - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII SOURCE HEADER: {}", read_from_format_info.source_header.dumpStructure()); - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII FORMAT HEADER: {}", read_from_format_info.format_header.dumpStructure()); - auto read_step = std::make_unique( + auto read_step = std::make_unique( object_storage, configuration, getName(), @@ -170,23 +120,18 @@ void StorageObjectStorage::read( query_info, storage_snapshot, format_settings, - StorageSettings::create(local_context->getSettingsRef()), distributed_processing, std::move(read_from_format_info), getSchemaCache(local_context), need_only_count, local_context, max_block_size, - num_streams, - StorageSettings::ObjectStorageThreads(), - StorageSettings::ObjectStorageThreadsActive(), - StorageSettings::ObjectStorageThreadsScheduled()); + num_streams); query_plan.addStep(std::move(read_step)); } -template -SinkToStoragePtr StorageObjectStorage::write( +SinkToStoragePtr StorageObjectStorage::write( const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, @@ -194,7 +139,7 @@ SinkToStoragePtr StorageObjectStorage::write( { updateConfiguration(local_context); const auto sample_block = metadata_snapshot->getSampleBlock(); - const auto & 
query_settings = StorageSettings::create(local_context->getSettingsRef()); + const auto & settings = configuration->getQuerySettings(local_context); if (configuration->withWildcard()) { @@ -209,23 +154,22 @@ SinkToStoragePtr StorageObjectStorage::write( if (partition_by_ast) { - LOG_TEST(log, "Using PartitionedSink for {}", configuration->getPath()); return std::make_shared( - object_storage, configuration, query_settings, - format_settings, sample_block, local_context, partition_by_ast); + object_storage, configuration, format_settings, sample_block, local_context, partition_by_ast); } } if (configuration->withGlobs()) { - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, - "{} key '{}' contains globs, so the table is in readonly mode", - getName(), configuration->getPath()); + throw Exception( + ErrorCodes::DATABASE_ACCESS_DENIED, + "{} key '{}' contains globs, so the table is in readonly mode", + getName(), configuration->getPath()); } auto & paths = configuration->getPaths(); if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( - *object_storage, *configuration, query_settings, paths.front(), paths.size())) + *object_storage, *configuration, settings, paths.front(), paths.size())) { paths.push_back(*new_key); } @@ -238,9 +182,11 @@ SinkToStoragePtr StorageObjectStorage::write( local_context); } -template -void StorageObjectStorage::truncate( - const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) +void StorageObjectStorage::truncate( + const ASTPtr &, + const StorageMetadataPtr &, + ContextPtr, + TableExclusiveLockHolder &) { if (configuration->withGlobs()) { @@ -257,34 +203,37 @@ void StorageObjectStorage::truncate( object_storage->removeObjectsIfExist(objects); } -template -std::unique_ptr StorageObjectStorage::createReadBufferIterator( +std::unique_ptr StorageObjectStorage::createReadBufferIterator( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, ObjectInfos & read_keys, const ContextPtr & context) { - const auto settings = StorageSettings::create(context->getSettingsRef()); auto file_iterator = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, settings, /* distributed_processing */false, - context, /* predicate */{}, /* virtual_columns */{}, &read_keys, - StorageSettings::ObjectStorageThreads(), StorageSettings::ObjectStorageThreadsActive(), StorageSettings::ObjectStorageThreadsScheduled()); + configuration, + object_storage, + false/* distributed_processing */, + context, + {}/* predicate */, + {}/* virtual_columns */, + &read_keys); return std::make_unique( object_storage, configuration, file_iterator, - format_settings, StorageSettings::create(context->getSettingsRef()), getSchemaCache(context), read_keys, context); + format_settings, getSchemaCache(context, configuration->getTypeName()), read_keys, context); } -template -ColumnsDescription StorageObjectStorage::getTableStructureFromData( +ColumnsDescription StorageObjectStorage::getTableStructureFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, const ContextPtr & context) { ObjectInfos read_keys; - auto read_buffer_iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + auto read_buffer_iterator = createReadBufferIterator( + object_storage, configuration, format_settings, read_keys, context); + if (configuration->format == "auto") { auto [columns, format] = 
detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); @@ -297,20 +246,34 @@ ColumnsDescription StorageObjectStorage::getTableStructureFromD } } -template -void StorageObjectStorage::setFormatFromData( +void StorageObjectStorage::setFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, const ContextPtr & context) { ObjectInfos read_keys; - auto read_buffer_iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + auto read_buffer_iterator = createReadBufferIterator( + object_storage, configuration, format_settings, read_keys, context); configuration->format = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context).second; } -template class StorageObjectStorage; -template class StorageObjectStorage; -template class StorageObjectStorage; +SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) +{ + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_" + configuration->getTypeName(), + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; +} + +SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, const std::string & storage_type_name) +{ + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_" + storage_type_name, + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; +} } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index a2112f7ed01..3dbe010e406 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -1,31 +1,22 @@ #pragma once - -#include #include #include -#include #include #include #include - namespace DB { -struct SelectQueryInfo; class StorageObjectStorageConfiguration; -struct S3StorageSettings; -struct HDFSStorageSettings; -struct AzureStorageSettings; -class PullingPipelineExecutor; -using ReadTaskCallback = std::function; -class IOutputFormat; -class IInputFormat; -class SchemaCache; class ReadBufferIterator; +class SchemaCache; - -template +/** + * A general class containing implementation for external table engines + * such as StorageS3, StorageAzure, StorageHDFS. + * Works with an object of IObjectStorage class. 
+ */ class StorageObjectStorage : public IStorage { public: @@ -35,10 +26,26 @@ public: using ObjectInfoPtr = std::shared_ptr; using ObjectInfos = std::vector; + struct QuerySettings + { + /// Insert settings: + bool truncate_on_insert; + bool create_new_file_on_insert; + + /// Schema inference settings: + bool schema_inference_use_cache; + SchemaInferenceMode schema_inference_mode; + + /// List settings: + bool skip_empty_files; + size_t list_object_keys_size; + bool throw_on_zero_files_match; + bool ignore_non_existent_file; + }; + StorageObjectStorage( ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, - const String & engine_name_, ContextPtr context_, const StorageID & table_id_, const ColumnsDescription & columns_, @@ -48,17 +55,17 @@ public: bool distributed_processing_ = false, ASTPtr partition_by_ = nullptr); - String getName() const override { return engine_name; } + String getName() const override; void read( QueryPlan & query_plan, - const Names &, - const StorageSnapshotPtr &, - SelectQueryInfo &, - ContextPtr, - QueryProcessingStage::Enum, - size_t, - size_t) override; + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; SinkToStoragePtr write( const ASTPtr & query, @@ -84,7 +91,9 @@ public: bool parallelizeOutputAfterReading(ContextPtr context) const override; - static SchemaCache & getSchemaCache(const ContextPtr & context); + SchemaCache & getSchemaCache(const ContextPtr & context); + + static SchemaCache & getSchemaCache(const ContextPtr & context, const std::string & storage_type_name); static ColumnsDescription getTableStructureFromData( const ObjectStoragePtr & object_storage, @@ -108,19 +117,15 @@ protected: ObjectInfos & read_keys, const ContextPtr & context); + ConfigurationPtr configuration; + const ObjectStoragePtr object_storage; const std::string engine_name; - std::optional format_settings; + const std::optional format_settings; const ASTPtr partition_by; const bool distributed_processing; LoggerPtr log; - ObjectStoragePtr object_storage; - ConfigurationPtr configuration; std::mutex configuration_update_mutex; }; -using StorageS3 = StorageObjectStorage; -using StorageAzureBlob = StorageObjectStorage; -using StorageHDFS = StorageObjectStorage; - } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index f023bb068d4..72a35ae33eb 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB { @@ -24,47 +25,34 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -template -StorageObjectStorageCluster::StorageObjectStorageCluster( +StorageObjectStorageCluster::StorageObjectStorageCluster( const String & cluster_name_, - const Storage::ConfigurationPtr & configuration_, + ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, - const String & engine_name_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_) - : IStorageCluster(cluster_name_, - table_id_, - getLogger(fmt::format("{}({})", engine_name_, table_id_.table_name))) - , engine_name(engine_name_) + : IStorageCluster( + cluster_name_, table_id_, getLogger(fmt::format("{}({})", 
configuration_->getEngineName(), table_id_.table_name))) , configuration{configuration_} , object_storage(object_storage_) { configuration->check(context_); - StorageInMemoryMetadata storage_metadata; + auto metadata = getStorageMetadata( + object_storage, configuration, columns_, constraints_, + {}/* format_settings */, ""/* comment */, context_); - if (columns_.empty()) - { - ColumnsDescription columns = Storage::getTableStructureFromData(object_storage, configuration, /*format_settings=*/std::nullopt, context_); - storage_metadata.setColumns(columns); - } - else - { - if (configuration->format == "auto") - StorageS3::setFormatFromData(object_storage, configuration, /*format_settings=*/std::nullopt, context_); - - storage_metadata.setColumns(columns_); - } - - storage_metadata.setConstraints(constraints_); - setInMemoryMetadata(storage_metadata); - - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); + setInMemoryMetadata(std::move(metadata)); } -template -void StorageObjectStorageCluster::updateQueryToSendIfNeeded( +std::string StorageObjectStorageCluster::getName() const +{ + return configuration->getEngineName(); +} + +void StorageObjectStorageCluster::updateQueryToSendIfNeeded( ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) @@ -72,24 +60,32 @@ void StorageObjectStorageCluster::up ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); if (!expression_list) { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected SELECT query from table function {}, got '{}'", - engine_name, queryToString(query)); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Expected SELECT query from table function {}, got '{}'", + configuration->getEngineName(), queryToString(query)); } - TableFunction::updateStructureAndFormatArgumentsIfNeeded( - expression_list->children, - storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), - configuration->format, - context); + ASTs & args = expression_list->children; + const auto & structure = storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(); + if (args.empty()) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected empty list of arguments for {}Cluster table function", + configuration->getEngineName()); + } + + ASTPtr cluster_name_arg = args.front(); + args.erase(args.begin()); + configuration->addStructureAndFormatToArgs(args, structure, configuration->format, context); + args.insert(args.begin(), cluster_name_arg); } -template -RemoteQueryExecutor::Extension -StorageObjectStorageCluster::getTaskIteratorExtension( +RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & local_context) const { - const auto settings = StorageSettings::create(local_context->getSettingsRef()); + const auto settings = configuration->getQuerySettings(local_context); auto iterator = std::make_shared( object_storage, configuration, predicate, virtual_columns, local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match, @@ -106,17 +102,4 @@ StorageObjectStorageCluster::getTask return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; } - -#if USE_AWS_S3 -template class StorageObjectStorageCluster; -#endif - -#if USE_AZURE_BLOB_STORAGE -template class StorageObjectStorageCluster; 
-#endif - -#if USE_HDFS -template class StorageObjectStorageCluster; -#endif - } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index ac894e14f24..2db8f5c352e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -11,32 +11,25 @@ namespace DB { -class StorageS3Settings; -class StorageAzureBlobSettings; - class Context; -template class StorageObjectStorageCluster : public IStorageCluster { public: - using Storage = StorageObjectStorage; - using TableFunction = TableFunctionObjectStorageCluster; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; StorageObjectStorageCluster( const String & cluster_name_, - const Storage::ConfigurationPtr & configuration_, + ConfigurationPtr configuration_, ObjectStoragePtr object_storage_, - const String & engine_name_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_); - std::string getName() const override { return engine_name; } + std::string getName() const override; - RemoteQueryExecutor::Extension - getTaskIteratorExtension( + RemoteQueryExecutor::Extension getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & context) const override; @@ -53,20 +46,9 @@ private: const ContextPtr & context) override; const String engine_name; - const Storage::ConfigurationPtr configuration; + const StorageObjectStorage::ConfigurationPtr configuration; const ObjectStoragePtr object_storage; NamesAndTypesList virtual_columns; }; - -#if USE_AWS_S3 -using StorageS3Cluster = StorageObjectStorageCluster; -#endif -#if USE_AZURE_BLOB_STORAGE -using StorageAzureBlobCluster = StorageObjectStorageCluster; -#endif -#if USE_HDFS -using StorageHDFSCluster = StorageObjectStorageCluster; -#endif - } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 647575aaa90..34965174bf9 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include "StorageObjectStorage.h" #include namespace fs = std::filesystem; @@ -27,6 +28,9 @@ public: ContextPtr local_context, bool with_table_structure); + virtual std::string getTypeName() const = 0; + virtual std::string getEngineName() const = 0; + virtual Path getPath() const = 0; virtual void setPath(const Path & path) = 0; @@ -36,6 +40,9 @@ public: virtual String getDataSourceDescription() = 0; virtual String getNamespace() const = 0; + virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; + virtual void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; bool withWildcard() const; bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h b/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h deleted file mode 100644 index 606456011c3..00000000000 --- a/src/Storages/ObjectStorage/StorageObjectStorageQuerySettings.h +++ /dev/null @@ -1,102 +0,0 @@ -#pragma once -#include -#include -#include - -namespace CurrentMetrics -{ - extern const Metric ObjectStorageAzureThreads; - extern const Metric ObjectStorageAzureThreadsActive; - extern 
const Metric ObjectStorageAzureThreadsScheduled; - - extern const Metric ObjectStorageS3Threads; - extern const Metric ObjectStorageS3ThreadsActive; - extern const Metric ObjectStorageS3ThreadsScheduled; -} - -namespace DB -{ - -struct StorageObjectStorageSettings -{ - bool truncate_on_insert; - bool create_new_file_on_insert; - bool schema_inference_use_cache; - SchemaInferenceMode schema_inference_mode; - bool skip_empty_files; - size_t list_object_keys_size; - bool throw_on_zero_files_match; - bool ignore_non_existent_file; -}; - -struct S3StorageSettings -{ - static StorageObjectStorageSettings create(const Settings & settings) - { - return StorageObjectStorageSettings{ - .truncate_on_insert = settings.s3_truncate_on_insert, - .create_new_file_on_insert = settings.s3_create_new_file_on_insert, - .schema_inference_use_cache = settings.schema_inference_use_cache_for_s3, - .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.s3_skip_empty_files, - .list_object_keys_size = settings.s3_list_object_keys_size, - .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, - .ignore_non_existent_file = settings.s3_ignore_file_doesnt_exist, - }; - } - - static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_s3"; - - static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageS3Threads; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageS3ThreadsActive; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageS3ThreadsScheduled; } /// NOLINT -}; - -struct AzureStorageSettings -{ - static StorageObjectStorageSettings create(const Settings & settings) - { - return StorageObjectStorageSettings{ - .truncate_on_insert = settings.azure_truncate_on_insert, - .create_new_file_on_insert = settings.azure_create_new_file_on_insert, - .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, - .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure - .list_object_keys_size = settings.azure_list_object_keys_size, - .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, - .ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist, - }; - } - - static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_azure"; - - static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageAzureThreads; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageAzureThreadsActive; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageAzureThreadsScheduled; } /// NOLINT -}; - -struct HDFSStorageSettings -{ - static StorageObjectStorageSettings create(const Settings & settings) - { - return StorageObjectStorageSettings{ - .truncate_on_insert = settings.hdfs_truncate_on_insert, - .create_new_file_on_insert = settings.hdfs_create_new_file_on_insert, - .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, - .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.hdfs_skip_empty_files, /// TODO: add setting for hdfs - .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs - 
.throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, - .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, - }; - } - - static constexpr auto SCHEMA_CACHE_MAX_ELEMENTS_CONFIG_SETTING = "schema_inference_cache_max_elements_for_hdfs"; - - /// TODO: s3 -> hdfs - static CurrentMetrics::Metric ObjectStorageThreads() { return CurrentMetrics::ObjectStorageS3Threads; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsActive() { return CurrentMetrics::ObjectStorageS3ThreadsActive; } /// NOLINT - static CurrentMetrics::Metric ObjectStorageThreadsScheduled() { return CurrentMetrics::ObjectStorageS3ThreadsScheduled; } /// NOLINT -}; - -} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 42371764920..62367a6b933 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -103,7 +103,6 @@ void StorageObjectStorageSink::release() PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, - const StorageObjectStorageSettings & query_settings_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, @@ -111,7 +110,7 @@ PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( : PartitionedSink(partition_by, context_, sample_block_) , object_storage(object_storage_) , configuration(configuration_) - , query_settings(query_settings_) + , query_settings(configuration_->getQuerySettings(context_)) , format_settings(format_settings_) , sample_block(sample_block_) , context(context_) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 38805332a35..6c2f73e40e3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -1,7 +1,6 @@ #pragma once #include #include -#include #include #include @@ -47,7 +46,6 @@ public: PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, StorageObjectStorageConfigurationPtr configuration_, - const StorageObjectStorageSettings & query_settings_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, @@ -61,7 +59,7 @@ private: ObjectStoragePtr object_storage; StorageObjectStorageConfigurationPtr configuration; - const StorageObjectStorageSettings query_settings; + const StorageObjectStorage::QuerySettings query_settings; const std::optional format_settings; const Block sample_block; const ContextPtr context; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 82824b0e7f7..3101a7ebf51 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -20,6 +19,13 @@ namespace ProfileEvents extern const Event EngineFileLikeReadFiles; } +namespace CurrentMetrics +{ + extern const Metric StorageObjectStorageThreads; + extern const Metric StorageObjectStorageThreadsActive; + extern const Metric StorageObjectStorageThreadsScheduled; +} + namespace DB { @@ -37,16 +43,12 @@ StorageObjectStorageSource::StorageObjectStorageSource( ConfigurationPtr configuration_, const ReadFromFormatInfo & info, std::optional format_settings_, - const 
StorageObjectStorageSettings & query_settings_, + const StorageObjectStorage::QuerySettings & query_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, bool need_only_count_, - SchemaCache & schema_cache_, - std::shared_ptr reader_pool_, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_) + SchemaCache & schema_cache_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) @@ -57,13 +59,14 @@ StorageObjectStorageSource::StorageObjectStorageSource( , max_block_size(max_block_size_) , need_only_count(need_only_count_) , read_from_format_info(info) - , create_reader_pool(reader_pool_) + , create_reader_pool(std::make_shared( + CurrentMetrics::StorageObjectStorageThreads, + CurrentMetrics::StorageObjectStorageThreadsActive, + CurrentMetrics::StorageObjectStorageThreadsScheduled, + 1/* max_threads */)) , columns_desc(info.columns_description) , file_iterator(file_iterator_) , schema_cache(schema_cache_) - , metric_threads(metric_threads_) - , metric_threads_active(metric_threads_active_) - , metric_threads_scheduled(metric_threads_scheduled_) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) { } @@ -76,26 +79,23 @@ StorageObjectStorageSource::~StorageObjectStorageSource() std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, - const StorageObjectStorageSettings & settings, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_, std::function file_progress_callback) { if (distributed_processing) return std::make_shared( local_context->getReadTaskCallback(), - local_context->getSettingsRef().max_threads, - metric_threads_, metric_threads_active_, metric_threads_scheduled_); + local_context->getSettingsRef().max_threads); if (configuration->isNamespaceWithGlobs()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); + auto settings = configuration->getQuerySettings(local_context); + if (configuration->isPathWithGlobs()) { /// Iterate through disclosed globs and make a source for each file @@ -568,7 +568,8 @@ StorageObjectStorageSource::ReaderHolder::ReaderHolder( { } -StorageObjectStorageSource::ReaderHolder & StorageObjectStorageSource::ReaderHolder::operator=(ReaderHolder && other) noexcept +StorageObjectStorageSource::ReaderHolder & +StorageObjectStorageSource::ReaderHolder::operator=(ReaderHolder && other) noexcept { /// The order of destruction is important. /// reader uses pipeline, pipeline uses read_buf. 
@@ -581,15 +582,15 @@ StorageObjectStorageSource::ReaderHolder & StorageObjectStorageSource::ReaderHol } StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( - const ReadTaskCallback & callback_, - size_t max_threads_count, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_) + const ReadTaskCallback & callback_, size_t max_threads_count) : IIterator("ReadTaskIterator") , callback(callback_) { - ThreadPool pool(metric_threads_, metric_threads_active_, metric_threads_scheduled_, max_threads_count); + ThreadPool pool( + CurrentMetrics::StorageObjectStorageThreads, + CurrentMetrics::StorageObjectStorageThreadsActive, + CurrentMetrics::StorageObjectStorageThreadsScheduled, max_threads_count); + auto pool_scheduler = threadPoolCallbackRunnerUnsafe(pool, "ReadTaskIter"); std::vector> keys; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index f75bfc390bb..3c2cc3f80cd 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -28,16 +27,12 @@ public: ConfigurationPtr configuration, const ReadFromFormatInfo & info, std::optional format_settings_, - const StorageObjectStorageSettings & query_settings_, + const StorageObjectStorage::QuerySettings & query_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, bool need_only_count_, - SchemaCache & schema_cache_, - std::shared_ptr reader_pool_, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_); + SchemaCache & schema_cache_); ~StorageObjectStorageSource() override; @@ -53,15 +48,11 @@ public: static std::shared_ptr createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, - const StorageObjectStorageSettings & settings, bool distributed_processing, const ContextPtr & local_context, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, ObjectInfos * read_keys, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_, std::function file_progress_callback = {}); protected: @@ -69,7 +60,7 @@ protected: ObjectStoragePtr object_storage; const ConfigurationPtr configuration; const std::optional format_settings; - const StorageObjectStorageSettings query_settings; + const StorageObjectStorage::QuerySettings query_settings; const UInt64 max_block_size; const bool need_only_count; const ReadFromFormatInfo read_from_format_info; @@ -79,10 +70,6 @@ protected: SchemaCache & schema_cache; bool initialized = false; - const CurrentMetrics::Metric metric_threads; - const CurrentMetrics::Metric metric_threads_active; - const CurrentMetrics::Metric metric_threads_scheduled; - size_t total_rows_in_file = 0; LoggerPtr log = getLogger("StorageObjectStorageSource"); @@ -149,12 +136,7 @@ protected: class StorageObjectStorageSource::ReadTaskIterator : public IIterator { public: - ReadTaskIterator( - const ReadTaskCallback & callback_, - size_t max_threads_count, - CurrentMetrics::Metric metric_threads_, - CurrentMetrics::Metric metric_threads_active_, - CurrentMetrics::Metric metric_threads_scheduled_); + ReadTaskIterator(const ReadTaskCallback & callback_, size_t max_threads_count); size_t 
estimatedKeysCount() override { return buffer.size(); } diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index 9caab709081..94d6dadee3b 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -1,8 +1,6 @@ #include #include #include -#include - namespace DB { @@ -15,15 +13,15 @@ namespace ErrorCodes std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, const StorageObjectStorageConfiguration & configuration, - const StorageObjectStorageSettings & query_settings, + const StorageObjectStorage::QuerySettings & settings, const String & key, size_t sequence_number) { - if (query_settings.truncate_on_insert + if (settings.truncate_on_insert || !object_storage.exists(StoredObject(key))) return std::nullopt; - if (query_settings.create_new_file_on_insert) + if (settings.create_new_file_on_insert) { auto pos = key.find_first_of('.'); String new_key; @@ -45,4 +43,38 @@ std::optional checkAndGetNewFileOnInsertIfNeeded( configuration.getNamespace(), key); } +StorageInMemoryMetadata getStorageMetadata( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfigurationPtr & configuration, + const ColumnsDescription & columns, + const ConstraintsDescription & constraints, + std::optional format_settings, + const String & comment, + const ContextPtr & context) +{ + StorageInMemoryMetadata storage_metadata; + if (columns.empty()) + { + auto fetched_columns = StorageObjectStorage::getTableStructureFromData(object_storage, configuration, format_settings, context); + storage_metadata.setColumns(fetched_columns); + } + else if (!columns.hasOnlyOrdinary()) + { + /// We don't allow special columns. + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Special columns are not supported for {} storage" + "like MATERIALIZED, ALIAS or EPHEMERAL", configuration->getTypeName()); + } + else + { + if (configuration->format == "auto") + StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context); + + storage_metadata.setColumns(columns); + } + storage_metadata.setConstraints(constraints); + storage_metadata.setComment(comment); + return storage_metadata; +} + } diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index afc0f31a33f..37bd49a77c0 100644 --- a/src/Storages/ObjectStorage/Utils.h +++ b/src/Storages/ObjectStorage/Utils.h @@ -1,18 +1,30 @@ #pragma once #include +#include "StorageObjectStorage.h" namespace DB { class IObjectStorage; class StorageObjectStorageConfiguration; +using StorageObjectStorageConfigurationPtr = std::shared_ptr; struct StorageObjectStorageSettings; std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, const StorageObjectStorageConfiguration & configuration, - const StorageObjectStorageSettings & query_settings, + const StorageObjectStorage::QuerySettings & settings, const std::string & key, size_t sequence_number); + +StorageInMemoryMetadata getStorageMetadata( + ObjectStoragePtr object_storage, + const StorageObjectStorageConfigurationPtr & configuration, + const ColumnsDescription & columns, + const ConstraintsDescription & constraints, + std::optional format_settings, + const String & comment, + const ContextPtr & context); + } diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index 3271b766f68..06b8aefb716 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ 
b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -2,22 +2,23 @@ #include #include #include +#include #include #include namespace DB { +#if USE_AWS_S3 || USE_AZURE_BLOB_STORAGE || USE_HDFS + namespace ErrorCodes { extern const int BAD_ARGUMENTS; } -template -static std::shared_ptr> createStorageObjectStorage( +static std::shared_ptr createStorageObjectStorage( const StorageFactory::Arguments & args, - typename StorageObjectStorage::ConfigurationPtr configuration, - const String & engine_name, + typename StorageObjectStorage::ConfigurationPtr configuration, ContextPtr context) { auto & engine_args = args.engine_args; @@ -54,10 +55,9 @@ static std::shared_ptr> createStorageObjec if (args.storage_def->partition_by) partition_by = args.storage_def->partition_by->clone(); - return std::make_shared>( + return std::make_shared( configuration, configuration->createObjectStorage(context), - engine_name, args.getContext(), args.table_id, args.columns, @@ -68,6 +68,8 @@ static std::shared_ptr> createStorageObjec partition_by); } +#endif + #if USE_AZURE_BLOB_STORAGE void registerStorageAzure(StorageFactory & factory) { @@ -76,7 +78,7 @@ void registerStorageAzure(StorageFactory & factory) auto context = args.getLocalContext(); auto configuration = std::make_shared(); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, "Azure", context); + return createStorageObjectStorage(args, configuration, context); }, { .supports_settings = true, @@ -95,7 +97,7 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) auto context = args.getLocalContext(); auto configuration = std::make_shared(); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, name, context); + return createStorageObjectStorage(args, configuration, context); }, { .supports_settings = true, @@ -130,7 +132,7 @@ void registerStorageHDFS(StorageFactory & factory) auto context = args.getLocalContext(); auto configuration = std::make_shared(); StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, "HDFS", context); + return createStorageObjectStorage(args, configuration, context); }, { .supports_settings = true, diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index c1b45108b36..5a1f0f6dd04 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -7,7 +7,6 @@ #include #include #include -#include #include @@ -21,7 +20,7 @@ struct ObjectMetadata; class StorageS3QueueSource : public ISource, WithContext { public: - using Storage = StorageObjectStorage; + using Storage = StorageObjectStorage; using ConfigurationPtr = Storage::ConfigurationPtr; using GlobIterator = StorageObjectStorageSource::GlobIterator; diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index c5799d23abd..6b504b0d986 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -37,13 +37,6 @@ namespace ProfileEvents extern const Event S3ListObjects; } -namespace CurrentMetrics -{ - extern const Metric ObjectStorageS3Threads; - extern const Metric ObjectStorageS3ThreadsActive; - extern const Metric ObjectStorageS3ThreadsScheduled; -} - namespace DB { @@ -151,14 +144,14 @@ 
StorageS3Queue::StorageS3Queue( StorageInMemoryMetadata storage_metadata; if (columns_.empty()) { - auto columns = Storage::getTableStructureFromData(object_storage, configuration, format_settings, context_); + auto columns = StorageObjectStorage::getTableStructureFromData(object_storage, configuration, format_settings, context_); storage_metadata.setColumns(columns); } else { if (configuration->format == "auto") { - StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context_); + StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context_); } storage_metadata.setColumns(columns_); } @@ -370,26 +363,18 @@ std::shared_ptr StorageS3Queue::createSource( size_t max_block_size, ContextPtr local_context) { - auto threadpool = std::make_shared(CurrentMetrics::ObjectStorageS3Threads, - CurrentMetrics::ObjectStorageS3ThreadsActive, - CurrentMetrics::ObjectStorageS3ThreadsScheduled, - /* max_threads */1); auto internal_source = std::make_unique( getName(), object_storage, configuration, info, format_settings, - S3StorageSettings::create(local_context->getSettingsRef()), + configuration->getQuerySettings(local_context), local_context, max_block_size, file_iterator, false, - Storage::getSchemaCache(local_context), - threadpool, - CurrentMetrics::ObjectStorageS3Threads, - CurrentMetrics::ObjectStorageS3ThreadsActive, - CurrentMetrics::ObjectStorageS3ThreadsScheduled); + StorageObjectStorage::getSchemaCache(local_context, configuration->getTypeName())); auto file_deleter = [=, this](const std::string & path) mutable { @@ -596,7 +581,7 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const std::shared_ptr StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate) { - auto settings = S3StorageSettings::create(local_context->getSettingsRef()); + auto settings = configuration->getQuerySettings(local_context); auto glob_iterator = std::make_unique( object_storage, configuration, predicate, getVirtualsList(), local_context, nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match); diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 72c41a6a694..1464e15ebf2 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -21,8 +21,7 @@ class S3QueueFilesMetadata; class StorageS3Queue : public IStorage, WithContext { public: - using Storage = StorageObjectStorage; - using ConfigurationPtr = Storage::ConfigurationPtr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; StorageS3Queue( std::unique_ptr s3queue_settings_, diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index 9ef64f2b90d..a2d3f342a63 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -9,6 +9,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -74,14 +77,14 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C { fillDataImpl(res_columns, StorageFile::getSchemaCache(context), "File"); #if USE_AWS_S3 - fillDataImpl(res_columns, StorageS3::getSchemaCache(context), "S3"); + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageS3Configuration::type_name), "S3"); #endif #if USE_HDFS - fillDataImpl(res_columns, StorageHDFS::getSchemaCache(context), "HDFS"); + 
fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageHDFSConfiguration::type_name), "HDFS"); #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); #if USE_AZURE_BLOB_STORAGE - fillDataImpl(res_columns, StorageAzureBlob::getSchemaCache(context), "Azure"); /// FIXME + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageAzureBlobConfiguration::type_name), "Azure"); #endif } diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 8cbd855bb96..02c8c623e61 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -39,7 +39,7 @@ protected: columns = cached_columns; StoragePtr storage = Storage::create( - configuration, context, "", StorageID(TableFunction::getDatabaseName(), table_name), + configuration, context, StorageID(TableFunction::getDatabaseName(), table_name), columns, ConstraintsDescription{}, String{}, std::nullopt, LoadingStrictnessLevel::CREATE); storage->startup(); diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 9223642a7e6..2b5c774ff78 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -27,27 +27,27 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -template +template ObjectStoragePtr TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const + Definition, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const { if (!object_storage) object_storage = configuration->createObjectStorage(context, create_readonly); return object_storage; } -template +template StorageObjectStorageConfigurationPtr TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::getConfiguration() const + Definition, Configuration>::getConfiguration() const { if (!configuration) configuration = std::make_shared(); return configuration; } -template +template std::vector TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const + Definition, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const { auto & table_function_node = query_node_table_function->as(); auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); @@ -63,22 +63,21 @@ std::vector TableFunctionObjectStorage< return result; } -template -void TableFunctionObjectStorage::updateStructureAndFormatArgumentsIfNeeded( +template +void TableFunctionObjectStorage::updateStructureAndFormatArgumentsIfNeeded( ASTs & args, const String & structure, const String & format, const ContextPtr & context) { - Configuration::addStructureAndFormatToArgs(args, structure, format, context); + Configuration().addStructureAndFormatToArgs(args, structure, format, context); } -template -void TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) +template +void TableFunctionObjectStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) { StorageObjectStorageConfiguration::initialize(*getConfiguration(), engine_args, local_context, true); } -template -void 
TableFunctionObjectStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) +template +void TableFunctionObjectStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) { /// Clone ast function, because we can modify its arguments like removing headers. auto ast_copy = ast_function->clone(); @@ -90,38 +89,38 @@ void TableFunctionObjectStorage::par parseArgumentsImpl(args, context); } -template +template ColumnsDescription TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const + Definition, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const { chassert(configuration); if (configuration->structure == "auto") { context->checkAccess(getSourceAccessType()); auto storage = getObjectStorage(context, !is_insert_query); - return StorageObjectStorage::getTableStructureFromData(storage, configuration, std::nullopt, context); + return StorageObjectStorage::getTableStructureFromData(storage, configuration, std::nullopt, context); } return parseColumnsListFromString(configuration->structure, context); } -template +template bool TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) + Definition, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) { chassert(configuration); return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); } -template +template std::unordered_set TableFunctionObjectStorage< - Definition, StorageSettings, Configuration>::getVirtualsToCheckBeforeUsingStructureHint() const + Definition, Configuration>::getVirtualsToCheckBeforeUsingStructureHint() const { return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); } -template -StoragePtr TableFunctionObjectStorage::executeImpl( +template +StoragePtr TableFunctionObjectStorage::executeImpl( const ASTPtr & /* ast_function */, ContextPtr context, const std::string & table_name, @@ -137,10 +136,9 @@ StoragePtr TableFunctionObjectStorage>( + StoragePtr storage = std::make_shared( configuration, getObjectStorage(context, !is_insert_query), - Definition::storage_type_name, context, StorageID(getDatabaseName(), table_name), columns, @@ -159,7 +157,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) { UNUSED(factory); #if USE_AWS_S3 - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -170,7 +168,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) .allow_readonly = false }); - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -181,7 +179,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) .allow_readonly = false }); - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -191,7 +189,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) .categories{"DataLake"}}, .allow_readonly = false }); - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -204,7 +202,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) #endif #if USE_AZURE_BLOB_STORAGE - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -220,7 +218,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) }); #endif #if USE_HDFS - factory.registerFunction>( + factory.registerFunction>( { .allow_readonly = 
false }); @@ -228,21 +226,21 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) } #if USE_AZURE_BLOB_STORAGE -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; #endif #if USE_AWS_S3 -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; #endif #if USE_HDFS -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; #endif } diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index 9022f6e577f..bd43cae3697 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -85,7 +85,7 @@ struct HDFSDefinition " - uri, format, structure, compression_method\n"; }; -template +template class TableFunctionObjectStorage : public ITableFunction { public: @@ -142,14 +142,14 @@ protected: }; #if USE_AWS_S3 -using TableFunctionS3 = TableFunctionObjectStorage; +using TableFunctionS3 = TableFunctionObjectStorage; #endif #if USE_AZURE_BLOB_STORAGE -using TableFunctionAzureBlob = TableFunctionObjectStorage; +using TableFunctionAzureBlob = TableFunctionObjectStorage; #endif #if USE_HDFS -using TableFunctionHDFS = TableFunctionObjectStorage; +using TableFunctionHDFS = TableFunctionObjectStorage; #endif } diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index 909ace788eb..ce78076dd21 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -14,8 +14,8 @@ namespace DB { -template -StoragePtr TableFunctionObjectStorageCluster::executeImpl( +template +StoragePtr TableFunctionObjectStorageCluster::executeImpl( const ASTPtr & /*function*/, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const { @@ -34,10 +34,9 @@ StoragePtr TableFunctionObjectStorageClustergetClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) { /// On worker node this filename won't contains globs - storage = std::make_shared>( + storage = std::make_shared( configuration, object_storage, - Definition::storage_type_name, context, StorageID(Base::getDatabaseName(), table_name), columns, @@ -49,11 +48,10 @@ StoragePtr TableFunctionObjectStorageCluster>( + storage = std::make_shared( ITableFunctionCluster::cluster_name, configuration, object_storage, - Definition::storage_type_name, StorageID(Base::getDatabaseName(), table_name), columns, ConstraintsDescription{}, @@ -107,14 +105,14 @@ void registerTableFunctionObjectStorageCluster(TableFunctionFactory & factory) } #if USE_AWS_S3 -template class TableFunctionObjectStorageCluster; +template class TableFunctionObjectStorageCluster; #endif #if USE_AZURE_BLOB_STORAGE -template class TableFunctionObjectStorageCluster; +template class TableFunctionObjectStorageCluster; #endif #if USE_HDFS -template class 
TableFunctionObjectStorageCluster; +template class TableFunctionObjectStorageCluster; #endif } diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h index 21c2f8995dc..a8bc11b5e40 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.h +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h @@ -56,8 +56,8 @@ struct HDFSClusterDefinition " - cluster_name, uri, format, structure, compression_method\n"; }; -template -class TableFunctionObjectStorageCluster : public ITableFunctionCluster> +template +class TableFunctionObjectStorageCluster : public ITableFunctionCluster> { public: static constexpr auto name = Definition::name; @@ -67,7 +67,7 @@ public: String getSignature() const override { return signature; } protected: - using Base = TableFunctionObjectStorage; + using Base = TableFunctionObjectStorage; StoragePtr executeImpl( const ASTPtr & ast_function, @@ -86,14 +86,14 @@ protected: }; #if USE_AWS_S3 -using TableFunctionS3Cluster = TableFunctionObjectStorageCluster; +using TableFunctionS3Cluster = TableFunctionObjectStorageCluster; #endif #if USE_AZURE_BLOB_STORAGE -using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster; +using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster; #endif #if USE_HDFS -using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster; +using TableFunctionHDFSCluster = TableFunctionObjectStorageCluster; #endif } From 9eb9a76592dada103c40baa2c4acf5a3918b8e95 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 22 Apr 2024 14:18:46 +0100 Subject: [PATCH 079/392] Fix --- .../ObjectStorage/AzureBlob/Configuration.cpp | 1 + .../DataLakes/IStorageDataLake.h | 2 +- .../ObjectStorage/HDFS/Configuration.cpp | 1 + .../ObjectStorage/S3/Configuration.cpp | 1 + .../ObjectStorage/StorageObjectStorage.cpp | 47 +++++++++++-------- .../ObjectStorage/StorageObjectStorage.h | 10 +++- .../StorageObjectStorageCluster.cpp | 9 ++-- .../StorageObjectStorageConfiguration.cpp | 5 ++ .../StorageObjectStorageConfiguration.h | 2 +- src/Storages/ObjectStorage/Utils.cpp | 33 ++++++------- src/Storages/ObjectStorage/Utils.h | 10 ++-- src/Storages/S3Queue/StorageS3Queue.cpp | 21 +++------ .../TableFunctionObjectStorage.cpp | 5 +- 13 files changed, 80 insertions(+), 67 deletions(-) diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp index c9bc59d62aa..f268b812c03 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp @@ -77,6 +77,7 @@ void StorageAzureBlobConfiguration::check(ContextPtr context) const url_to_check = Poco::URI(connection_url); context->getGlobalContext()->getRemoteHostFilter().checkURL(url_to_check); + StorageObjectStorageConfiguration::check(context); } StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other) diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index e1851775925..144cc16939c 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -89,7 +89,7 @@ public: { ConfigurationPtr configuration = base_configuration->clone(); configuration->getPaths() = metadata->getDataFiles(); - return Storage::getTableStructureFromData( + return Storage::resolveSchemaFromData( object_storage_, configuration, format_settings_, 
local_context); } } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 0062ac969ac..12e3f3adb12 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -34,6 +34,7 @@ void StorageHDFSConfiguration::check(ContextPtr context) const { context->getRemoteHostFilter().checkURL(Poco::URI(url)); checkHDFSURL(fs::path(url) / path.substr(1)); + StorageObjectStorageConfiguration::check(context); } ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 139d9004f8e..bfd61c647f8 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -54,6 +54,7 @@ void StorageS3Configuration::check(ContextPtr context) const validateNamespace(url.bucket); context->getGlobalContext()->getRemoteHostFilter().checkURL(url.uri); context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); + StorageObjectStorageConfiguration::check(context); } void StorageS3Configuration::validateNamespace(const String & name) const diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 441639629a3..36a8beba41a 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -47,17 +47,19 @@ StorageObjectStorage::StorageObjectStorage( , distributed_processing(distributed_processing_) , log(getLogger(fmt::format("Storage{}({})", configuration->getEngineName(), table_id_.getFullTableName()))) { - FormatFactory::instance().checkFormatName(configuration->format); + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context); configuration->check(context); + StorageInMemoryMetadata metadata; + metadata.setColumns(columns); + metadata.setConstraints(constraints_); + metadata.setComment(comment); + StoredObjects objects; for (const auto & key : configuration->getPaths()) objects.emplace_back(key); - auto metadata = getStorageMetadata( - object_storage_, configuration_, columns_, - constraints_, format_settings_, comment, context); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); setInMemoryMetadata(std::move(metadata)); } @@ -224,7 +226,7 @@ std::unique_ptr StorageObjectStorage::createReadBufferIterat format_settings, getSchemaCache(context, configuration->getTypeName()), read_keys, context); } -ColumnsDescription StorageObjectStorage::getTableStructureFromData( +ColumnsDescription StorageObjectStorage::resolveSchemaFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, @@ -233,20 +235,11 @@ ColumnsDescription StorageObjectStorage::getTableStructureFromData( ObjectInfos read_keys; auto read_buffer_iterator = createReadBufferIterator( object_storage, configuration, format_settings, read_keys, context); - - if (configuration->format == "auto") - { - auto [columns, format] = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); - configuration->format = format; - return columns; - } - else - { - return readSchemaFromFormat(configuration->format, format_settings, *read_buffer_iterator, context); - } + return readSchemaFromFormat( + 
configuration->format, format_settings, *read_buffer_iterator, context); } -void StorageObjectStorage::setFormatFromData( +std::string StorageObjectStorage::resolveFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, @@ -255,7 +248,23 @@ void StorageObjectStorage::setFormatFromData( ObjectInfos read_keys; auto read_buffer_iterator = createReadBufferIterator( object_storage, configuration, format_settings, read_keys, context); - configuration->format = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context).second; + return detectFormatAndReadSchema( + format_settings, *read_buffer_iterator, context).second; +} + +std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context) +{ + ObjectInfos read_keys; + auto read_buffer_iterator = createReadBufferIterator( + object_storage, configuration, format_settings, read_keys, context); + + auto [columns, format] = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); + configuration->format = format; + return std::pair(columns, format); } SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 3dbe010e406..d46a875bf42 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -95,13 +95,19 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & context, const std::string & storage_type_name); - static ColumnsDescription getTableStructureFromData( + static ColumnsDescription resolveSchemaFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, const ContextPtr & context); - static void setFormatFromData( + static std::string resolveFormatFromData( + const ObjectStoragePtr & object_storage, + const ConfigurationPtr & configuration, + const std::optional & format_settings, + const ContextPtr & context); + + static std::pair resolveSchemaAndFormatFromData( const ObjectStoragePtr & object_storage, const ConfigurationPtr & configuration, const std::optional & format_settings, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 72a35ae33eb..f98fc32a3cc 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -38,10 +38,13 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( , configuration{configuration_} , object_storage(object_storage_) { + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, {}, context_); configuration->check(context_); - auto metadata = getStorageMetadata( - object_storage, configuration, columns_, constraints_, - {}/* format_settings */, ""/* comment */, context_); + + StorageInMemoryMetadata metadata; + metadata.setColumns(columns); + metadata.setConstraints(constraints_); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); setInMemoryMetadata(std::move(metadata)); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp 
b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 61e569cee05..3635269db34 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -30,6 +30,11 @@ void StorageObjectStorageConfiguration::initialize( configuration.initialized = true; } +void StorageObjectStorageConfiguration::check(ContextPtr) const +{ + FormatFactory::instance().checkFormatName(format); +} + StorageObjectStorageConfiguration::StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other) { format = other.format; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h index 34965174bf9..c55362aa8bd 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h @@ -50,7 +50,7 @@ public: bool isNamespaceWithGlobs() const; virtual std::string getPathWithoutGlob() const; - virtual void check(ContextPtr context) const = 0; + virtual void check(ContextPtr context) const; virtual void validateNamespace(const String & /* name */) const {} virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index 94d6dadee3b..2a7236ab196 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -12,7 +12,7 @@ namespace ErrorCodes std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const StorageObjectStorage::QuerySettings & settings, const String & key, size_t sequence_number) @@ -43,38 +43,33 @@ std::optional checkAndGetNewFileOnInsertIfNeeded( configuration.getNamespace(), key); } -StorageInMemoryMetadata getStorageMetadata( +void resolveSchemaAndFormat( + ColumnsDescription & columns, + std::string & format, ObjectStoragePtr object_storage, const StorageObjectStorageConfigurationPtr & configuration, - const ColumnsDescription & columns, - const ConstraintsDescription & constraints, std::optional format_settings, - const String & comment, const ContextPtr & context) { - StorageInMemoryMetadata storage_metadata; if (columns.empty()) { - auto fetched_columns = StorageObjectStorage::getTableStructureFromData(object_storage, configuration, format_settings, context); - storage_metadata.setColumns(fetched_columns); + if (format == "auto") + std::tie(columns, format) = StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); + else + columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, context); } - else if (!columns.hasOnlyOrdinary()) + else if (format == "auto") + { + format = StorageObjectStorage::resolveFormatFromData(object_storage, configuration, format_settings, context); + } + + if (!columns.hasOnlyOrdinary()) { /// We don't allow special columns. 
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Special columns are not supported for {} storage" "like MATERIALIZED, ALIAS or EPHEMERAL", configuration->getTypeName()); } - else - { - if (configuration->format == "auto") - StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context); - - storage_metadata.setColumns(columns); - } - storage_metadata.setConstraints(constraints); - storage_metadata.setComment(comment); - return storage_metadata; } } diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index 37bd49a77c0..3a752e6b8f0 100644 --- a/src/Storages/ObjectStorage/Utils.h +++ b/src/Storages/ObjectStorage/Utils.h @@ -12,19 +12,17 @@ struct StorageObjectStorageSettings; std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const StorageObjectStorage::QuerySettings & settings, const std::string & key, size_t sequence_number); - -StorageInMemoryMetadata getStorageMetadata( +void resolveSchemaAndFormat( + ColumnsDescription & columns, + std::string & format, ObjectStoragePtr object_storage, const StorageObjectStorageConfigurationPtr & configuration, - const ColumnsDescription & columns, - const ConstraintsDescription & constraints, std::optional format_settings, - const String & comment, const ContextPtr & context); } diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 6b504b0d986..229c40396c5 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -141,24 +142,14 @@ StorageS3Queue::StorageS3Queue( FormatFactory::instance().checkFormatName(configuration->format); configuration->check(context_); - StorageInMemoryMetadata storage_metadata; - if (columns_.empty()) - { - auto columns = StorageObjectStorage::getTableStructureFromData(object_storage, configuration, format_settings, context_); - storage_metadata.setColumns(columns); - } - else - { - if (configuration->format == "auto") - { - StorageObjectStorage::setFormatFromData(object_storage, configuration, format_settings, context_); - } - storage_metadata.setColumns(columns_); - } + ColumnsDescription columns{columns_}; + resolveSchemaAndFormat(columns, configuration->format, object_storage, configuration, format_settings, context_); + configuration->check(context_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns); storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 2b5c774ff78..06676a8adfa 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -98,7 +99,9 @@ ColumnsDescription TableFunctionObjectStorage< { context->checkAccess(getSourceAccessType()); auto storage = getObjectStorage(context, !is_insert_query); - return StorageObjectStorage::getTableStructureFromData(storage, configuration, std::nullopt, context); + ColumnsDescription 
columns; + resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, context); + return columns; } return parseColumnsListFromString(configuration->structure, context); From 16bc8aa0b1a68bd2422026ea7205a3746029e86b Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 25 Apr 2024 16:08:13 +0200 Subject: [PATCH 080/392] Fxi --- src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp index 3635269db34..89c15085274 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp @@ -26,7 +26,6 @@ void StorageObjectStorageConfiguration::initialize( else FormatFactory::instance().checkFormatName(configuration.format); - configuration.check(local_context); configuration.initialized = true; } From 193ff63f87a2cef958983b2ef106a7c52f6db8be Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 25 Apr 2024 22:44:12 +0200 Subject: [PATCH 081/392] Fix --- .../ObjectStorage/StorageObjectStorage.cpp | 37 ++++++++++++++----- src/Storages/S3Queue/StorageS3Queue.cpp | 1 + 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 36a8beba41a..f5bfb9d2a65 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -269,20 +269,37 @@ std::pair StorageObjectStorage::resolveSchemaAn SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) { - static SchemaCache schema_cache( - context->getConfigRef().getUInt( - "schema_inference_cache_max_elements_for_" + configuration->getTypeName(), - DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; + return getSchemaCache(context, configuration->getTypeName()); } SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, const std::string & storage_type_name) { - static SchemaCache schema_cache( - context->getConfigRef().getUInt( - "schema_inference_cache_max_elements_for_" + storage_type_name, - DEFAULT_SCHEMA_CACHE_ELEMENTS)); - return schema_cache; + if (storage_type_name == "s3") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_s3", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else if (storage_type_name == "hdfs") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_hdfs", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else if (storage_type_name == "azure") + { + static SchemaCache schema_cache( + context->getConfigRef().getUInt( + "schema_inference_cache_max_elements_for_azure", + DEFAULT_SCHEMA_CACHE_ELEMENTS)); + return schema_cache; + } + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported storage type: {}", storage_type_name); } } diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 229c40396c5..e84dabecf3b 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -151,6 +151,7 @@ StorageS3Queue::StorageS3Queue( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); 
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns())); + setInMemoryMetadata(storage_metadata); LOG_INFO(log, "Using zookeeper path: {}", zk_path.string()); task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); }); From 69a3aa7bcf0e7a2d311a076493715cf3b1b3a418 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 26 Apr 2024 11:01:32 +0000 Subject: [PATCH 082/392] Implement Dynamic data type --- docs/en/sql-reference/data-types/dynamic.md | 157 ++++ src/Columns/ColumnArray.cpp | 10 + src/Columns/ColumnArray.h | 3 + src/Columns/ColumnCompressed.h | 3 + src/Columns/ColumnConst.cpp | 9 + src/Columns/ColumnConst.h | 4 + src/Columns/ColumnDynamic.cpp | 785 ++++++++++++++++++ src/Columns/ColumnDynamic.h | 363 ++++++++ src/Columns/ColumnMap.cpp | 9 + src/Columns/ColumnMap.h | 3 + src/Columns/ColumnNullable.cpp | 9 + src/Columns/ColumnNullable.h | 3 + src/Columns/ColumnSparse.cpp | 9 + src/Columns/ColumnSparse.h | 3 + src/Columns/ColumnTuple.cpp | 28 + src/Columns/ColumnTuple.h | 3 + src/Columns/ColumnVariant.cpp | 185 ++++- src/Columns/ColumnVariant.h | 34 +- src/Columns/IColumn.cpp | 2 + src/Columns/IColumn.h | 2 + src/Columns/tests/gtest_column_dynamic.cpp | 652 +++++++++++++++ src/Core/Settings.h | 2 + src/Core/TypeId.h | 1 + src/DataTypes/DataTypeArray.cpp | 21 + src/DataTypes/DataTypeArray.h | 7 +- src/DataTypes/DataTypeDynamic.cpp | 144 ++++ src/DataTypes/DataTypeDynamic.h | 53 ++ src/DataTypes/DataTypeFactory.cpp | 1 + src/DataTypes/DataTypeFactory.h | 1 + src/DataTypes/DataTypeMap.h | 2 +- src/DataTypes/DataTypeObject.h | 2 +- src/DataTypes/DataTypeTuple.cpp | 4 +- src/DataTypes/DataTypeTuple.h | 2 +- src/DataTypes/DataTypeVariant.cpp | 23 +- src/DataTypes/DataTypeVariant.h | 4 +- src/DataTypes/IDataType.cpp | 71 +- src/DataTypes/IDataType.h | 30 +- src/DataTypes/ObjectUtils.cpp | 12 +- src/DataTypes/ObjectUtils.h | 4 +- .../Serializations/ISerialization.cpp | 19 + src/DataTypes/Serializations/ISerialization.h | 21 +- .../Serializations/SerializationArray.cpp | 5 +- .../Serializations/SerializationArray.h | 4 +- .../Serializations/SerializationDynamic.cpp | 645 ++++++++++++++ .../Serializations/SerializationDynamic.h | 116 +++ .../SerializationDynamicElement.cpp | 99 +++ .../SerializationDynamicElement.h | 58 ++ .../Serializations/SerializationInterval.cpp | 4 +- .../Serializations/SerializationInterval.h | 5 +- .../SerializationLowCardinality.cpp | 3 +- .../SerializationLowCardinality.h | 3 +- .../Serializations/SerializationMap.cpp | 5 +- .../Serializations/SerializationMap.h | 3 +- .../Serializations/SerializationNamed.cpp | 5 +- .../Serializations/SerializationNamed.h | 3 +- .../Serializations/SerializationNullable.cpp | 5 +- .../Serializations/SerializationNullable.h | 3 +- .../Serializations/SerializationObject.cpp | 5 +- .../Serializations/SerializationObject.h | 3 +- .../Serializations/SerializationSparse.cpp | 7 +- .../Serializations/SerializationSparse.h | 3 +- .../Serializations/SerializationTuple.cpp | 5 +- .../Serializations/SerializationTuple.h | 3 +- .../Serializations/SerializationVariant.cpp | 5 +- .../Serializations/SerializationVariant.h | 3 +- .../SerializationVariantElement.cpp | 28 +- .../SerializationVariantElement.h | 14 +- .../Serializations/SerializationWrapper.cpp | 5 +- .../Serializations/SerializationWrapper.h | 3 +- .../tests/gtest_object_serialization.cpp | 2 +- src/DataTypes/Utils.cpp | 1 + src/Databases/DatabaseReplicated.cpp | 1 + src/Formats/FormatSettings.h | 6 +- 
src/Formats/NativeReader.cpp | 2 +- src/Functions/FunctionsConversion.cpp | 356 +++++++- src/Functions/dynamicElement.cpp | 172 ++++ src/Functions/dynamicType.cpp | 104 +++ src/Functions/if.cpp | 11 + src/Functions/isNotNull.cpp | 6 +- src/Functions/isNull.cpp | 6 +- src/Functions/variantElement.cpp | 52 +- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Interpreters/TreeRewriter.cpp | 34 +- src/Interpreters/convertFieldToType.cpp | 11 +- .../parseColumnsListForTableFunction.cpp | 14 +- .../parseColumnsListForTableFunction.h | 2 + src/Parsers/ParserDataType.cpp | 46 +- src/Processors/Formats/IOutputFormat.h | 3 +- .../Algorithms/AggregatingSortedAlgorithm.cpp | 39 +- .../Algorithms/AggregatingSortedAlgorithm.h | 3 +- .../Algorithms/CollapsingSortedAlgorithm.cpp | 19 +- .../Algorithms/CollapsingSortedAlgorithm.h | 2 - .../GraphiteRollupSortedAlgorithm.cpp | 28 +- .../GraphiteRollupSortedAlgorithm.h | 4 +- .../IMergingAlgorithmWithDelayedChunk.h | 2 +- .../IMergingAlgorithmWithSharedChunks.cpp | 5 +- .../IMergingAlgorithmWithSharedChunks.h | 6 +- src/Processors/Merges/Algorithms/MergedData.h | 42 +- .../Algorithms/MergingSortedAlgorithm.cpp | 3 +- .../Algorithms/ReplacingSortedAlgorithm.cpp | 17 +- .../Algorithms/ReplacingSortedAlgorithm.h | 2 - .../Algorithms/SummingSortedAlgorithm.cpp | 76 +- .../Algorithms/SummingSortedAlgorithm.h | 4 +- .../VersionedCollapsingAlgorithm.cpp | 15 +- .../Algorithms/VersionedCollapsingAlgorithm.h | 2 - .../Transforms/ColumnGathererTransform.cpp | 34 +- src/Storages/AlterCommands.cpp | 6 +- src/Storages/ColumnsDescription.cpp | 36 +- src/Storages/HDFS/StorageHDFS.h | 2 + src/Storages/HDFS/StorageHDFSCluster.h | 2 + src/Storages/IStorage.h | 4 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 30 + src/Storages/MergeTree/IMergeTreeDataPart.h | 2 + src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.h | 1 + .../MergeTreeDataPartWriterCompact.cpp | 17 +- .../MergeTreeDataPartWriterCompact.h | 4 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 54 +- .../MergeTree/MergeTreeDataPartWriterWide.h | 14 +- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- .../MergeTree/MergeTreeReaderCompact.cpp | 5 +- .../MergeTree/MergeTreeReaderWide.cpp | 107 ++- src/Storages/MergeTree/MergeTreeReaderWide.h | 38 +- src/Storages/MergeTree/MergeTreeSettings.h | 1 + src/Storages/MergeTree/MutateTask.cpp | 16 +- .../MergeTree/StorageFromMergeTreeDataPart.h | 1 + src/Storages/S3Queue/StorageS3Queue.h | 1 + src/Storages/StorageAzureBlob.h | 2 + src/Storages/StorageAzureBlobCluster.h | 2 + src/Storages/StorageBuffer.h | 2 + src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageDistributed.h | 1 + src/Storages/StorageDummy.h | 1 + src/Storages/StorageFile.h | 2 + src/Storages/StorageFileCluster.h | 2 + src/Storages/StorageInMemoryMetadata.cpp | 6 +- src/Storages/StorageLog.cpp | 2 +- src/Storages/StorageMaterializedView.h | 1 + src/Storages/StorageMemory.h | 1 + src/Storages/StorageMerge.h | 1 + src/Storages/StorageNull.h | 2 + src/Storages/StorageS3.h | 2 + src/Storages/StorageS3Cluster.h | 2 + src/Storages/StorageSnapshot.cpp | 2 +- src/Storages/StorageURL.h | 2 + src/Storages/StorageURLCluster.h | 2 + src/Storages/getStructureOfRemoteTable.cpp | 2 +- .../02943_variant_read_subcolumns.sh | 2 +- ...03033_dynamic_text_serialization.reference | 55 ++ .../03033_dynamic_text_serialization.sql | 74 ++ .../03034_dynamic_conversions.reference | 63 ++ 
.../0_stateless/03034_dynamic_conversions.sql | 24 + .../03035_dynamic_sorting.reference | 299 +++++++ .../0_stateless/03035_dynamic_sorting.sql | 80 ++ .../03036_dynamic_read_subcolumns.reference | 57 ++ .../03036_dynamic_read_subcolumns.sh | 62 ++ .../03037_dynamic_merges_1.reference | 120 +++ .../0_stateless/03037_dynamic_merges_1.sh | 61 ++ .../0_stateless/03037_dynamic_merges_2.sh | 45 + .../03038_nested_dynamic_merges.reference | 92 ++ .../03038_nested_dynamic_merges.sh | 53 ++ ...9_dynamic_all_merge_algorithms_1.reference | 88 ++ .../03039_dynamic_all_merge_algorithms_1.sh | 65 ++ ...9_dynamic_all_merge_algorithms_2.reference | 44 + .../03039_dynamic_all_merge_algorithms_2.sh | 50 ++ .../03040_dynamic_type_alters.reference | 526 ++++++++++++ .../0_stateless/03040_dynamic_type_alters.sh | 76 ++ 169 files changed, 6770 insertions(+), 438 deletions(-) create mode 100644 docs/en/sql-reference/data-types/dynamic.md create mode 100644 src/Columns/ColumnDynamic.cpp create mode 100644 src/Columns/ColumnDynamic.h create mode 100644 src/Columns/tests/gtest_column_dynamic.cpp create mode 100644 src/DataTypes/DataTypeDynamic.cpp create mode 100644 src/DataTypes/DataTypeDynamic.h create mode 100644 src/DataTypes/Serializations/SerializationDynamic.cpp create mode 100644 src/DataTypes/Serializations/SerializationDynamic.h create mode 100644 src/DataTypes/Serializations/SerializationDynamicElement.cpp create mode 100644 src/DataTypes/Serializations/SerializationDynamicElement.h create mode 100644 src/Functions/dynamicElement.cpp create mode 100644 src/Functions/dynamicType.cpp create mode 100644 tests/queries/0_stateless/03033_dynamic_text_serialization.reference create mode 100644 tests/queries/0_stateless/03033_dynamic_text_serialization.sql create mode 100644 tests/queries/0_stateless/03034_dynamic_conversions.reference create mode 100644 tests/queries/0_stateless/03034_dynamic_conversions.sql create mode 100644 tests/queries/0_stateless/03035_dynamic_sorting.reference create mode 100644 tests/queries/0_stateless/03035_dynamic_sorting.sql create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference create mode 100755 tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1.reference create mode 100755 tests/queries/0_stateless/03037_dynamic_merges_1.sh create mode 100755 tests/queries/0_stateless/03037_dynamic_merges_2.sh create mode 100644 tests/queries/0_stateless/03038_nested_dynamic_merges.reference create mode 100755 tests/queries/0_stateless/03038_nested_dynamic_merges.sh create mode 100644 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference create mode 100755 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh create mode 100644 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference create mode 100755 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters.reference create mode 100755 tests/queries/0_stateless/03040_dynamic_type_alters.sh diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md new file mode 100644 index 00000000000..e20bdad1e79 --- /dev/null +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -0,0 +1,157 @@ +--- +slug: /en/sql-reference/data-types/dynamic +sidebar_position: 56 +sidebar_label: Dynamic +--- + +# Dynamic + +This type allows to store values of any type inside it without knowing all of 
them in advance. + +To declare a column of `Dynamic` type, use the following syntax: + +``` sql + Dynamic(max_types=N) +``` + +Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic`. If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`. + +:::note +The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`. +::: + +## Creating Dynamic + +Using `Dynamic` type in table column definition: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d) FROM test; +``` + +```text +┌─d─────────────┬─dynamicType(d)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ +│ Hello, World! │ String │ +│ [1,2,3] │ Array(Int64) │ +└───────────────┴────────────────┘ +``` + +Using CAST from ordinary column: + +```sql +SELECT 'Hello, World!'::Dynamic as d, dynamicType(d); +``` + +```text +┌─d─────────────┬─dynamicType(d)─┐ +│ Hello, World! │ String │ +└───────────────┴────────────────┘ +``` + +Using CAST from `Variant` column: + +```sql +SET allow_experimental_variant_type = 1, use_variant_as_common_type = 1; +SELECT multiIf((number % 3) = 0, number, (number % 3) = 1, range(number + 1), NULL)::Dynamic AS d, dynamicType(d) FROM numbers(3) +``` + +```text +┌─d─────┬─dynamicType(d)─┐ +│ 0 │ UInt64 │ +│ [0,1] │ Array(UInt64) │ +│ ᴺᵁᴸᴸ │ None │ +└───────┴────────────────┘ +``` + + +## Reading Dynamic nested types as subcolumns + +`Dynamic` type supports reading a single nested type from a `Dynamic` column using the type name as a subcolumn. +So, if you have column `d Dynamic` you can read a subcolumn of any valid type `T` using syntax `d.T`, +this subcolumn will have type `Nullable(T)` if `T` can be inside `Nullable` and `T` otherwise. This subcolumn will +be the same size as original `Dynamic` column and will contain `NULL` values (or empty values if `T` cannot be inside `Nullable`) +in all rows in which original `Dynamic` column doesn't have type `T`. + +`Dynamic` subcolumns can be also read using function `dynamicElement(dynamic_column, type_name)`. + +Examples: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d), d.String, d.Int64, d.`Array(Int64)`, d.Date, d.`Array(String)` FROM test; +``` + +```text +┌─d─────────────┬─dynamicType(d)─┬─d.String──────┬─d.Int64─┬─d.Array(Int64)─┬─d.Date─┬─d.Array(String)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ [] │ ᴺᵁᴸᴸ │ [] │ +│ Hello, World! │ String │ Hello, World! 
│ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │
+│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ ᴺᵁᴸᴸ │ [] │
+└───────────────┴────────────────┴───────────────┴─────────┴────────────────┴────────┴─────────────────┘
+```
+
+```sql
+SELECT toTypeName(d.String), toTypeName(d.Int64), toTypeName(d.`Array(Int64)`), toTypeName(d.Date), toTypeName(d.`Array(String)`) FROM test LIMIT 1;
+```
+
+```text
+┌─toTypeName(d.String)─┬─toTypeName(d.Int64)─┬─toTypeName(d.Array(Int64))─┬─toTypeName(d.Date)─┬─toTypeName(d.Array(String))─┐
+│ Nullable(String) │ Nullable(Int64) │ Array(Int64) │ Nullable(Date) │ Array(String) │
+└──────────────────────┴─────────────────────┴────────────────────────────┴────────────────────┴─────────────────────────────┘
+```
+
+```sql
+SELECT d, dynamicType(d), dynamicElement(d, 'String'), dynamicElement(d, 'Int64'), dynamicElement(d, 'Array(Int64)'), dynamicElement(d, 'Date'), dynamicElement(d, 'Array(String)') FROM test;
+```
+
+```text
+┌─d─────────────┬─dynamicType(d)─┬─dynamicElement(d, 'String')─┬─dynamicElement(d, 'Int64')─┬─dynamicElement(d, 'Array(Int64)')─┬─dynamicElement(d, 'Date')─┬─dynamicElement(d, 'Array(String)')─┐
+│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │
+│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ [] │ ᴺᵁᴸᴸ │ [] │
+│ Hello, World! │ String │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │
+│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ ᴺᵁᴸᴸ │ [] │
+└───────────────┴────────────────┴─────────────────────────────┴────────────────────────────┴───────────────────────────────────┴───────────────────────────┴────────────────────────────────────┘
+```
+
+To know what type is stored in each row, the function `dynamicType(dynamic_column)` can be used. It returns a `String` with the value type name for each row (or `'None'` if the row is `NULL`).
+
+Example:
+
+```sql
+CREATE TABLE test (d Dynamic) ENGINE = Memory;
+INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]);
+SELECT dynamicType(d) FROM test;
+```
+
+```text
+┌─dynamicType(d)─┐
+│ None │
+│ Int64 │
+│ String │
+│ Array(Int64) │
+└────────────────┘
+```
+
+## Conversion between Dynamic column and other columns
+
+There are 4 possible conversions that can be performed with a `Dynamic` column.
+
+### Converting an ordinary column to a Dynamic column
+
+```sql
+SELECT 'Hello, World!'::Dynamic as d, dynamicType(d);
+```
+
+```text
+┌─d─────────────┬─dynamicType(d)─┐
+│ Hello, World! │ String │
+└───────────────┴────────────────┘
+```
+
+
+
+
+
diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp
index 7b268b80116..29773492dc9 100644
--- a/src/Columns/ColumnArray.cpp
+++ b/src/Columns/ColumnArray.cpp
@@ -1289,4 +1289,14 @@ size_t ColumnArray::getNumberOfDimensions() const
 return 1 + nested_array->getNumberOfDimensions(); /// Every modern C++ compiler optimizes tail recursion.
} +void ColumnArray::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getDataPtr()); + + data->takeDynamicStructureFromSourceColumns(nested_source_columns); +} + } diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 230d8830265..53eb5166df8 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -175,6 +175,9 @@ public: size_t getNumberOfDimensions() const; + bool hasDynamicStructure() const override { return getData().hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: WrappedPtr data; WrappedPtr offsets; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index 6763410b46d..934adf07cf4 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -122,6 +122,9 @@ public: UInt64 getNumberOfDefaultRows() const override { throwMustBeDecompressed(); } void getIndicesOfNonDefaultRows(Offsets &, size_t, size_t) const override { throwMustBeDecompressed(); } + bool hasDynamicStructure() const override { throwMustBeDecompressed(); } + void takeDynamicStructureFromSourceColumns(const Columns &) override { throwMustBeDecompressed(); } + protected: size_t rows; size_t bytes; diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index f2cea83db0e..cf3f448516c 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -159,6 +159,15 @@ void ColumnConst::compareColumn( std::fill(compare_results.begin(), compare_results.end(), res); } +void ColumnConst::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getDataColumnPtr()); + data->takeDynamicStructureFromSourceColumns(nested_source_columns); +} + ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value) { auto data = column->cloneEmpty(); diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 4a3d40ca0d2..042468cbbcc 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -306,6 +306,10 @@ public: T getValue() const { return static_cast(getField().safeGet()); } bool isCollationSupported() const override { return data->isCollationSupported(); } + + bool hasDynamicStructure() const override { return data->hasDynamicStructure(); } + + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; }; ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value); diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp new file mode 100644 index 00000000000..293055b43fc --- /dev/null +++ b/src/Columns/ColumnDynamic.cpp @@ -0,0 +1,785 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int PARAMETER_OUT_OF_BOUND; +} + + +ColumnDynamic::ColumnDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) +{ + /// Create empty Variant. 
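+ /// A newly created ColumnDynamic has no variants at all: the wrapped Variant column
+ /// starts empty and is extended lazily (see addNewVariant) as values of new types are inserted.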
+ variant_info.variant_type = std::make_shared(DataTypes{}); + variant_info.variant_name = variant_info.variant_type->getName(); + variant_column = variant_info.variant_type->createColumn(); +} + +ColumnDynamic::ColumnDynamic( + MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_) + : variant_column(std::move(variant_column_)) + , variant_info(variant_info_) + , max_dynamic_types(max_dynamic_types_) + , statistics(statistics_) +{ +} + +ColumnDynamic::MutablePtr ColumnDynamic::create(MutableColumnPtr variant_column, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_) +{ + VariantInfo variant_info; + variant_info.variant_type = variant_type; + variant_info.variant_name = variant_type->getName(); + const auto & variants = assert_cast(*variant_type).getVariants(); + variant_info.variant_names.reserve(variants.size()); + variant_info.variant_name_to_discriminator.reserve(variants.size()); + for (ColumnVariant::Discriminator discr = 0; discr != variants.size(); ++discr) + { + variant_info.variant_names.push_back(variants[discr]->getName()); + variant_info.variant_name_to_discriminator[variant_info.variant_names.back()] = discr; + } + + return create(std::move(variant_column), variant_info, max_dynamic_types_, statistics_); +} + +bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant) +{ + /// Check if we already have such variant. + if (variant_info.variant_name_to_discriminator.contains(new_variant->getName())) + return true; + + /// Check if we reached maximum number of variants. + if (variant_info.variant_names.size() >= max_dynamic_types) + { + /// ColumnDynamic can have max_dynamic_types number of variants only when it has String as a variant. + /// Otherwise we won't be able to add cast new variants to Strings. + if (!variant_info.variant_name_to_discriminator.contains("String")) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Maximum number of variants reached, but no String variant exists"); + + return false; + } + + /// If we have max_dynamic_types - 1 number of variants and don't have String variant, we can add only String variant. 
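+ /// In other words, the last free slot is reserved for String, so that values of any further
+ /// new types can still be stored after being cast to String.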
+ if (variant_info.variant_names.size() == max_dynamic_types - 1 && new_variant->getName() != "String" && !variant_info.variant_name_to_discriminator.contains("String")) + return false; + + const DataTypes & current_variants = assert_cast(*variant_info.variant_type).getVariants(); + DataTypes all_variants = current_variants; + all_variants.push_back(new_variant); + auto new_variant_type = std::make_shared(all_variants); + const auto & new_variants = assert_cast(*new_variant_type).getVariants(); + + std::vector current_to_new_discriminators; + current_to_new_discriminators.resize(variant_info.variant_names.size()); + Names new_variant_names; + new_variant_names.reserve(new_variants.size()); + std::unordered_map new_variant_name_to_discriminator; + new_variant_name_to_discriminator.reserve(new_variants.size()); + std::vector> new_variant_columns_and_discriminators_to_add; + new_variant_columns_and_discriminators_to_add.reserve(new_variants.size() - current_variants.size()); + + for (ColumnVariant::Discriminator discr = 0; discr != new_variants.size(); ++discr) + { + String name = new_variants[discr]->getName(); + new_variant_names.push_back(name); + new_variant_name_to_discriminator[name] = discr; + auto it = variant_info.variant_name_to_discriminator.find(name); + if (it == variant_info.variant_name_to_discriminator.end()) + new_variant_columns_and_discriminators_to_add.emplace_back(new_variants[discr]->createColumn(), discr); + else + current_to_new_discriminators[it->second] = discr; + } + + variant_info.variant_type = new_variant_type; + variant_info.variant_name = new_variant_type->getName(); + variant_info.variant_names = new_variant_names; + variant_info.variant_name_to_discriminator = new_variant_name_to_discriminator; + assert_cast(*variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add)); + variant_mappings_cache.clear(); + return true; +} + +void ColumnDynamic::addStringVariant() +{ + addNewVariant(std::make_shared()); +} + +void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DB::DataTypePtr & new_variant_type) +{ + const DataTypes & current_variants = assert_cast(variant_info.variant_type.get())->getVariants(); + const DataTypes & new_variants = assert_cast(new_variant_type.get())->getVariants(); + + Names new_variant_names; + new_variant_names.reserve(new_variants.size()); + std::unordered_map new_variant_name_to_discriminator; + new_variant_name_to_discriminator.reserve(new_variants.size()); + std::vector> new_variant_columns_and_discriminators_to_add; + new_variant_columns_and_discriminators_to_add.reserve(new_variants.size() - current_variants.size()); + std::vector current_to_new_discriminators; + current_to_new_discriminators.resize(current_variants.size()); + + for (ColumnVariant::Discriminator discr = 0; discr != new_variants.size(); ++discr) + { + String name = new_variants[discr]->getName(); + new_variant_names.push_back(name); + new_variant_name_to_discriminator[name] = discr; + + auto current_it = variant_info.variant_name_to_discriminator.find(name); + if (current_it == variant_info.variant_name_to_discriminator.end()) + new_variant_columns_and_discriminators_to_add.emplace_back(new_variants[discr]->createColumn(), discr); + else + current_to_new_discriminators[current_it->second] = discr; + } + + variant_info.variant_type = new_variant_type; + variant_info.variant_name = new_variant_type->getName(); + variant_info.variant_names = new_variant_names; + variant_info.variant_name_to_discriminator = 
new_variant_name_to_discriminator; + assert_cast(*variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add)); + /// Clear mappings cache because now with new Variant we will have new mappings. + variant_mappings_cache.clear(); +} + +std::vector * ColumnDynamic::combineVariants(const DB::ColumnDynamic::VariantInfo & other_variant_info) +{ + /// Check if we already have global discriminators mapping for other Variant in cache. + /// It's used to not calculate the same mapping each call of insertFrom with the same columns. + auto cache_it = variant_mappings_cache.find(other_variant_info.variant_name); + if (cache_it != variant_mappings_cache.end()) + return &cache_it->second; + + /// Check if we already tried to combine these variants but failed due to max_dynamic_types limit. + if (variants_with_failed_combination.contains(other_variant_info.variant_name)) + return nullptr; + + const DataTypes & other_variants = assert_cast(*other_variant_info.variant_type).getVariants(); + + size_t num_new_variants = 0; + for (size_t i = 0; i != other_variants.size(); ++i) + { + if (!variant_info.variant_name_to_discriminator.contains(other_variant_info.variant_names[i])) + ++num_new_variants; + } + + /// If we have new variants we need to update current variant info and extend Variant column + if (num_new_variants) + { + const DataTypes & current_variants = assert_cast(*variant_info.variant_type).getVariants(); + + /// We cannot combine Variants if total number of variants exceeds max_dynamic_types. + if (current_variants.size() + num_new_variants > max_dynamic_types) + { + /// Remember that we cannot combine our variant with this one, so we will not try to do it again. + variants_with_failed_combination.insert(other_variant_info.variant_name); + return nullptr; + } + + /// We cannot combine Variants if total number of variants reaches max_dynamic_types and we don't have String variant. + if (current_variants.size() + num_new_variants == max_dynamic_types && !variant_info.variant_name_to_discriminator.contains("String") && !other_variant_info.variant_name_to_discriminator.contains("String")) + { + variants_with_failed_combination.insert(other_variant_info.variant_name); + return nullptr; + } + + DataTypes all_variants = current_variants; + all_variants.insert(all_variants.end(), other_variants.begin(), other_variants.end()); + auto new_variant_type = std::make_shared(all_variants); + updateVariantInfoAndExpandVariantColumn(new_variant_type); + } + + /// Create a global discriminators mapping for other variant. + std::vector other_to_new_discriminators; + other_to_new_discriminators.reserve(other_variants.size()); + for (size_t i = 0; i != other_variants.size(); ++i) + other_to_new_discriminators.push_back(variant_info.variant_name_to_discriminator[other_variant_info.variant_names[i]]); + + /// Save mapping to cache to not calculate it again for the same Variants. + auto [it, _] = variant_mappings_cache.emplace(other_variant_info.variant_name, std::move(other_to_new_discriminators)); + return &it->second; +} + +void ColumnDynamic::insert(const DB::Field & x) +{ + /// Check if we can insert field without Variant extension. + if (variant_column->tryInsert(x)) + return; + + /// If we cannot insert field into current variant column, extend it with new variant for this field from its type. + if (likely(addNewVariant(applyVisitor(FieldToDataType(), x)))) + { + /// Now we should be able to insert this field into extended variant column. 
+ variant_column->insert(x); + } + else + { + /// We reached maximum number of variants and couldn't add new variant. + /// This case should be really rare in real use cases. + /// We should always be able to add String variant and cast inserted value to String. + addStringVariant(); + variant_column->insert(toString(x)); + } +} + +bool ColumnDynamic::tryInsert(const DB::Field & x) +{ + /// We can insert any value into Dynamic column. + insert(x); + return true; +} + + +void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) +{ + const auto & dynamic_src = assert_cast(src_); + + /// Check if we have the same variants in both columns. + if (variant_info.variant_name == dynamic_src.variant_info.variant_name) + { + variant_column->insertFrom(*dynamic_src.variant_column, n); + return; + } + + auto & variant_col = assert_cast(*variant_column); + + /// If variants are different, we need to extend our variant with new variants. + if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + { + variant_col.insertFrom(*dynamic_src.variant_column, n, *global_discriminators_mapping); + return; + } + + /// We cannot combine 2 Variant types as total number of variants exceeds the limit. + /// We need to insert single value, try to add only corresponding variant. + const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); + auto src_global_discr = src_variant_col.globalDiscriminatorAt(n); + + /// NULL doesn't require Variant extension. + if (src_global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + insertDefault(); + return; + } + + auto variant_type = assert_cast(*dynamic_src.variant_info.variant_type).getVariants()[src_global_discr]; + if (addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator[dynamic_src.variant_info.variant_names[src_global_discr]]; + variant_col.insertIntoVariantFrom(discr, src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(n)); + return; + } + + /// We reached maximum number of variants and couldn't add new variant. + /// We should always be able to add String variant and cast inserted value to String. + addStringVariant(); + auto tmp_variant_column = src_variant_col.getVariantByGlobalDiscriminator(src_global_discr).cloneEmpty(); + tmp_variant_column->insertFrom(src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(n)); + auto tmp_string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + auto string_variant_discr = variant_info.variant_name_to_discriminator["String"]; + variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0); +} + +void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length) +{ + if (start + length > src_.size()) + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnDynamic::insertRangeFrom method. " + "[start({}) + length({}) > src.size()({})]", start, length, src_.size()); + + const auto & dynamic_src = assert_cast(src_); + + /// Check if we have the same variants in both columns. + if (variant_info.variant_names == dynamic_src.variant_info.variant_names) + { + variant_column->insertRangeFrom(*dynamic_src.variant_column, start, length); + return; + } + + auto & variant_col = assert_cast(*variant_column); + + /// If variants are different, we need to extend our variant with new variants. 
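+ /// combineVariants returns a mapping from the source column's global discriminators to ours,
+ /// or nullptr if the combined set of variants would not fit into max_dynamic_types.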
+ if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + { + variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length, *global_discriminators_mapping); + return; + } + + /// We cannot combine 2 Variant types as total number of variants exceeds the limit. + /// In this case we will add most frequent variants from this range and insert them as usual, + /// all other variants will be converted to String. + const auto & src_variant_column = dynamic_src.getVariantColumn(); + + /// Calculate ranges for each variant in current range. + std::vector> variants_ranges(dynamic_src.variant_info.variant_names.size(), {0, 0}); + /// If we insert the whole column, no need to iterate through the range, we can just take variant sizes. + if (start == 0 && length == dynamic_src.size()) + { + for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) + variants_ranges[i] = {0, src_variant_column.getVariantByGlobalDiscriminator(i).size()}; + } + /// Otherwise we need to iterate through discriminators and calculate the range for each variant. + else + { + const auto & local_discriminators = src_variant_column.getLocalDiscriminators(); + const auto & offsets = src_variant_column.getOffsets(); + size_t end = start + length; + for (size_t i = start; i != end; ++i) + { + auto discr = src_variant_column.globalDiscriminatorByLocal(local_discriminators[i]); + if (discr != ColumnVariant::NULL_DISCRIMINATOR) + { + if (!variants_ranges[discr].second) + variants_ranges[discr].first = offsets[i]; + ++variants_ranges[discr].second; + } + } + } + + const auto & src_variants = assert_cast(*dynamic_src.variant_info.variant_type).getVariants(); + /// List of variants that will be converted to String. + std::vector variants_to_convert_to_string; + /// Mapping from global discriminators of src_variant to the new variant we will create. + std::vector other_to_new_discriminators; + other_to_new_discriminators.reserve(dynamic_src.variant_info.variant_names.size()); + + /// Check if we cannot add any more new variants. In this case we will convert all new variants to String. + if (variant_info.variant_names.size() == max_dynamic_types || (variant_info.variant_names.size() == max_dynamic_types - 1 && !variant_info.variant_name_to_discriminator.contains("String"))) + { + addStringVariant(); + for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) + { + auto it = variant_info.variant_name_to_discriminator.find(dynamic_src.variant_info.variant_names[i]); + if (it == variant_info.variant_name_to_discriminator.end()) + { + variants_to_convert_to_string.push_back(i); + other_to_new_discriminators.push_back(variant_info.variant_name_to_discriminator["String"]); + } + else + { + other_to_new_discriminators.push_back(it->second); + } + } + } + /// We still can add some new variants, but not all of them. Let's choose the most frequent variants in specified range. 
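+ /// New variants are ordered by their size within the inserted range (descending) and added
+ /// until max_dynamic_types is reached; the remaining ones are redirected to the String variant.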
+ else + { + std::vector> new_variants_with_sizes; + new_variants_with_sizes.reserve(dynamic_src.variant_info.variant_names.size()); + for (size_t i = 0; i != dynamic_src.variant_info.variant_names.size(); ++i) + { + const auto & variant_name = dynamic_src.variant_info.variant_names[i]; + if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name)) + new_variants_with_sizes.emplace_back(variants_ranges[i].second, i); + } + + std::sort(new_variants_with_sizes.begin(), new_variants_with_sizes.end(), std::greater()); + DataTypes new_variants = assert_cast(*variant_info.variant_type).getVariants(); + if (!variant_info.variant_name_to_discriminator.contains("String")) + new_variants.push_back(std::make_shared()); + + for (const auto & [_, discr] : new_variants_with_sizes) + { + if (new_variants.size() != max_dynamic_types) + new_variants.push_back(src_variants[discr]); + else + variants_to_convert_to_string.push_back(discr); + } + + auto new_variant_type = std::make_shared(new_variants); + updateVariantInfoAndExpandVariantColumn(new_variant_type); + auto string_variant_discriminator = variant_info.variant_name_to_discriminator.at("String"); + for (const auto & variant_name : dynamic_src.variant_info.variant_names) + { + auto it = variant_info.variant_name_to_discriminator.find(variant_name); + if (it == variant_info.variant_name_to_discriminator.end()) + other_to_new_discriminators.push_back(string_variant_discriminator); + else + other_to_new_discriminators.push_back(it->second); + } + } + + /// Convert to String all variants that couldn't be added. + std::unordered_map variants_converted_to_string; + variants_converted_to_string.reserve(variants_to_convert_to_string.size()); + for (auto discr : variants_to_convert_to_string) + { + auto [variant_start, variant_length] = variants_ranges[discr]; + const auto & variant = src_variant_column.getVariantPtrByGlobalDiscriminator(discr); + if (variant_start == 0 && variant_length == variant->size()) + variants_converted_to_string[discr] = castColumn(ColumnWithTypeAndName(variant, src_variants[discr], ""), std::make_shared()); + else + variants_converted_to_string[discr] = castColumn(ColumnWithTypeAndName(variant->cut(variant_start, variant_length), src_variants[discr], ""), std::make_shared()); + } + + const auto & src_local_discriminators = src_variant_column.getLocalDiscriminators(); + const auto & src_offsets = src_variant_column.getOffsets(); + const auto & src_variant_columns = src_variant_column.getVariants(); + size_t end = start + length; + for (size_t i = start; i != end; ++i) + { + auto local_discr = src_local_discriminators[i]; + if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + variant_col.insertDefault(); + } + else + { + auto global_discr = src_variant_column.globalDiscriminatorByLocal(local_discr); + auto to_global_discr = other_to_new_discriminators[global_discr]; + auto it = variants_converted_to_string.find(global_discr); + if (it == variants_converted_to_string.end()) + { + variant_col.insertIntoVariantFrom(to_global_discr, *src_variant_columns[local_discr], src_offsets[i]); + } + else + { + variant_col.insertIntoVariantFrom(to_global_discr, *it->second, src_offsets[i] - variants_ranges[global_discr].first); + } + } + } +} + +void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +{ + const auto & dynamic_src = assert_cast(src_); + + /// Check if we have the same variants in both columns. 
+ if (variant_info.variant_names == dynamic_src.variant_info.variant_names) + { + variant_column->insertManyFrom(*dynamic_src.variant_column, position, length); + return; + } + + auto & variant_col = assert_cast(*variant_column); + + /// If variants are different, we need to extend our variant with new variants. + if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + { + variant_col.insertManyFrom(*dynamic_src.variant_column, position, length, *global_discriminators_mapping); + return; + } + + /// We cannot combine 2 Variant types as total number of variants exceeds the limit. + /// We need to insert single value, try to add only corresponding variant. + const auto & src_variant_col = assert_cast(*dynamic_src.variant_column); + auto src_global_discr = src_variant_col.globalDiscriminatorAt(position); + if (src_global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + insertDefault(); + return; + } + + auto variant_type = assert_cast(*dynamic_src.variant_info.variant_type).getVariants()[src_global_discr]; + if (addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator[dynamic_src.variant_info.variant_names[src_global_discr]]; + variant_col.insertManyIntoVariantFrom(discr, src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(position), length); + return; + } + + addStringVariant(); + auto tmp_variant_column = src_variant_col.getVariantByGlobalDiscriminator(src_global_discr).cloneEmpty(); + tmp_variant_column->insertFrom(src_variant_col.getVariantByGlobalDiscriminator(src_global_discr), src_variant_col.offsetAt(position)); + auto tmp_string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + auto string_variant_discr = variant_info.variant_name_to_discriminator["String"]; + variant_col.insertManyIntoVariantFrom(string_variant_discr, *tmp_string_column, 0, length); +} + + +StringRef ColumnDynamic::serializeValueIntoArena(size_t n, DB::Arena & arena, const char *& begin) const +{ + /// We cannot use Variant serialization here as it serializes discriminator + value, + /// but Dynamic doesn't have fixed mapping discriminator <-> variant type + /// as different Dynamic column can have different Variants. + /// Instead, we serialize null bit + variant type name (size + bytes) + value. 
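+ /// Resulting layout in the arena:
+ ///   [null_bit = 1]                                          -- NULL value
+ ///   [null_bit = 0][name_size][name bytes][serialized value] -- non-NULL value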
+ const auto & variant_col = assert_cast(*variant_column); + auto discr = variant_col.globalDiscriminatorAt(n); + StringRef res; + UInt8 null_bit = discr == ColumnVariant::NULL_DISCRIMINATOR; + if (null_bit) + { + char * pos = arena.allocContinue(sizeof(UInt8), begin); + memcpy(pos, &null_bit, sizeof(UInt8)); + res.data = pos; + res.size = sizeof(UInt8); + return res; + } + + const auto & variant_name = variant_info.variant_names[discr]; + size_t variant_name_size = variant_name.size(); + char * pos = arena.allocContinue(sizeof(UInt8) + sizeof(size_t) + variant_name.size(), begin); + memcpy(pos, &null_bit, sizeof(UInt8)); + memcpy(pos + sizeof(UInt8), &variant_name_size, sizeof(size_t)); + memcpy(pos + sizeof(UInt8) + sizeof(size_t), variant_name.data(), variant_name.size()); + res.data = pos; + res.size = sizeof(UInt8) + sizeof(size_t) + variant_name.size(); + + auto value_ref = variant_col.getVariantByGlobalDiscriminator(discr).serializeValueIntoArena(variant_col.offsetAt(n), arena, begin); + res.data = value_ref.data - res.size; + res.size += value_ref.size; + return res; +} + +const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) +{ + auto & variant_col = assert_cast(*variant_column); + UInt8 null_bit = unalignedLoad(pos); + pos += sizeof(UInt8); + if (null_bit) + { + insertDefault(); + return pos; + } + + /// Read variant type name. + const size_t variant_name_size = unalignedLoad(pos); + pos += sizeof(variant_name_size); + String variant_name; + variant_name.resize(variant_name_size); + memcpy(variant_name.data(), pos, variant_name_size); + pos += variant_name_size; + /// If we already have such variant, just deserialize it into corresponding variant column. + auto it = variant_info.variant_name_to_discriminator.find(variant_name); + if (it != variant_info.variant_name_to_discriminator.end()) + { + auto discr = it->second; + return variant_col.deserializeVariantAndInsertFromArena(discr, pos); + } + + /// If we don't have such variant, add it. + auto variant_type = DataTypeFactory::instance().get(variant_name); + if (likely(addNewVariant(variant_type))) + { + auto discr = variant_info.variant_name_to_discriminator[variant_name]; + return variant_col.deserializeVariantAndInsertFromArena(discr, pos); + } + + /// We reached maximum number of variants and couldn't add new variant. + /// This case should be really rare in real use cases. + /// We should always be able to add String variant and cast inserted value to String. + addStringVariant(); + /// Create temporary column of this variant type and deserialize value into it. + auto tmp_variant_column = variant_type->createColumn(); + pos = tmp_variant_column->deserializeAndInsertFromArena(pos); + /// Cast temporary column to String and insert this value into String variant. 
+ auto str_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + variant_col.insertIntoVariantFrom(variant_info.variant_name_to_discriminator["String"], *str_column, 0); + return pos; +} + +const char * ColumnDynamic::skipSerializedInArena(const char * pos) const +{ + UInt8 null_bit = unalignedLoad(pos); + pos += sizeof(UInt8); + if (null_bit) + return pos; + + const size_t variant_name_size = unalignedLoad(pos); + pos += sizeof(variant_name_size); + String variant_name; + variant_name.resize(variant_name_size); + memcpy(variant_name.data(), pos, variant_name_size); + pos += variant_name_size; + auto tmp_variant_column = DataTypeFactory::instance().get(variant_name)->createColumn(); + return tmp_variant_column->skipSerializedInArena(pos); +} + +void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const +{ + const auto & variant_col = assert_cast(*variant_column); + auto discr = variant_col.globalDiscriminatorAt(n); + if (discr == ColumnVariant::NULL_DISCRIMINATOR) + { + hash.update(discr); + return; + } + + hash.update(variant_info.variant_names[discr]); + variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash); +} + +int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const +{ + const auto & left_variant = assert_cast(*variant_column); + const auto & right_dynamic = assert_cast(rhs); + const auto & right_variant = assert_cast(*right_dynamic.variant_column); + + auto left_discr = left_variant.globalDiscriminatorAt(n); + auto right_discr = right_variant.globalDiscriminatorAt(m); + + /// Check if we have NULLs and return result based on nan_direction_hint. + if (left_discr == ColumnVariant::NULL_DISCRIMINATOR && right_discr == ColumnVariant::NULL_DISCRIMINATOR) + return 0; + else if (left_discr == ColumnVariant::NULL_DISCRIMINATOR) + return nan_direction_hint; + else if (right_discr == ColumnVariant::NULL_DISCRIMINATOR) + return -nan_direction_hint; + + /// If rows have different types, we compare type names. + if (variant_info.variant_names[left_discr] != right_dynamic.variant_info.variant_names[right_discr]) + return variant_info.variant_names[left_discr] < right_dynamic.variant_info.variant_names[right_discr] ? -1 : 1; + + /// If rows have the same types, compare actual values from corresponding variants. + return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint); +} + +ColumnPtr ColumnDynamic::compress() const +{ + ColumnPtr variant_compressed = variant_column->compress(); + size_t byte_size = variant_compressed->byteSize(); + return ColumnCompressed::create(size(), byte_size, + [my_variant_compressed = std::move(variant_compressed), my_variant_info = variant_info, my_max_dynamic_types = max_dynamic_types, my_statistics = statistics]() mutable + { + return ColumnDynamic::create(my_variant_compressed->decompress(), my_variant_info, my_max_dynamic_types, my_statistics); + }); +} + +void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + if (!empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "takeDynamicStructureFromSourceColumns should be called only on empty Dynamic column"); + + /// During serialization of Dynamic column in MergeTree all Dynamic columns + /// in single part must have the same structure (the same variants). 
During merge + /// resulting column is constructed by inserting from source columns, + /// but it may happen that resulting column doesn't have rows from all source parts + /// but only from subset of them, and as a result some variants could be missing + /// and structures of resulting column may differ. + /// To solve this problem, before merge we create empty resulting column and use this method + /// to take dynamic structure from all source column even if we won't insert + /// rows from some of them. + + /// We want to construct resulting variant with most frequent variants from source columns and convert the rarest + /// variants to single String variant if we exceed the limit of variants. + /// First, collect all variants from all source columns and calculate total sizes. + std::unordered_map total_sizes; + DataTypes all_variants; + + for (const auto & source_column : source_columns) + { + const auto & source_dynamic = assert_cast(*source_column); + const auto & source_variant_column = source_dynamic.getVariantColumn(); + const auto & source_variant_info = source_dynamic.getVariantInfo(); + const auto & source_variants = assert_cast(*source_variant_info.variant_type).getVariants(); + /// During deserialization from MergeTree we will have variant sizes statistics from the whole data part. + const auto & source_statistics = source_dynamic.getStatistics(); + for (size_t i = 0; i != source_variants.size(); ++i) + { + const auto & variant_name = source_variant_info.variant_names[i]; + auto it = total_sizes.find(variant_name); + /// Add this variant to the list of all variants if we didn't see it yet. + if (it == total_sizes.end()) + { + all_variants.push_back(source_variants[i]); + it = total_sizes.emplace(variant_name, 0).first; + } + + size_t size = source_statistics.data.empty() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : source_statistics.data.at(variant_name); +// LOG_DEBUG(getLogger("ColumnDynamic"), "Source variant: {}. Variant: {}. Size: {}", source_variant_info.variant_name, variant_name, size); + it->second += size; + } + } + + DataTypePtr result_variant_type; + /// Check if the number of all variants exceeds the limit. + if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_sizes.contains("String"))) + { + /// Create list of variants with their sizes and sort it. + std::vector> variants_with_sizes; + variants_with_sizes.reserve(all_variants.size()); + for (const auto & variant : all_variants) + { +// LOG_DEBUG(getLogger("ColumnDynamic"), "Variant: {}. Size: {}", variant->getName(), total_sizes[variant->getName()]); + variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant); + } + std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); + + /// Take first max_dynamic_types variants from sorted list. + DataTypes result_variants; + result_variants.reserve(max_dynamic_types); + /// Add String variant in advance. + result_variants.push_back(std::make_shared()); + size_t i = 0; + while (result_variants.size() != max_dynamic_types && i < variants_with_sizes.size()) + { + const auto & variant = variants_with_sizes[i].second; + if (variant->getName() != "String") + result_variants.push_back(variant); + ++i; + } + + result_variant_type = std::make_shared(result_variants); + } + else + { + result_variant_type = std::make_shared(all_variants); + } + + /// Now we have resulting Variant and can fill variant info. 
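+ /// The per-variant sizes collected above are also stored in statistics with Source::MERGE,
+ /// so that later merges of the resulting column can reuse them instead of recounting rows.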
+ variant_info.variant_type = result_variant_type; + variant_info.variant_name = result_variant_type->getName(); + const auto & result_variants = assert_cast(*result_variant_type).getVariants(); + variant_info.variant_names.clear(); + variant_info.variant_names.reserve(result_variants.size()); + variant_info.variant_name_to_discriminator.clear(); + variant_info.variant_name_to_discriminator.reserve(result_variants.size()); + statistics.data.clear(); + statistics.data.reserve(result_variants.size()); + statistics.source = Statistics::Source::MERGE; + for (size_t i = 0; i != result_variants.size(); ++i) + { + auto variant_name = result_variants[i]->getName(); + variant_info.variant_names.push_back(variant_name); + variant_info.variant_name_to_discriminator[variant_name] = i; + statistics.data[variant_name] = total_sizes[variant_name]; + } + + variant_column = variant_info.variant_type->createColumn(); + + /// Now we have the resulting Variant that will be used in all merged columns. + /// Variants can also contain Dynamic columns inside, we should collect + /// all source variants that will be used in the resulting merged column + /// and call takeDynamicStructureFromSourceColumns on all resulting variants. + std::vector variants_source_columns; + variants_source_columns.resize(variant_info.variant_names.size()); + for (const auto & source_column : source_columns) + { + const auto & source_dynamic_column = assert_cast(*source_column); + const auto & source_variant_info = source_dynamic_column.getVariantInfo(); + for (size_t i = 0; i != variant_info.variant_names.size(); ++i) + { + /// Try to find this variant in current source column. + auto it = source_variant_info.variant_name_to_discriminator.find(variant_info.variant_names[i]); + if (it != source_variant_info.variant_name_to_discriminator.end()) + variants_source_columns[i].push_back(source_dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(it->second)); + } + } + + auto & variant_col = getVariantColumn(); + for (size_t i = 0; i != variant_info.variant_names.size(); ++i) + variant_col.getVariantByGlobalDiscriminator(i).takeDynamicStructureFromSourceColumns(variants_source_columns[i]); +} + +void ColumnDynamic::applyNullMap(const ColumnVector::Container & null_map) +{ + assert_cast(*variant_column).applyNullMap(null_map); +} + +void ColumnDynamic::applyNegatedNullMap(const ColumnVector::Container & null_map) +{ + assert_cast(*variant_column).applyNegatedNullMap(null_map); +} + +} diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h new file mode 100644 index 00000000000..7487a5aa0db --- /dev/null +++ b/src/Columns/ColumnDynamic.h @@ -0,0 +1,363 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +/** + * Column for storing Dynamic type values. + * Dynamic column allows to insert and store values of any data types inside. + * Inside it stores: + * - Variant column with all inserted values of different types. + * - Information about currently stored variants. + * + * When new values are inserted into Dynamic column, the internal Variant + * type and column are extended if the inserted value has new type. 
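+ * The number of different variants is limited by max_dynamic_types; once this limit is reached,
+ * values of any further new types are converted to String and stored in the String variant.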
+ */ +class ColumnDynamic final : public COWHelper, ColumnDynamic> +{ +public: + struct Statistics + { + enum class Source + { + READ, + MERGE, + }; + + Source source; + std::unordered_map data; + }; + +private: + friend class COWHelper, ColumnDynamic>; + + struct VariantInfo + { + DataTypePtr variant_type; + /// Name of the whole variant to not call getName() every time. + String variant_name; + /// Store names of variants to not call getName() every time on variants. + Names variant_names; + /// Store mapping (variant name) -> (global discriminator). + /// It's used during variant extension. + std::unordered_map variant_name_to_discriminator; + }; + + ColumnDynamic(size_t max_dynamic_types_); + ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}); + +public: + /** Create immutable column using immutable arguments. This arguments may be shared with other columns. + * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. + */ + using Base = COWHelper, ColumnDynamic>; + static Ptr create(const ColumnPtr & variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + { + return ColumnDynamic::create(variant_column_->assumeMutable(), variant_info_, max_dynamic_types_, statistics_); + } + + static MutablePtr create(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + { + return Base::create(std::move(variant_column_), variant_info_, max_dynamic_types_, statistics_); + } + + static MutablePtr create(MutableColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {}); + + static ColumnPtr create(ColumnPtr variant_column_, const DataTypePtr & variant_type, size_t max_dynamic_types_, const Statistics & statistics_ = {}) + { + return create(variant_column_->assumeMutable(), variant_type, max_dynamic_types_, statistics_); + } + + static MutablePtr create(size_t max_dynamic_types_) + { + return Base::create(max_dynamic_types_); + } + + std::string getName() const override { return "Dynamic(max_types=" + std::to_string(max_dynamic_types) + ")"; } + + const char * getFamilyName() const override + { + return "Dynamic"; + } + + TypeIndex getDataType() const override + { + return TypeIndex::Dynamic; + } + + MutableColumnPtr cloneEmpty() const override + { + /// Keep current dynamic structure. 
+ return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, statistics); + } + + MutableColumnPtr cloneResized(size_t size) const override + { + return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, statistics); + } + + size_t size() const override + { + return variant_column->size(); + } + + Field operator[](size_t n) const override + { + return (*variant_column)[n]; + } + + void get(size_t n, Field & res) const override + { + variant_column->get(n, res); + } + + bool isDefaultAt(size_t n) const override + { + return variant_column->isDefaultAt(n); + } + + bool isNullAt(size_t n) const override + { + return variant_column->isNullAt(n); + } + + StringRef getDataAt(size_t n) const override + { + return variant_column->getDataAt(n); + } + + void insertData(const char * pos, size_t length) override + { + return variant_column->insertData(pos, length); + } + + void insert(const Field & x) override; + bool tryInsert(const Field & x) override; + void insertFrom(const IColumn & src_, size_t n) override; + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + + void insertDefault() override + { + variant_column->insertDefault(); + } + + void insertManyDefaults(size_t length) override + { + variant_column->insertManyDefaults(length); + } + + void popBack(size_t n) override + { + variant_column->popBack(n); + } + + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + const char * deserializeAndInsertFromArena(const char * pos) override; + const char * skipSerializedInArena(const char * pos) const override; + + void updateHashWithValue(size_t n, SipHash & hash) const override; + + void updateWeakHash32(WeakHash32 & hash) const override + { + variant_column->updateWeakHash32(hash); + } + + void updateHashFast(SipHash & hash) const override + { + variant_column->updateHashFast(hash); + } + + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override + { + return create(variant_column->filter(filt, result_size_hint), variant_info, max_dynamic_types); + } + + void expand(const Filter & mask, bool inverted) override + { + variant_column->expand(mask, inverted); + } + + ColumnPtr permute(const Permutation & perm, size_t limit) const override + { + return create(variant_column->permute(perm, limit), variant_info, max_dynamic_types); + } + + ColumnPtr index(const IColumn & indexes, size_t limit) const override + { + return create(variant_column->index(indexes, limit), variant_info, max_dynamic_types); + } + + ColumnPtr replicate(const Offsets & replicate_offsets) const override + { + return create(variant_column->replicate(replicate_offsets), variant_info, max_dynamic_types); + } + + MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override + { + MutableColumns scattered_variant_columns = variant_column->scatter(num_columns, selector); + MutableColumns scattered_columns; + scattered_columns.reserve(num_columns); + for (auto & scattered_variant_column : scattered_variant_columns) + scattered_columns.emplace_back(create(std::move(scattered_variant_column), variant_info, max_dynamic_types)); + + return scattered_columns; + } + + int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + + bool hasEqualValues() const override + { + return variant_column->hasEqualValues(); + } + + void getExtremes(Field & min, 
Field & max) const override + { + variant_column->getExtremes(min, max); + } + + void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, + size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override + { + variant_column->getPermutation(direction, stability, limit, nan_direction_hint, res); + } + + void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, + size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override + { + variant_column->updatePermutation(direction, stability, limit, nan_direction_hint, res, equal_ranges); + } + + void reserve(size_t n) override + { + variant_column->reserve(n); + } + + void ensureOwnership() override + { + variant_column->ensureOwnership(); + } + + size_t byteSize() const override + { + return variant_column->byteSize(); + } + + size_t byteSizeAt(size_t n) const override + { + return variant_column->byteSizeAt(n); + } + + size_t allocatedBytes() const override + { + return variant_column->allocatedBytes(); + } + + void protect() override + { + variant_column->protect(); + } + + void forEachSubcolumn(MutableColumnCallback callback) override + { + callback(variant_column); + } + + void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override + { + callback(*variant_column); + variant_column->forEachSubcolumnRecursively(callback); + } + + bool structureEquals(const IColumn & rhs) const override + { + if (const auto * rhs_concrete = typeid_cast(&rhs)) + return max_dynamic_types == rhs_concrete->max_dynamic_types; + return false; + } + + ColumnPtr compress() const override; + + double getRatioOfDefaultRows(double sample_ratio) const override + { + return variant_column->getRatioOfDefaultRows(sample_ratio); + } + + UInt64 getNumberOfDefaultRows() const override + { + return variant_column->getNumberOfDefaultRows(); + } + + void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override + { + variant_column->getIndicesOfNonDefaultRows(indices, from, limit); + } + + void finalize() override + { + variant_column->finalize(); + } + + bool isFinalized() const override + { + return variant_column->isFinalized(); + } + + /// Apply null map to a nested Variant column. + void applyNullMap(const ColumnVector::Container & null_map); + void applyNegatedNullMap(const ColumnVector::Container & null_map); + + const VariantInfo & getVariantInfo() const { return variant_info; } + + const ColumnPtr & getVariantColumnPtr() const { return variant_column; } + ColumnPtr & getVariantColumnPtr() { return variant_column; } + + const ColumnVariant & getVariantColumn() const { return assert_cast(*variant_column); } + ColumnVariant & getVariantColumn() { return assert_cast(*variant_column); } + + bool addNewVariant(const DataTypePtr & new_variant); + void addStringVariant(); + + bool hasDynamicStructure() const override { return true; } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + + const Statistics & getStatistics() const { return statistics; } + + size_t getMaxDynamicTypes() const { return max_dynamic_types; } + +private: + /// Combine current variant with the other variant and return global discriminators mapping + /// from other variant to the combined one. It's used for inserting from + /// different variants. + /// Returns nullptr if maximum number of Variants is reached and tne new Variant cannot be created. 
+ std::vector * combineVariants(const VariantInfo & other_variant_info); + + void updateVariantInfoAndExpandVariantColumn(const DataTypePtr & new_variant_type); + + WrappedPtr variant_column; + /// Store the type of current variant with some additional information. + VariantInfo variant_info; + /// Maximum number of different types that can be stored in Dynamic. + /// If exceeded, all new variants will be converted to String. + size_t max_dynamic_types; + + /// Size statistics of each variants from MergeTree data part. + /// Used in takeDynamicStructureFromSourceColumns and set during deserialization. + Statistics statistics; + + std::unordered_map> variant_mappings_cache; + std::unordered_set variants_with_failed_combination; +}; + +} diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 57e8ba685b4..48e8bced23a 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -312,4 +312,13 @@ ColumnPtr ColumnMap::compress() const }); } +void ColumnMap::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getNestedColumnPtr()); + nested->takeDynamicStructureFromSourceColumns(nested_source_columns); +} + } diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 60aa69e7bf6..52165d0d74e 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -104,6 +104,9 @@ public: ColumnTuple & getNestedData() { return assert_cast(getNestedColumn().getData()); } ColumnPtr compress() const override; + + bool hasDynamicStructure() const override { return nested->hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; }; } diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index fa5fdfb8c21..4474816601e 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -835,6 +835,15 @@ ColumnPtr ColumnNullable::getNestedColumnWithDefaultOnNull() const return res; } +void ColumnNullable::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + Columns nested_source_columns; + nested_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + nested_source_columns.push_back(assert_cast(*source_column).getNestedColumnPtr()); + nested_column->takeDynamicStructureFromSourceColumns(nested_source_columns); +} + ColumnPtr makeNullable(const ColumnPtr & column) { if (isColumnNullable(*column)) diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index ef4bf4fa41b..73bd75527f8 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -186,6 +186,9 @@ public: /// Check that size of null map equals to size of nested column. 
void checkConsistency() const; + bool hasDynamicStructure() const override { return nested_column->hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: WrappedPtr nested_column; WrappedPtr null_map; diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index b9a173fd92c..4acd162e52f 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -801,6 +801,15 @@ ColumnSparse::Iterator ColumnSparse::getIterator(size_t n) const return Iterator(offsets_data, _size, current_offset, n); } +void ColumnSparse::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + Columns values_source_columns; + values_source_columns.reserve(source_columns.size()); + for (const auto & source_column : source_columns) + values_source_columns.push_back(assert_cast(*source_column).getValuesPtr()); + values->takeDynamicStructureFromSourceColumns(values_source_columns); +} + ColumnPtr recursiveRemoveSparse(const ColumnPtr & column) { if (!column) diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index c1bd614102c..7d3200da35f 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -148,6 +148,9 @@ public: size_t sizeOfValueIfFixed() const override { return values->sizeOfValueIfFixed() + values->sizeOfValueIfFixed(); } bool isCollationSupported() const override { return values->isCollationSupported(); } + bool hasDynamicStructure() const override { return values->hasDynamicStructure(); } + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + size_t getNumberOfTrailingDefaults() const { return offsets->empty() ? _size : _size - getOffsetsData().back() - 1; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 062bdadf9d2..4e8e4063157 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -572,6 +572,34 @@ bool ColumnTuple::isCollationSupported() const return false; } +bool ColumnTuple::hasDynamicStructure() const +{ + for (const auto & column : columns) + { + if (column->hasDynamicStructure()) + return true; + } + return false; +} + +void ColumnTuple::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + std::vector nested_source_columns; + nested_source_columns.resize(columns.size()); + for (size_t i = 0; i != columns.size(); ++i) + nested_source_columns[i].reserve(source_columns.size()); + + for (const auto & source_column : source_columns) + { + const auto & nsource_columns = assert_cast(*source_column).getColumns(); + for (size_t i = 0; i != nsource_columns.size(); ++i) + nested_source_columns[i].push_back(nsource_columns[i]); + } + + for (size_t i = 0; i != columns.size(); ++i) + columns[i]->takeDynamicStructureFromSourceColumns(nested_source_columns[i]); +} + ColumnPtr ColumnTuple::compress() const { diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 5b626155754..65103fa8c49 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -114,6 +114,9 @@ public: const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; } ColumnPtr & getColumnPtr(size_t idx) { return columns[idx]; } + bool hasDynamicStructure() const override; + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const; diff --git a/src/Columns/ColumnVariant.cpp 
b/src/Columns/ColumnVariant.cpp index 31e9b0964f4..819491f7fd9 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -12,7 +12,6 @@ #include #include #include -#include #include @@ -452,16 +451,18 @@ bool ColumnVariant::tryInsert(const DB::Field & x) return false; } -void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +void ColumnVariant::insertFromImpl(const DB::IColumn & src_, size_t n, const std::vector * global_discriminators_mapping) { + const size_t num_variants = variants.size(); const ColumnVariant & src = assert_cast(src_); - const size_t num_variants = variants.size(); - if (src.variants.size() != num_variants) + if (!global_discriminators_mapping && src.variants.size() != num_variants) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert value of Variant type with different number of types"); - /// Remember that src column can have different local variants order. - Discriminator global_discr = src.globalDiscriminatorAt(n); + Discriminator src_global_discr = src.globalDiscriminatorAt(n); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; Discriminator local_discr = localDiscriminatorByGlobal(global_discr); getLocalDiscriminators().push_back(local_discr); if (local_discr == NULL_DISCRIMINATOR) @@ -471,25 +472,15 @@ void ColumnVariant::insertFrom(const IColumn & src_, size_t n) else { getOffsets().push_back(variants[local_discr]->size()); - variants[local_discr]->insertFrom(src.getVariantByGlobalDiscriminator(global_discr), src.offsetAt(n)); + variants[local_discr]->insertFrom(src.getVariantByGlobalDiscriminator(src_global_discr), src.offsetAt(n)); } } -void ColumnVariant::insertIntoVariant(const DB::Field & x, Discriminator global_discr) -{ - if (global_discr > variants.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid global discriminator: {}. The number of variants is {}", size_t(global_discr), variants.size()); - auto & variant = getVariantByGlobalDiscriminator(global_discr); - variant.insert(x); - getLocalDiscriminators().push_back(localDiscriminatorByGlobal(global_discr)); - getOffsets().push_back(variant.size() - 1); -} - -void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +void ColumnVariant::insertRangeFromImpl(const DB::IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping) { const size_t num_variants = variants.size(); const auto & src = assert_cast(src_); - if (src.variants.size() != num_variants) + if (!global_discriminators_mapping && src.variants.size() != num_variants) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert value of Variant type with different number of types"); if (start + length > src.getLocalDiscriminators().size()) @@ -507,7 +498,12 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l /// In this case we can simply call insertRangeFrom on this single variant. 
if (auto non_empty_src_local_discr = src.getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) { - auto local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(*non_empty_src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(*non_empty_src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); size_t offset = variants[local_discr]->size(); variants[local_discr]->insertRangeFrom(*src.variants[*non_empty_src_local_discr], start, length); getLocalDiscriminators().resize_fill(local_discriminators->size() + length, local_discr); @@ -522,7 +518,7 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l /// collect ranges we need to insert for all variants and update offsets. /// nested_ranges[i].first - offset in src.variants[i] /// nested_ranges[i].second - length in src.variants[i] - std::vector> nested_ranges(num_variants, {0, 0}); + std::vector> nested_ranges(src.variants.size(), {0, 0}); auto & offsets_data = getOffsets(); offsets_data.reserve(offsets_data.size() + length); auto & local_discriminators_data = getLocalDiscriminators(); @@ -533,7 +529,11 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l { /// We insert from src.variants[src_local_discr] to variants[local_discr] Discriminator src_local_discr = src_local_discriminators_data[i]; - Discriminator local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); local_discriminators_data.push_back(local_discr); if (local_discr == NULL_DISCRIMINATOR) { @@ -553,22 +553,29 @@ void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t l for (size_t src_local_discr = 0; src_local_discr != nested_ranges.size(); ++src_local_discr) { auto [nested_start, nested_length] = nested_ranges[src_local_discr]; - auto local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); if (nested_length) variants[local_discr]->insertRangeFrom(*src.variants[src_local_discr], nested_start, nested_length); } } -void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position, size_t length, const std::vector * global_discriminators_mapping) { const size_t num_variants = variants.size(); const auto & src = assert_cast(src_); - if (src.variants.size() != num_variants) + if (!global_discriminators_mapping && src.variants.size() != num_variants) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert value of Variant type with different number of types"); - 
/// Remember that src column can have different local variants order. Discriminator src_local_discr = src.localDiscriminatorAt(position); - Discriminator local_discr = localDiscriminatorByGlobal(src.globalDiscriminatorByLocal(src_local_discr)); + Discriminator src_global_discr = src.globalDiscriminatorByLocal(src_local_discr); + Discriminator global_discr = src_global_discr; + if (global_discriminators_mapping && src_global_discr != NULL_DISCRIMINATOR) + global_discr = (*global_discriminators_mapping)[src_global_discr]; + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); auto & local_discriminators_data = getLocalDiscriminators(); local_discriminators_data.resize_fill(local_discriminators_data.size() + length, local_discr); @@ -588,6 +595,72 @@ void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, si } } +void ColumnVariant::insertFrom(const IColumn & src_, size_t n) +{ + insertFromImpl(src_, n, nullptr); +} + +void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length) +{ + insertRangeFromImpl(src_, start, length, nullptr); +} + +void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length) +{ + insertManyFromImpl(src_, position, length, nullptr); +} + +void ColumnVariant::insertFrom(const DB::IColumn & src_, size_t n, const std::vector & global_discriminators_mapping) +{ + insertFromImpl(src_, n, &global_discriminators_mapping); +} + +void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping) +{ + insertRangeFromImpl(src_, start, length, &global_discriminators_mapping); +} + +void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length, const std::vector & global_discriminators_mapping) +{ + insertManyFromImpl(src_, position, length, &global_discriminators_mapping); +} + +void ColumnVariant::insertIntoVariantFrom(DB::ColumnVariant::Discriminator global_discr, const DB::IColumn & src_, size_t n) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + getLocalDiscriminators().push_back(local_discr); + getOffsets().push_back(variants[local_discr]->size()); + variants[local_discr]->insertFrom(src_, n); +} + +void ColumnVariant::insertRangeIntoVariantFrom(DB::ColumnVariant::Discriminator global_discr, const DB::IColumn & src_, size_t start, size_t length) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + auto & local_discriminators_data = getLocalDiscriminators(); + local_discriminators_data.resize_fill(local_discriminators_data.size() + length, local_discr); + auto & offsets_data = getOffsets(); + size_t offset = variants[local_discr]->size(); + offsets_data.reserve(offsets_data.size() + length); + for (size_t i = 0; i != length; ++i) + offsets_data.push_back(offset + i); + + variants[local_discr]->insertRangeFrom(src_, start, length); +} + +void ColumnVariant::insertManyIntoVariantFrom(DB::ColumnVariant::Discriminator global_discr, const DB::IColumn & src_, size_t position, size_t length) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + auto & local_discriminators_data = getLocalDiscriminators(); + local_discriminators_data.resize_fill(local_discriminators_data.size() + length, local_discr); + auto & offsets_data = getOffsets(); + size_t offset = variants[local_discr]->size(); + offsets_data.reserve(offsets_data.size() + length); + for (size_t i = 0; i != length; ++i) + offsets_data.push_back(offset + i); + 
+ variants[local_discr]->insertManyFrom(src_, position, length); +} + void ColumnVariant::insertDefault() { getLocalDiscriminators().push_back(NULL_DISCRIMINATOR); @@ -678,6 +751,14 @@ const char * ColumnVariant::deserializeAndInsertFromArena(const char * pos) return variants[local_discr]->deserializeAndInsertFromArena(pos); } +const char * ColumnVariant::deserializeVariantAndInsertFromArena(DB::ColumnVariant::Discriminator global_discr, const char * pos) +{ + Discriminator local_discr = localDiscriminatorByGlobal(global_discr); + getLocalDiscriminators().push_back(local_discr); + getOffsets().push_back(variants[local_discr]->size()); + return variants[local_discr]->deserializeAndInsertFromArena(pos); +} + const char * ColumnVariant::skipSerializedInArena(const char * pos) const { Discriminator global_discr = unalignedLoad(pos); @@ -1426,4 +1507,54 @@ void ColumnVariant::applyNullMapImpl(const ColumnVector::Container & null } } +void ColumnVariant::extend(const std::vector & old_to_new_global_discriminators, std::vector> && new_variants_and_discriminators) +{ + /// Update global discriminators for current variants. + for (Discriminator & global_discr : local_to_global_discriminators) + global_discr = old_to_new_global_discriminators[global_discr]; + + /// Add new variants. + variants.reserve(variants.size() + new_variants_and_discriminators.size()); + local_to_global_discriminators.reserve(local_to_global_discriminators.size() + new_variants_and_discriminators.size()); + for (auto & new_variant_and_discriminator : new_variants_and_discriminators) + { + variants.emplace_back(std::move(new_variant_and_discriminator.first)); + local_to_global_discriminators.push_back(new_variant_and_discriminator.second); + } + + /// Update global -> local discriminators matching. 
+ global_to_local_discriminators.resize(local_to_global_discriminators.size()); + for (Discriminator local_discr = 0; local_discr != local_to_global_discriminators.size(); ++local_discr) + global_to_local_discriminators[local_to_global_discriminators[local_discr]] = local_discr; +} + +bool ColumnVariant::hasDynamicStructure() const +{ + for (const auto & variant : variants) + { + if (variant->hasDynamicStructure()) + return true; + } + + return false; +} + +void ColumnVariant::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +{ + std::vector variants_source_columns; + variants_source_columns.resize(variants.size()); + for (size_t i = 0; i != variants.size(); ++i) + variants_source_columns[i].reserve(source_columns.size()); + + for (const auto & source_column : source_columns) + { + const auto & source_variants = assert_cast(*source_column).variants; + for (size_t i = 0; i != source_variants.size(); ++i) + variants_source_columns[i].push_back(source_variants[i]); + } + + for (size_t i = 0; i != variants.size(); ++i) + variants[i]->takeDynamicStructureFromSourceColumns(variants_source_columns[i]); +} + } diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 4aa2c9058cc..8f703ea17d9 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -175,18 +175,32 @@ public: bool isDefaultAt(size_t n) const override; bool isNullAt(size_t n) const override; StringRef getDataAt(size_t n) const override; + void insertData(const char * pos, size_t length) override; void insert(const Field & x) override; bool tryInsert(const Field & x) override; - void insertIntoVariant(const Field & x, Discriminator global_discr); + void insertFrom(const IColumn & src_, size_t n) override; - void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; - void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override; + void insertManyFrom(const IColumn & src_, size_t position, size_t length) override; + + /// Methods for insertion from another Variant but with known mapping between global discriminators. + void insertFrom(const IColumn & src_, size_t n, const std::vector & global_discriminators_mapping); + void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping); + void insertManyFrom(const IColumn & src_, size_t position, size_t length, const std::vector & global_discriminators_mapping); + + /// Methods for insertrion into a specific variant. 
+ void insertIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t n); + void insertRangeIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t start, size_t length); + void insertManyIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t position, size_t length); + void insertDefault() override; void insertManyDefaults(size_t length) override; + void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; const char * deserializeAndInsertFromArena(const char * pos) override; + const char * deserializeVariantAndInsertFromArena(Discriminator global_discr, const char * pos); const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; @@ -234,6 +248,8 @@ public: ColumnPtr & getVariantPtrByLocalDiscriminator(size_t discr) { return variants[discr]; } ColumnPtr & getVariantPtrByGlobalDiscriminator(size_t discr) { return variants[global_to_local_discriminators.at(discr)]; } + const NestedColumns & getVariants() const { return variants; } + const IColumn & getLocalDiscriminatorsColumn() const { return *local_discriminators; } IColumn & getLocalDiscriminatorsColumn() { return *local_discriminators; } @@ -282,7 +298,19 @@ public: void applyNullMap(const ColumnVector::Container & null_map); void applyNegatedNullMap(const ColumnVector::Container & null_map); + /// Extend current column with new variants. Change global discriminators of current variants to the new + /// according to the mapping and add new variants with new global discriminators. + /// This extension doesn't rewrite any data, just adds new empty variants and modifies global/local discriminators matching. + void extend(const std::vector & old_to_new_global_discriminators, std::vector> && new_variants_and_discriminators); + + bool hasDynamicStructure() const override; + void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; + private: + void insertFromImpl(const IColumn & src_, size_t n, const std::vector * global_discriminators_mapping); + void insertRangeFromImpl(const IColumn & src_, size_t start, size_t length, const std::vector * global_discriminators_mapping); + void insertManyFromImpl(const IColumn & src_, size_t position, size_t length, const std::vector * global_discriminators_mapping); + void initIdentityGlobalToLocalDiscriminatorsMapping(); template diff --git a/src/Columns/IColumn.cpp b/src/Columns/IColumn.cpp index 18974e49760..479fd7de1bc 100644 --- a/src/Columns/IColumn.cpp +++ b/src/Columns/IColumn.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -461,6 +462,7 @@ template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; template class IColumnHelper; +template class IColumnHelper; template class IColumnHelper; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index cea8d7c9f55..33f398474ed 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -534,6 +534,8 @@ public: return res; } + virtual bool hasDynamicStructure() const { return false; } + virtual void takeDynamicStructureFromSourceColumns(const std::vector & /*source_columns*/) {} /** Some columns can contain another columns inside. * So, we have a tree of columns. But not all combinations are possible. 
diff --git a/src/Columns/tests/gtest_column_dynamic.cpp b/src/Columns/tests/gtest_column_dynamic.cpp new file mode 100644 index 00000000000..4c209f7d8a9 --- /dev/null +++ b/src/Columns/tests/gtest_column_dynamic.cpp @@ -0,0 +1,652 @@ +#include +#include +#include +#include + +using namespace DB; + +TEST(ColumnDynamic, CreateEmpty) +{ + auto column = ColumnDynamic::create(255); + ASSERT_TRUE(column->empty()); + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()"); + ASSERT_TRUE(column->getVariantInfo().variant_names.empty()); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty()); +} + +TEST(ColumnDynamic, InsertDefault) +{ + auto column = ColumnDynamic::create(255); + column->insertDefault(); + ASSERT_TRUE(column->size() == 1); + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant()"); + ASSERT_TRUE(column->getVariantInfo().variant_names.empty()); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.empty()); + ASSERT_TRUE(column->isNullAt(0)); + ASSERT_EQ((*column)[0], Field(Null())); +} + +TEST(ColumnDynamic, InsertFields) +{ + auto column = ColumnDynamic::create(255); + column->insert(Field(42)); + column->insert(Field(-42)); + column->insert(Field("str1")); + column->insert(Field(Null())); + column->insert(Field(42.42)); + column->insert(Field(43)); + column->insert(Field(-43)); + column->insert(Field("str2")); + column->insert(Field(Null())); + column->insert(Field(43.43)); + ASSERT_TRUE(column->size() == 10); + + ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(Float64, Int8, String)"); + std::vector expected_names = {"Float64", "Int8", "String"}; + ASSERT_EQ(column->getVariantInfo().variant_names, expected_names); + std::unordered_map expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}}; + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +ColumnDynamic::MutablePtr getDynamicWithManyVariants(size_t num_variants, Field tuple_element = Field(42)) +{ + auto column = ColumnDynamic::create(255); + for (size_t i = 0; i != num_variants; ++i) + { + Tuple tuple; + for (size_t j = 0; j != i + 1; ++j) + tuple.push_back(tuple_element); + column->insert(tuple); + } + + return column; +} + +TEST(ColumnDynamic, InsertFieldsOverflow1) +{ + auto column = getDynamicWithManyVariants(253); + + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 253); + + column->insert(Field(42.42)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + + column->insert(Field(42)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + Field field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "42"); + + column->insert(Field(43)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "43"); + + column->insert(Field("str1")); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + 
ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "str1"); + + column->insert(Field(Array({Field(42), Field(43)}))); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "[42, 43]"); +} + +TEST(ColumnDynamic, InsertFieldsOverflow2) +{ + auto column = getDynamicWithManyVariants(254); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 254); + + column->insert(Field("str1")); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + + column->insert(Field(42)); + ASSERT_EQ(column->getVariantInfo().variant_names.size(), 255); + ASSERT_FALSE(column->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column->getVariantInfo().variant_name_to_discriminator.contains("String")); + Field field = (*column)[column->size() - 1]; + ASSERT_EQ(field, "42"); +} + +ColumnDynamic::MutablePtr getInsertFromColumn(size_t num = 1) +{ + auto column_from = ColumnDynamic::create(255); + for (size_t i = 0; i != num; ++i) + { + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + } + return column_from; +} + +void checkInsertFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) +{ + column_to->insertFrom(*column_from, 0); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); + auto field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertFrom(*column_from, 1); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42.42); + + column_to->insertFrom(*column_from, 2); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +TEST(ColumnDynamic, InsertFrom1) +{ + auto column_to = ColumnDynamic::create(255); + checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertFrom2) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + + checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertFrom3) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + column_to->insert(Array({Field(42)})); + + 
checkInsertFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); +} + +TEST(ColumnDynamic, InsertFromOverflow1) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertFrom(*column_from, 0); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertFrom(*column_from, 1); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "42.42"); + + column_to->insertFrom(*column_from, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); +} + +TEST(ColumnDynamic, InsertFromOverflow2) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertFrom(*column_from, 0); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertFrom(*column_from, 1); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "42.42"); +} + +void checkInsertManyFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) +{ + column_to->insertManyFrom(*column_from, 0, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); + auto field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertManyFrom(*column_from, 1, 2); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42.42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42.42); + + column_to->insertManyFrom(*column_from, 2, 2); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "str"); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + 
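Before the insertManyFrom tests below, here is a minimal usage sketch in the same gtest style. It is an illustrative test added for clarity, not part of the patch; it reuses this file's includes and only restates behaviour already asserted by the tests in this file.

TEST(ColumnDynamic, IllustrativeBasicUsage)
{
    /// Variants are created lazily as values of new types are inserted; the variant list
    /// is kept sorted by type name, and once the max_types limit is exhausted new types
    /// are stored as String (see the Overflow tests in this file).
    auto column = ColumnDynamic::create(255);
    column->insert(Field(42));
    column->insert(Field("str"));

    ASSERT_EQ(column->getVariantInfo().variant_type->getName(), "Variant(Int8, String)");
    ASSERT_EQ((*column)[0], Field(42));
    ASSERT_EQ((*column)[1], Field("str"));
}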
+TEST(ColumnDynamic, InsertManyFrom1) +{ + auto column_to = ColumnDynamic::create(255); + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertManyFrom2) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertManyFrom3) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str")); + column_to->insert(Array({Field(42)})); + + checkInsertManyFrom(getInsertFromColumn(), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); +} + +TEST(ColumnDynamic, InsertManyFromOverflow1) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertManyFrom(*column_from, 0, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertManyFrom(*column_from, 1, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "42.42"); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "42.42"); + + column_to->insertManyFrom(*column_from, 2, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "str"); +} + +TEST(ColumnDynamic, InsertManyFromOverflow2) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertManyFrom(*column_from, 0, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 254); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + auto field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 42); + + column_to->insertManyFrom(*column_from, 1, 2); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, "42.42"); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, 
"42.42"); +} + +void checkInsertRangeFrom(const ColumnDynamic::MutablePtr & column_from, ColumnDynamic::MutablePtr & column_to, const std::string & expected_variant, const std::vector & expected_names, const std::unordered_map & expected_variant_name_to_discriminator) +{ + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42.42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + column_to->insertRangeFrom(*column_from, 3, 3); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, 42); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, 42.42); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, "str"); + + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), expected_variant); + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +TEST(ColumnDynamic, InsertRangeFrom1) +{ + auto column_to = ColumnDynamic::create(255); + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertRangeFrom2) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str1")); + + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Float64, Int8, String)", {"Float64", "Int8", "String"}, {{"Float64", 0}, {"Int8", 1}, {"String", 2}}); +} + +TEST(ColumnDynamic, InsertRangeFrom3) +{ + auto column_to = ColumnDynamic::create(255); + column_to->insert(Field(42)); + column_to->insert(Field(42.42)); + column_to->insert(Field("str1")); + column_to->insert(Array({Field(42)})); + + checkInsertRangeFrom(getInsertFromColumn(2), column_to, "Variant(Array(Int8), Float64, Int8, String)", {"Array(Int8)", "Float64", "Int8", "String"}, {{"Array(Int8)", 0}, {"Float64", 1}, {"Int8", 2}, {"String", 3}}); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow1) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertRangeFrom(*column_from, 0, 4); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("42.42")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("str")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow2) +{ + auto column_from = 
ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("42.42")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow3) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insert(Field("Str")); + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("42.42")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow4) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(254); + column_to->insertRangeFrom(*column_from, 0, 3); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field("42")); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("42.42")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("str")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow5) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insert(Field("str")); + column_to->insertRangeFrom(*column_from, 0, 4); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + auto field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(42)); + field = (*column_to)[column_to->size() - 3]; + 
ASSERT_EQ(field, Field(43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("42.42")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("str")); +} + +TEST(ColumnDynamic, InsertRangeFromOverflow6) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(43)); + column_from->insert(Field(44)); + column_from->insert(Field(42.42)); + column_from->insert(Field(43.43)); + column_from->insert(Field("str")); + column_from->insert(Field(Array({Field(42)}))); + + auto column_to = getDynamicWithManyVariants(253); + column_to->insertRangeFrom(*column_from, 2, 5); + ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + auto field = (*column_to)[column_to->size() - 5]; + + ASSERT_EQ(field, Field("44")); + field = (*column_to)[column_to->size() - 4]; + ASSERT_EQ(field, Field(42.42)); + field = (*column_to)[column_to->size() - 3]; + ASSERT_EQ(field, Field(43.43)); + field = (*column_to)[column_to->size() - 2]; + ASSERT_EQ(field, Field("str")); + field = (*column_to)[column_to->size() - 1]; + ASSERT_EQ(field, Field("[42]")); +} + +TEST(ColumnDynamic, SerializeDeserializeFromArena1) +{ + auto column = ColumnDynamic::create(255); + column->insert(Field(42)); + column->insert(Field(42.42)); + column->insert(Field("str")); + column->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column->serializeValueIntoArena(0, arena, pos); + column->serializeValueIntoArena(1, arena, pos); + column->serializeValueIntoArena(2, arena, pos); + column->serializeValueIntoArena(3, arena, pos); + pos = column->deserializeAndInsertFromArena(ref1.data); + pos = column->deserializeAndInsertFromArena(pos); + pos = column->deserializeAndInsertFromArena(pos); + column->deserializeAndInsertFromArena(pos); + + ASSERT_EQ((*column)[column->size() - 4], 42); + ASSERT_EQ((*column)[column->size() - 3], 42.42); + ASSERT_EQ((*column)[column->size() - 2], "str"); + ASSERT_EQ((*column)[column->size() - 1], Null()); +} + +TEST(ColumnDynamic, SerializeDeserializeFromArena2) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column_from->serializeValueIntoArena(0, arena, pos); + column_from->serializeValueIntoArena(1, arena, pos); + column_from->serializeValueIntoArena(2, arena, pos); + column_from->serializeValueIntoArena(3, arena, pos); + + auto column_to = ColumnDynamic::create(255); + pos = column_to->deserializeAndInsertFromArena(ref1.data); + pos = column_to->deserializeAndInsertFromArena(pos); + pos = column_to->deserializeAndInsertFromArena(pos); + column_to->deserializeAndInsertFromArena(pos); + + ASSERT_EQ((*column_from)[column_from->size() - 4], 42); + ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42); + ASSERT_EQ((*column_from)[column_from->size() - 2], "str"); + ASSERT_EQ((*column_from)[column_from->size() - 1], Null()); + ASSERT_EQ(column_to->getVariantInfo().variant_type->getName(), "Variant(Float64, 
Int8, String)"); + std::vector expected_names = {"Float64", "Int8", "String"}; + ASSERT_EQ(column_to->getVariantInfo().variant_names, expected_names); + std::unordered_map expected_variant_name_to_discriminator = {{"Float64", 0}, {"Int8", 1}, {"String", 2}}; + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator == expected_variant_name_to_discriminator); +} + +TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column_from->serializeValueIntoArena(0, arena, pos); + column_from->serializeValueIntoArena(1, arena, pos); + column_from->serializeValueIntoArena(2, arena, pos); + column_from->serializeValueIntoArena(3, arena, pos); + + auto column_to = getDynamicWithManyVariants(253); + pos = column_to->deserializeAndInsertFromArena(ref1.data); + pos = column_to->deserializeAndInsertFromArena(pos); + pos = column_to->deserializeAndInsertFromArena(pos); + column_to->deserializeAndInsertFromArena(pos); + + ASSERT_EQ((*column_from)[column_from->size() - 4], 42); + ASSERT_EQ((*column_from)[column_from->size() - 3], 42.42); + ASSERT_EQ((*column_from)[column_from->size() - 2], "str"); + ASSERT_EQ((*column_from)[column_from->size() - 1], Null()); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); +} + +TEST(ColumnDynamic, skipSerializedInArena) +{ + auto column_from = ColumnDynamic::create(255); + column_from->insert(Field(42)); + column_from->insert(Field(42.42)); + column_from->insert(Field("str")); + column_from->insert(Field(Null())); + + Arena arena; + const char * pos = nullptr; + auto ref1 = column_from->serializeValueIntoArena(0, arena, pos); + column_from->serializeValueIntoArena(1, arena, pos); + column_from->serializeValueIntoArena(2, arena, pos); + auto ref4 = column_from->serializeValueIntoArena(3, arena, pos); + + const char * end = ref4.data + ref4.size; + auto column_to = ColumnDynamic::create(255); + pos = column_to->skipSerializedInArena(ref1.data); + pos = column_to->skipSerializedInArena(pos); + pos = column_to->skipSerializedInArena(pos); + pos = column_to->skipSerializedInArena(pos); + + ASSERT_EQ(pos, end); + ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.empty()); + ASSERT_TRUE(column_to->getVariantInfo().variant_names.empty()); +} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 84e709294aa..7176c4d8850 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -871,6 +871,7 @@ class IColumn; M(Bool, traverse_shadow_remote_data_paths, false, "Traverse shadow directory when query system.remote_data_paths", 0) \ M(Bool, geo_distance_returns_float64_on_float64_arguments, true, "If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.", 0) \ M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. 
It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \ + M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ @@ -879,6 +880,7 @@ class IColumn; M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(Bool, allow_experimental_variant_type, false, "Allow Variant data type", 0) \ + M(Bool, allow_experimental_dynamic_type, false, "Allow Dynamic data type", 0) \ M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ M(Bool, allow_experimental_usearch_index, false, "Allows to use USearch index. Disabled by default because this feature is experimental", 0) \ M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.", 0) \ diff --git a/src/Core/TypeId.h b/src/Core/TypeId.h index 7003e880cd5..26d9ab8595b 100644 --- a/src/Core/TypeId.h +++ b/src/Core/TypeId.h @@ -50,6 +50,7 @@ enum class TypeIndex IPv6, JSONPaths, Variant, + Dynamic }; /** diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index 6e5760933eb..806a1577a21 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -75,6 +75,27 @@ void DataTypeArray::forEachChild(const ChildCallback & callback) const nested->forEachChild(callback); } +std::unique_ptr DataTypeArray::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const +{ + auto nested_type = assert_cast(*data.type).nested; + auto nested_data = std::make_unique(nested_type->getDefaultSerialization()); + nested_data->type = nested_type; + nested_data->column = data.column ? assert_cast(*data.column).getDataPtr() : nullptr; + + auto nested_subcolumn_data = nested_type->getSubcolumnData(subcolumn_name, *nested_data, throw_if_null); + if (!nested_subcolumn_data) + return nullptr; + + auto creator = SerializationArray::SubcolumnCreator(data.column ? 
assert_cast(*data.column).getOffsetsPtr() : nullptr); + auto res = std::make_unique(); + res->serialization = creator.create(nested_subcolumn_data->serialization); + res->type = creator.create(nested_subcolumn_data->type); + if (data.column) + res->column = creator.create(nested_subcolumn_data->column); + + return res; +} + static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.size() != 1) diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 4423f137e1a..b242d871c36 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -55,7 +55,12 @@ public: bool textCanContainOnlyValidUTF8() const override { return nested->textCanContainOnlyValidUTF8(); } bool isComparable() const override { return nested->isComparable(); } bool canBeComparedWithCollation() const override { return nested->canBeComparedWithCollation(); } - bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); } + bool hasDynamicSubcolumnsDeprecated() const override { return nested->hasDynamicSubcolumnsDeprecated(); } + + /// Array column doesn't have subcolumns by itself but allows to read subcolumns of nested column. + /// If nested column has dynamic subcolumns, Array of this type should also be able to read these dynamic subcolumns. + bool hasDynamicSubcolumnsData() const override { return nested->hasDynamicSubcolumnsData(); } + std::unique_ptr getDynamicSubcolumnData(std::string_view subcolumn_name, const SubstreamData & data, bool throw_if_null) const override; bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp new file mode 100644 index 00000000000..2c6b3eba906 --- /dev/null +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int UNEXPECTED_AST_STRUCTURE; +} + +DataTypeDynamic::DataTypeDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) +{ +} + +MutableColumnPtr DataTypeDynamic::createColumn() const +{ + return ColumnDynamic::create(max_dynamic_types); +} + +String DataTypeDynamic::doGetName() const +{ + if (max_dynamic_types == DEFAULT_MAX_DYNAMIC_TYPES) + return "Dynamic"; + return "Dynamic(max_types=" + toString(max_dynamic_types) + ")"; +} + +Field DataTypeDynamic::getDefault() const +{ + return Field(Null()); +} + +SerializationPtr DataTypeDynamic::doGetDefaultSerialization() const +{ + return std::make_shared(max_dynamic_types); +} + +static DataTypePtr create(const ASTPtr & arguments) +{ + if (!arguments || arguments->children.empty()) + return std::make_shared(); + + if (arguments->children.size() > 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Dynamic data type can have only one optional argument - the maximum number of dynamic types in a form 'Dynamic(max_types=N)"); + + + const auto * argument = arguments->children[0]->as(); + if (!argument || argument->name != "equals") + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Dynamic data type argument should be in a form 'max_types=N'"); + + auto identifier_name = argument->arguments->children[0]->as()->name(); + if (identifier_name != "max_types") + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, 
"Unexpected identifier: {}. Dynamic data type argument should be in a form 'max_types=N'", identifier_name); + + auto literal = argument->arguments->children[1]->as(); + + if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > 255) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); + + return std::make_shared(literal->value.get()); +} + +void registerDataTypeDynamic(DataTypeFactory & factory) +{ + factory.registerDataType("Dynamic", create); +} + +std::unique_ptr DataTypeDynamic::getDynamicSubcolumnData(std::string_view subcolumn_name, const DB::IDataType::SubstreamData & data, bool throw_if_null) const +{ + auto [subcolumn_type_name, subcolumn_nested_name] = Nested::splitName(subcolumn_name); + /// Check if requested subcolumn is a valid data type. + auto subcolumn_type = DataTypeFactory::instance().tryGet(String(subcolumn_type_name)); + if (!subcolumn_type) + { + if (throw_if_null) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Dynamic type doesn't have subcolumn '{}'", subcolumn_type_name); + return nullptr; + } + + std::unique_ptr res = std::make_unique(subcolumn_type->getDefaultSerialization()); + res->type = subcolumn_type; + std::optional discriminator; + if (data.column) + { + /// If column was provided, we should extract subcolumn from Dynamic column. + const auto & dynamic_column = assert_cast(*data.column); + const auto & variant_info = dynamic_column.getVariantInfo(); + /// Check if provided Dynamic column has subcolumn of this type. + auto it = variant_info.variant_name_to_discriminator.find(subcolumn_type->getName()); + if (it != variant_info.variant_name_to_discriminator.end()) + { + discriminator = it->second; + res->column = dynamic_column.getVariantColumn().getVariantPtrByGlobalDiscriminator(*discriminator); + } + } + + /// Extract nested subcolumn of requested dynamic subcolumn if needed. + if (!subcolumn_nested_name.empty()) + { + res = getSubcolumnData(subcolumn_nested_name, *res, throw_if_null); + if (!res) + return nullptr; + } + + res->serialization = std::make_shared(res->serialization, subcolumn_type->getName()); + res->type = makeNullableOrLowCardinalityNullableSafe(res->type); + if (data.column) + { + if (discriminator) + { + /// Provided Dynamic column has subcolumn of this type, we should use VariantSubcolumnCreator to + /// create full subcolumn from variant according to discriminators. + const auto & variant_column = assert_cast(*data.column).getVariantColumn(); + auto creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), "", *discriminator, variant_column.localDiscriminatorByGlobal(*discriminator)); + res->column = creator.create(res->column); + } + else + { + /// Provided Dynamic column doesn't have subcolumn of this type, just create column filled with default values. 
+ auto column = res->type->createColumn(); + column->insertManyDefaults(data.column->size()); + res->column = std::move(column); + } + } + + return res; +} + +} diff --git a/src/DataTypes/DataTypeDynamic.h b/src/DataTypes/DataTypeDynamic.h new file mode 100644 index 00000000000..452e05061a0 --- /dev/null +++ b/src/DataTypes/DataTypeDynamic.h @@ -0,0 +1,53 @@ +#pragma once + +#include + +#define DEFAULT_MAX_DYNAMIC_TYPES 32 + + +namespace DB +{ + +class DataTypeDynamic final : public IDataType +{ +public: + static constexpr bool is_parametric = true; + + DataTypeDynamic(size_t max_dynamic_types_ = DEFAULT_MAX_DYNAMIC_TYPES); + + TypeIndex getTypeId() const override { return TypeIndex::Dynamic; } + const char * getFamilyName() const override { return "Dynamic"; } + + bool isParametric() const override { return true; } + bool canBeInsideNullable() const override { return false; } + bool supportsSparseSerialization() const override { return false; } + bool canBeInsideSparseColumns() const override { return false; } + bool isComparable() const override { return true; } + + MutableColumnPtr createColumn() const override; + + Field getDefault() const override; + + bool equals(const IDataType & rhs) const override + { + if (const auto * rhs_dynamic_type = typeid_cast(&rhs)) + return max_dynamic_types == rhs_dynamic_type->max_dynamic_types; + return false; + } + + bool haveSubtypes() const override { return false; } + + bool hasDynamicSubcolumnsData() const override { return true; } + std::unique_ptr getDynamicSubcolumnData(std::string_view subcolumn_name, const SubstreamData & data, bool throw_if_null) const override; + + size_t getMaxDynamicTypes() const { return max_dynamic_types; } + +private: + SerializationPtr doGetDefaultSerialization() const override; + String doGetName() const override; + + size_t max_dynamic_types; +}; + +} + diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 844384f3c95..a94526dce60 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -292,6 +292,7 @@ DataTypeFactory::DataTypeFactory() registerDataTypeMap(*this); registerDataTypeObject(*this); registerDataTypeVariant(*this); + registerDataTypeDynamic(*this); } DataTypeFactory & DataTypeFactory::instance() diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 4727cb3ae5c..86e0203358d 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -100,5 +100,6 @@ void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory); void registerDataTypeDomainGeo(DataTypeFactory & factory); void registerDataTypeObject(DataTypeFactory & factory); void registerDataTypeVariant(DataTypeFactory & factory); +void registerDataTypeDynamic(DataTypeFactory & factory); } diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 7281cca1bb1..4866c3e78cc 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -42,7 +42,7 @@ public: bool isComparable() const override { return key_type->isComparable() && value_type->isComparable(); } bool isParametric() const override { return true; } bool haveSubtypes() const override { return true; } - bool hasDynamicSubcolumns() const override { return nested->hasDynamicSubcolumns(); } + bool hasDynamicSubcolumnsDeprecated() const override { return nested->hasDynamicSubcolumnsDeprecated(); } const DataTypePtr & getKeyType() const { return key_type; } const DataTypePtr & getValueType() const { return value_type; } diff --git 
a/src/DataTypes/DataTypeObject.h b/src/DataTypes/DataTypeObject.h index 937a9091371..c610a1a8ba4 100644 --- a/src/DataTypes/DataTypeObject.h +++ b/src/DataTypes/DataTypeObject.h @@ -36,7 +36,7 @@ public: bool haveSubtypes() const override { return false; } bool equals(const IDataType & rhs) const override; bool isParametric() const override { return true; } - bool hasDynamicSubcolumns() const override { return true; } + bool hasDynamicSubcolumnsDeprecated() const override { return true; } SerializationPtr doGetDefaultSerialization() const override; diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 5bbd79160d4..71347011658 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -291,9 +291,9 @@ bool DataTypeTuple::haveMaximumSizeOfValue() const return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); }); } -bool DataTypeTuple::hasDynamicSubcolumns() const +bool DataTypeTuple::hasDynamicSubcolumnsDeprecated() const { - return std::any_of(elems.begin(), elems.end(), [](auto && elem) { return elem->hasDynamicSubcolumns(); }); + return std::any_of(elems.begin(), elems.end(), [](auto && elem) { return elem->hasDynamicSubcolumnsDeprecated(); }); } bool DataTypeTuple::isComparable() const diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index 15561fe4286..fd00fce5a17 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -52,7 +52,7 @@ public: bool isComparable() const override; bool textCanContainOnlyValidUTF8() const override; bool haveMaximumSizeOfValue() const override; - bool hasDynamicSubcolumns() const override; + bool hasDynamicSubcolumnsDeprecated() const override; size_t getMaximumSizeOfValueInMemory() const override; size_t getSizeOfValueInMemory() const override; diff --git a/src/DataTypes/DataTypeVariant.cpp b/src/DataTypes/DataTypeVariant.cpp index db96972c00f..b918b79a2ed 100644 --- a/src/DataTypes/DataTypeVariant.cpp +++ b/src/DataTypes/DataTypeVariant.cpp @@ -33,6 +33,9 @@ DataTypeVariant::DataTypeVariant(const DataTypes & variants_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Nullable/LowCardinality(Nullable) types are not allowed inside Variant type"); if (type->getTypeId() == TypeIndex::Variant) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Nested Variant types are not allowed"); + if (type->getTypeId() == TypeIndex::Dynamic) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dynamic type is not allowed inside Variant type"); + /// Don't use Nothing type as a variant. if (!isNothing(type)) name_to_type[type->getName()] = type; @@ -42,9 +45,6 @@ DataTypeVariant::DataTypeVariant(const DataTypes & variants_) for (const auto & [_, type] : name_to_type) variants.push_back(type); - if (variants.empty()) - throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Variant cannot be empty"); - if (variants.size() > ColumnVariant::MAX_NESTED_COLUMNS) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Variant type with more than {} nested types is not allowed", ColumnVariant::MAX_NESTED_COLUMNS); } @@ -113,9 +113,16 @@ bool DataTypeVariant::equals(const IDataType & rhs) const return false; for (size_t i = 0; i < size; ++i) + { if (!variants[i]->equals(*rhs_variant.variants[i])) return false; + /// The same data types with different custom names considered different. + /// For example, UInt8 and Bool. 
+ if ((variants[i]->hasCustomName() || rhs_variant.variants[i]->hasCustomName()) && variants[i]->getName() != rhs_variant.variants[i]->getName()) + return false; + } + return true; } @@ -129,17 +136,15 @@ bool DataTypeVariant::haveMaximumSizeOfValue() const return std::all_of(variants.begin(), variants.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); }); } -bool DataTypeVariant::hasDynamicSubcolumns() const +bool DataTypeVariant::hasDynamicSubcolumnsDeprecated() const { - return std::any_of(variants.begin(), variants.end(), [](auto && elem) { return elem->hasDynamicSubcolumns(); }); + return std::any_of(variants.begin(), variants.end(), [](auto && elem) { return elem->hasDynamicSubcolumnsDeprecated(); }); } -std::optional DataTypeVariant::tryGetVariantDiscriminator(const IDataType & type) const +std::optional DataTypeVariant::tryGetVariantDiscriminator(const String & type_name) const { - String type_name = type.getName(); for (size_t i = 0; i != variants.size(); ++i) { - /// We don't use equals here, because it doesn't respect custom type names. if (variants[i]->getName() == type_name) return i; } @@ -187,7 +192,7 @@ void DataTypeVariant::forEachChild(const DB::IDataType::ChildCallback & callback static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.empty()) - throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Variant cannot be empty"); + return std::make_shared(DataTypes{}); DataTypes nested_types; nested_types.reserve(arguments->children.size()); diff --git a/src/DataTypes/DataTypeVariant.h b/src/DataTypes/DataTypeVariant.h index dadc85ac3b3..1b561a083b1 100644 --- a/src/DataTypes/DataTypeVariant.h +++ b/src/DataTypes/DataTypeVariant.h @@ -45,14 +45,14 @@ public: bool haveSubtypes() const override { return true; } bool textCanContainOnlyValidUTF8() const override; bool haveMaximumSizeOfValue() const override; - bool hasDynamicSubcolumns() const override; + bool hasDynamicSubcolumnsDeprecated() const override; size_t getMaximumSizeOfValueInMemory() const override; const DataTypePtr & getVariant(size_t i) const { return variants[i]; } const DataTypes & getVariants() const { return variants; } /// Check if Variant has provided type in the list of variants and return its discriminator. - std::optional tryGetVariantDiscriminator(const IDataType & type) const; + std::optional tryGetVariantDiscriminator(const String & type_name) const; void forEachChild(const ChildCallback & callback) const override; diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 344b81be960..1c9715bbf53 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -101,14 +101,12 @@ void IDataType::forEachSubcolumn( data.serialization->enumerateStreams(settings, callback_with_data, data); } -template -Ptr IDataType::getForSubcolumn( +std::unique_ptr IDataType::getSubcolumnData( std::string_view subcolumn_name, const SubstreamData & data, - Ptr SubstreamData::*member, - bool throw_if_null) const + bool throw_if_null) { - Ptr res; + std::unique_ptr res; ISerialization::StreamCallback callback_with_data = [&](const auto & subpath) { @@ -120,7 +118,29 @@ Ptr IDataType::getForSubcolumn( auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len); /// Create data from path only if it's requested subcolumn. 
if (name == subcolumn_name) - res = ISerialization::createFromPath(subpath, prefix_len).*member; + { + res = std::make_unique(ISerialization::createFromPath(subpath, prefix_len)); + } + /// Check if this subcolumn is a prefix of requested subcolumn and it can create dynamic subcolumns. + else if (subcolumn_name.starts_with(name + ".") && subpath[i].data.type && subpath[i].data.type->hasDynamicSubcolumnsData()) + { + auto dynamic_subcolumn_name = subcolumn_name.substr(name.size() + 1); + auto dynamic_subcolumn_data = subpath[i].data.type->getDynamicSubcolumnData(dynamic_subcolumn_name, subpath[i].data, false); + if (dynamic_subcolumn_data) + { + /// Create requested subcolumn using dynamic subcolumn data. + auto tmp_subpath = subpath; + if (tmp_subpath[i].creator) + { + dynamic_subcolumn_data->type = tmp_subpath[i].creator->create(dynamic_subcolumn_data->type); + dynamic_subcolumn_data->column = tmp_subpath[i].creator->create(dynamic_subcolumn_data->column); + dynamic_subcolumn_data->serialization = tmp_subpath[i].creator->create(dynamic_subcolumn_data->serialization); + } + + tmp_subpath[i].data = *dynamic_subcolumn_data; + res = std::make_unique(ISerialization::createFromPath(tmp_subpath, prefix_len)); + } + } } subpath[i].visited = true; } @@ -130,8 +150,11 @@ Ptr IDataType::getForSubcolumn( settings.position_independent_encoding = false; data.serialization->enumerateStreams(settings, callback_with_data, data); + if (!res && data.type->hasDynamicSubcolumnsData()) + return data.type->getDynamicSubcolumnData(subcolumn_name, data, throw_if_null); + if (!res && throw_if_null) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, data.type->getName()); return res; } @@ -141,34 +164,51 @@ bool IDataType::hasSubcolumn(std::string_view subcolumn_name) const return tryGetSubcolumnType(subcolumn_name) != nullptr; } +bool IDataType::hasDynamicSubcolumns() const +{ + if (hasDynamicSubcolumnsData()) + return true; + + bool has_dynamic_subcolumns = false; + auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); + auto callback = [&](const SubstreamPath &, const String &, const SubstreamData & subcolumn_data) + { + has_dynamic_subcolumns |= subcolumn_data.type->hasDynamicSubcolumnsData(); + }; + forEachSubcolumn(callback, data); + return has_dynamic_subcolumns; +} + DataTypePtr IDataType::tryGetSubcolumnType(std::string_view subcolumn_name) const { auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::type, false); + auto subcolumn_data = getSubcolumnData(subcolumn_name, data, false); + return subcolumn_data ? 
subcolumn_data->type : nullptr; } DataTypePtr IDataType::getSubcolumnType(std::string_view subcolumn_name) const { auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::type, true); + return getSubcolumnData(subcolumn_name, data, true)->type; } ColumnPtr IDataType::tryGetSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const { - auto data = SubstreamData(getDefaultSerialization()).withColumn(column); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::column, false); + auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()).withColumn(column); + auto subcolumn_data = getSubcolumnData(subcolumn_name, data, false); + return subcolumn_data ? subcolumn_data->column : nullptr; } ColumnPtr IDataType::getSubcolumn(std::string_view subcolumn_name, const ColumnPtr & column) const { - auto data = SubstreamData(getDefaultSerialization()).withColumn(column); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::column, true); + auto data = SubstreamData(getDefaultSerialization()).withType(getPtr()).withColumn(column); + return getSubcolumnData(subcolumn_name, data, true)->column; } SerializationPtr IDataType::getSubcolumnSerialization(std::string_view subcolumn_name, const SerializationPtr & serialization) const { - auto data = SubstreamData(serialization); - return getForSubcolumn(subcolumn_name, data, &SubstreamData::serialization, true); + auto data = SubstreamData(serialization).withType(getPtr()); + return getSubcolumnData(subcolumn_name, data, true)->serialization; } Names IDataType::getSubcolumnNames() const @@ -323,6 +363,7 @@ bool isMap(TYPE data_type) {return WhichDataType(data_type).isMap(); } \ bool isInterval(TYPE data_type) {return WhichDataType(data_type).isInterval(); } \ bool isObject(TYPE data_type) { return WhichDataType(data_type).isObject(); } \ bool isVariant(TYPE data_type) { return WhichDataType(data_type).isVariant(); } \ +bool isDynamic(TYPE data_type) { return WhichDataType(data_type).isDynamic(); } \ bool isNothing(TYPE data_type) { return WhichDataType(data_type).isNothing(); } \ \ bool isColumnedAsNumber(TYPE data_type) \ diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index eaf798a3017..dde61ca3a48 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -311,8 +311,13 @@ public: /// Strings, Numbers, Date, DateTime, Nullable virtual bool canBeInsideLowCardinality() const { return false; } - /// Object, Array(Object), Tuple(..., Object, ...) - virtual bool hasDynamicSubcolumns() const { return false; } + /// Checks for deprecated Object type usage recursively: Object, Array(Object), Tuple(..., Object, ...) + virtual bool hasDynamicSubcolumnsDeprecated() const { return false; } + + /// Checks if column has dynamic subcolumns. + virtual bool hasDynamicSubcolumns() const; + /// Checks if column can create dynamic subcolumns data and getDynamicSubcolumnData can be called. + virtual bool hasDynamicSubcolumnsData() const { return false; } /// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column. 
static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint); @@ -329,16 +334,25 @@ protected: mutable SerializationPtr custom_serialization; public: + bool hasCustomName() const { return static_cast(custom_name.get()); } const IDataTypeCustomName * getCustomName() const { return custom_name.get(); } const ISerialization * getCustomSerialization() const { return custom_serialization.get(); } -private: - template - Ptr getForSubcolumn( +protected: + static std::unique_ptr getSubcolumnData( std::string_view subcolumn_name, const SubstreamData & data, - Ptr SubstreamData::*member, - bool throw_if_null) const; + bool throw_if_null); + + virtual std::unique_ptr getDynamicSubcolumnData( + std::string_view /*subcolumn_name*/, + const SubstreamData & /*data*/, + bool throw_if_null) const + { + if (throw_if_null) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getDynamicSubcolumnData() is not implemented for type {}", getName()); + return nullptr; + } }; @@ -423,6 +437,7 @@ struct WhichDataType constexpr bool isLowCardinality() const { return idx == TypeIndex::LowCardinality; } constexpr bool isVariant() const { return idx == TypeIndex::Variant; } + constexpr bool isDynamic() const { return idx == TypeIndex::Dynamic; } }; /// IDataType helpers (alternative for IDataType virtual methods with single point of truth) @@ -483,6 +498,7 @@ bool isMap(TYPE data_type); \ bool isInterval(TYPE data_type); \ bool isObject(TYPE data_type); \ bool isVariant(TYPE data_type); \ +bool isDynamic(TYPE data_type); \ bool isNothing(TYPE data_type); \ \ bool isColumnedAsNumber(TYPE data_type); \ diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 99cf092e6cd..107e3a50025 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -177,7 +177,7 @@ static std::pair convertObjectColumnToTuple( static std::pair recursivlyConvertDynamicColumnToTuple( const ColumnPtr & column, const DataTypePtr & type) { - if (!type->hasDynamicSubcolumns()) + if (!type->hasDynamicSubcolumnsDeprecated()) return {column, type}; if (const auto * type_object = typeid_cast(type.get())) @@ -243,7 +243,7 @@ void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & sto { for (auto & column : block) { - if (!column.type->hasDynamicSubcolumns()) + if (!column.type->hasDynamicSubcolumnsDeprecated()) continue; std::tie(column.column, column.type) @@ -417,7 +417,7 @@ static DataTypePtr getLeastCommonTypeForTuple( static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl( const DataTypePtr & type_in_storage, const DataTypes & concrete_types, bool check_ambiguos_paths) { - if (!type_in_storage->hasDynamicSubcolumns()) + if (!type_in_storage->hasDynamicSubcolumnsDeprecated()) return type_in_storage; if (isObject(type_in_storage)) @@ -459,7 +459,7 @@ DataTypePtr getLeastCommonTypeForDynamicColumns( DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage) { - if (!type_in_storage->hasDynamicSubcolumns()) + if (!type_in_storage->hasDynamicSubcolumnsDeprecated()) return type_in_storage; if (isObject(type_in_storage)) @@ -494,7 +494,7 @@ bool hasDynamicSubcolumns(const ColumnsDescription & columns) return std::any_of(columns.begin(), columns.end(), [](const auto & column) { - return column.type->hasDynamicSubcolumns(); + return column.type->hasDynamicSubcolumnsDeprecated(); }); } @@ -1065,7 +1065,7 @@ Field FieldVisitorFoldDimension::operator()(const Null & x) const void setAllObjectsToDummyTupleType(NamesAndTypesList & columns) { for 
(auto & column : columns) - if (column.type->hasDynamicSubcolumns()) + if (column.type->hasDynamicSubcolumnsDeprecated()) column.type = createConcreteEmptyDynamicColumn(column.type); } diff --git a/src/DataTypes/ObjectUtils.h b/src/DataTypes/ObjectUtils.h index 3e3b1b96740..6599d8adef1 100644 --- a/src/DataTypes/ObjectUtils.h +++ b/src/DataTypes/ObjectUtils.h @@ -194,7 +194,7 @@ ColumnsDescription getConcreteObjectColumns( /// dummy column will be removed. for (const auto & column : storage_columns) { - if (column.type->hasDynamicSubcolumns()) + if (column.type->hasDynamicSubcolumnsDeprecated()) types_in_entries[column.name].push_back(createConcreteEmptyDynamicColumn(column.type)); } @@ -204,7 +204,7 @@ ColumnsDescription getConcreteObjectColumns( for (const auto & column : entry_columns) { auto storage_column = storage_columns.tryGetPhysical(column.name); - if (storage_column && storage_column->type->hasDynamicSubcolumns()) + if (storage_column && storage_column->type->hasDynamicSubcolumnsDeprecated()) types_in_entries[column.name].push_back(column.type); } } diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index a3a28f8091c..dbe27a5f3f6 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -196,6 +196,8 @@ String getNameForSubstreamPath( stream_name += ".variant_offsets"; else if (it->type == Substream::VariantElement) stream_name += "." + it->variant_element_name; + else if (it->type == SubstreamType::DynamicStructure) + stream_name += ".dynamic_structure"; } return stream_name; @@ -271,6 +273,23 @@ ColumnPtr ISerialization::getFromSubstreamsCache(SubstreamsCache * cache, const return it == cache->end() ? nullptr : it->second; } +void ISerialization::addToSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path, DeserializeBinaryBulkStatePtr state) +{ + if (!cache || path.empty()) + return; + + cache->emplace(getSubcolumnNameForStream(path), state); +} + +ISerialization::DeserializeBinaryBulkStatePtr ISerialization::getFromSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path) +{ + if (!cache || path.empty()) + return nullptr; + + auto it = cache->find(getSubcolumnNameForStream(path)); + return it == cache->end() ? nullptr : it->second; +} + bool ISerialization::isSpecialCompressionAllowed(const SubstreamPath & path) { for (const auto & elem : path) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index ebaa26d19a6..65493cf6dda 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -160,6 +160,9 @@ public: VariantElements, VariantElement, + DynamicData, + DynamicStructure, + Regular, }; @@ -231,6 +234,8 @@ public: using SerializeBinaryBulkStatePtr = std::shared_ptr; using DeserializeBinaryBulkStatePtr = std::shared_ptr; + using SubstreamsDeserializeStatesCache = std::unordered_map; + struct SerializeBinaryBulkSettings { OutputStreamGetter getter; @@ -240,6 +245,14 @@ public: bool low_cardinality_use_single_dictionary_for_part = true; bool position_independent_encoding = true; + + enum class DynamicStatisticsMode + { + NONE, /// Don't write statistics. + PREFIX, /// Write statistics in prefix. + SUFFIX, /// Write statistics in suffix. 
+ }; + DynamicStatisticsMode dynamic_write_statistics = DynamicStatisticsMode::NONE; }; struct DeserializeBinaryBulkSettings @@ -256,6 +269,8 @@ public: /// If not zero, may be used to avoid reallocations while reading column of String type. double avg_value_size_hint = 0; + + bool dynamic_read_statistics = false; }; /// Call before serializeBinaryBulkWithMultipleStreams chain to write something before first mark. @@ -273,7 +288,8 @@ public: /// Call before before deserializeBinaryBulkWithMultipleStreams chain to get DeserializeBinaryBulkStatePtr. virtual void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & /*settings*/, - DeserializeBinaryBulkStatePtr & /*state*/) const {} + DeserializeBinaryBulkStatePtr & /*state*/, + SubstreamsDeserializeStatesCache * /*cache*/) const {} /** 'offset' and 'limit' are used to specify range. * limit = 0 - means no limit. @@ -393,6 +409,9 @@ public: static void addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column); static ColumnPtr getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path); + static void addToSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path, DeserializeBinaryBulkStatePtr state); + static DeserializeBinaryBulkStatePtr getFromSubstreamsDeserializeStatesCache(SubstreamsDeserializeStatesCache * cache, const SubstreamPath & path); + static bool isSpecialCompressionAllowed(const SubstreamPath & path); static size_t getArrayLevel(const SubstreamPath & path); diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index e8aab615849..d6546b338b5 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -284,10 +284,11 @@ void SerializationArray::serializeBinaryBulkStateSuffix( void SerializationArray::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { settings.path.push_back(Substream::ArrayElements); - nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index 82f5e8bce45..c3353f0c251 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -55,7 +55,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, @@ -71,7 +72,6 @@ public: DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; -private: struct SubcolumnCreator : public ISubcolumnCreator { const ColumnPtr offsets; diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp new file mode 100644 index 00000000000..c9fe8dd6b29 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -0,0 +1,645 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + 
+namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; +} + +void SerializationDynamic::enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const +{ + settings.path.push_back(Substream::DynamicStructure); + callback(settings.path); + settings.path.pop_back(); + + const auto * column_dynamic = data.column ? &assert_cast(*data.column) : nullptr; + + /// If column is nullptr, nothing to enumerate as we don't have any variants. + if (!column_dynamic) + return; + + const auto & variant_info = column_dynamic->getVariantInfo(); + auto variant_serialization = variant_info.variant_type->getDefaultSerialization(); + + settings.path.push_back(Substream::DynamicData); + auto variant_data = SubstreamData(variant_serialization) + .withType(variant_info.variant_type) + .withColumn(column_dynamic->getVariantColumnPtr()) + .withSerializationInfo(data.serialization_info); + settings.path.back().data = variant_data; + variant_serialization->enumerateStreams(settings, callback, variant_data); + settings.path.pop_back(); +} + +SerializationDynamic::DynamicStructureSerializationVersion::DynamicStructureSerializationVersion(UInt64 version) : value(static_cast(version)) +{ + checkVersion(version); +} + +void SerializationDynamic::DynamicStructureSerializationVersion::checkVersion(UInt64 version) +{ + if (version != VariantTypeName) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Dynamic structure serialization."); +} + +struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryBulkState +{ + SerializationDynamic::DynamicStructureSerializationVersion structure_version; + DataTypePtr variant_type; + Names variant_names; + SerializationPtr variant_serialization; + ISerialization::SerializeBinaryBulkStatePtr variant_state; + + /// Pointer to currently serialized dynamic column. + /// Used to calculate statistics for the whole column and not for some range. + const ColumnDynamic * current_dynamic_column = nullptr; + + /// Variants statistics. Map (Variant name) -> (Variant size). + ColumnDynamic::Statistics statistics = { .source =ColumnDynamic::Statistics::Source::READ }; + + SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} + + void updateStatistics(const ColumnVariant & column_variant) + { + for (size_t i = 0; i != variant_names.size(); ++i) + statistics.data[variant_names[i]] += column_variant.getVariantPtrByGlobalDiscriminator(i)->size(); + } +}; + +struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBinaryBulkState +{ + SerializationPtr variant_serialization; + ISerialization::DeserializeBinaryBulkStatePtr variant_state; + ISerialization::DeserializeBinaryBulkStatePtr structure_state; +}; + +void SerializationDynamic::serializeBinaryBulkStatePrefix( + const DB::IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const +{ + const auto & column_dynamic = assert_cast(column); + const auto & variant_info = column_dynamic.getVariantInfo(); + + settings.path.push_back(Substream::DynamicStructure); + auto * stream = settings.getter(settings.path); + settings.path.pop_back(); + + if (!stream) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Dynamic column structure during serialization of binary bulk state prefix"); + + /// Write structure serialization version. 
+ UInt64 structure_version = DynamicStructureSerializationVersion::Value::VariantTypeName; + writeBinaryLittleEndian(structure_version, *stream); + auto dynamic_state = std::make_shared(structure_version); + + dynamic_state->variant_type = variant_info.variant_type; + dynamic_state->variant_names = variant_info.variant_names; + const auto & variant_column = column_dynamic.getVariantColumn(); + + /// Write internal Variant type name. + writeStringBinary(dynamic_state->variant_type->getName(), *stream); + + /// Write statistics in prefix if needed. + if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX) + { + const auto & statistics = column_dynamic.getStatistics(); + for (size_t i = 0; i != variant_info.variant_names.size(); ++i) + { + size_t size = 0; + /// Use statistics from column if it was created during merge. + if (statistics.data.empty() || statistics.source != ColumnDynamic::Statistics::Source::MERGE) + size = variant_column.getVariantByGlobalDiscriminator(i).size(); + /// Otherwise we can use only variant sizes from current column. + else + size = statistics.data.at(variant_info.variant_names[i]); + writeVarUInt(size, *stream); + } + } + + dynamic_state->variant_serialization = dynamic_state->variant_type->getDefaultSerialization(); + settings.path.push_back(Substream::DynamicData); + dynamic_state->variant_serialization->serializeBinaryBulkStatePrefix(variant_column, settings, dynamic_state->variant_state); + settings.path.pop_back(); + + state = std::move(dynamic_state); +} + +void SerializationDynamic::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const +{ + DeserializeBinaryBulkStatePtr structure_state = deserializeDynamicStructureStatePrefix(settings, cache); + if (!structure_state) + return; + + auto dynamic_state = std::make_shared(); + dynamic_state->structure_state = structure_state; + dynamic_state->variant_serialization = checkAndGetState(structure_state)->variant_type->getDefaultSerialization(); + + settings.path.push_back(Substream::DynamicData); + dynamic_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_state->variant_state, cache); + settings.path.pop_back(); + + state = std::move(dynamic_state); +} + +ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeDynamicStructureStatePrefix( + DeserializeBinaryBulkSettings & settings, SubstreamsDeserializeStatesCache * cache) +{ + settings.path.push_back(Substream::DynamicStructure); + + DeserializeBinaryBulkStatePtr state = nullptr; + if (auto cached_state = getFromSubstreamsDeserializeStatesCache(cache, settings.path)) + { + state = cached_state; + } + else if (auto * structure_stream = settings.getter(settings.path)) + { + /// Read structure serialization version. + UInt64 structure_version; + readBinaryLittleEndian(structure_version, *structure_stream); + auto structure_state = std::make_shared(structure_version); + /// Read internal Variant type name. + String data_type_name; + readStringBinary(data_type_name, *structure_stream); + structure_state->variant_type = DataTypeFactory::instance().get(data_type_name); + const auto * variant_type = typeid_cast(structure_state->variant_type.get()); + if (!variant_type) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type of Dynamic nested column, expected Variant, got {}", structure_state->variant_type->getName()); + + /// Read statistics. 
+ if (settings.dynamic_read_statistics) + { + const auto & variants = variant_type->getVariants(); + size_t variant_size; + for (const auto & variant : variants) + { + readVarUInt(variant_size, *structure_stream); + structure_state->statistics.data[variant->getName()] = variant_size; + } + } + + state = structure_state; + addToSubstreamsDeserializeStatesCache(cache, settings.path, state); + } + + settings.path.pop_back(); + return state; +} + +void SerializationDynamic::serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const +{ + auto * dynamic_state = checkAndGetState(state); + settings.path.push_back(Substream::DynamicStructure); + auto * stream = settings.getter(settings.path); + settings.path.pop_back(); + + if (!stream) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for Dynamic column structure during serialization of binary bulk state prefix"); + + /// Write statistics in suffix if needed. + if (settings.dynamic_write_statistics == SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX) + { + for (const auto & variant_name : dynamic_state->variant_names) + writeVarUInt(dynamic_state->statistics.data[variant_name], *stream); + } + + settings.path.push_back(Substream::DynamicData); + dynamic_state->variant_serialization->serializeBinaryBulkStateSuffix(settings, dynamic_state->variant_state); + settings.path.pop_back(); +} + +void SerializationDynamic::serializeBinaryBulkWithMultipleStreams( + const DB::IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const +{ + const auto & column_dynamic = assert_cast(column); + auto * dynamic_state = checkAndGetState(state); + const auto & variant_info = column_dynamic.getVariantInfo(); + const auto * variant_column = &column_dynamic.getVariantColumn(); + + if (!variant_info.variant_type->equals(*dynamic_state->variant_type)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName()); + + settings.path.push_back(Substream::DynamicData); + dynamic_state->variant_serialization->serializeBinaryBulkWithMultipleStreams(*variant_column, offset, limit, settings, dynamic_state->variant_state); + settings.path.pop_back(); +} + +void SerializationDynamic::deserializeBinaryBulkWithMultipleStreams( + DB::ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const +{ + if (!state) + return; + + auto mutable_column = column->assumeMutable(); + auto * dynamic_state = checkAndGetState(state); + auto * structure_state = checkAndGetState(dynamic_state->structure_state); + + if (mutable_column->empty()) + mutable_column = ColumnDynamic::create(structure_state->variant_type->createColumn(), structure_state->variant_type, max_dynamic_types, structure_state->statistics); + + auto & column_dynamic = assert_cast(*mutable_column); + const auto & variant_info = column_dynamic.getVariantInfo(); + if (!variant_info.variant_type->equals(*structure_state->variant_type)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. 
Expected: {}, Got: {}", structure_state->variant_type->getName(), variant_info.variant_type->getName()); + + settings.path.push_back(Substream::DynamicData); + dynamic_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(column_dynamic.getVariantColumnPtr(), limit, settings, dynamic_state->variant_state, cache); + settings.path.pop_back(); + + column = std::move(mutable_column); +} + +void SerializationDynamic::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const +{ + UInt8 null_bit = field.isNull(); + writeBinary(null_bit, ostr); + if (null_bit) + return; + + auto field_type = applyVisitor(FieldToDataType(), field); + auto field_type_name = field_type->getName(); + writeVarUInt(field_type_name.size(), ostr); + writeString(field_type_name, ostr); + field_type->getDefaultSerialization()->serializeBinary(field, ostr, settings); +} + +void SerializationDynamic::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const +{ + UInt8 null_bit; + readBinary(null_bit, istr); + if (null_bit) + { + field = Null(); + return; + } + + size_t field_type_name_size; + readVarUInt(field_type_name_size, istr); + String field_type_name(field_type_name_size, 0); + istr.readStrict(field_type_name.data(), field_type_name_size); + auto field_type = DataTypeFactory::instance().get(field_type_name); + field_type->getDefaultSerialization()->deserializeBinary(field, istr, settings); +} + +void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + const auto & variant_info = dynamic_column.getVariantInfo(); + const auto & variant_column = dynamic_column.getVariantColumn(); + auto global_discr = variant_column.globalDiscriminatorAt(row_num); + + UInt8 null_bit = global_discr == ColumnVariant::NULL_DISCRIMINATOR; + writeBinary(null_bit, ostr); + if (null_bit) + return; + + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(global_discr); + const auto & variant_type_name = variant_info.variant_names[global_discr]; + writeVarUInt(variant_type_name.size(), ostr); + writeString(variant_type_name, ostr); + variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); +} + +template +static void deserializeVariant( + ColumnVariant & variant_column, + const DataTypePtr & variant_type, + ColumnVariant::Discriminator global_discr, + ReadBuffer & istr, + DeserializeFunc deserialize) +{ + auto & variant = variant_column.getVariantByGlobalDiscriminator(global_discr); + deserialize(*variant_type->getDefaultSerialization(), variant, istr); + variant_column.getLocalDiscriminators().push_back(variant_column.localDiscriminatorByGlobal(global_discr)); + variant_column.getOffsets().push_back(variant.size() - 1); +} + +void SerializationDynamic::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto & dynamic_column = assert_cast(column); + UInt8 null_bit; + readBinary(null_bit, istr); + if (null_bit) + { + dynamic_column.insertDefault(); + return; + } + + size_t variant_type_name_size; + readVarUInt(variant_type_name_size, istr); + String variant_type_name(variant_type_name_size, 0); + istr.readStrict(variant_type_name.data(), variant_type_name_size); + + const auto & variant_info = dynamic_column.getVariantInfo(); + auto it = 
variant_info.variant_name_to_discriminator.find(variant_type_name); + if (it != variant_info.variant_name_to_discriminator.end()) + { + const auto & variant_type = assert_cast(*variant_info.variant_type).getVariant(it->second); + deserializeVariant(dynamic_column.getVariantColumn(), variant_type, it->second, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); + return; + } + + /// We don't have this variant yet. Let's try to add it. + auto variant_type = DataTypeFactory::instance().get(variant_type_name); + if (dynamic_column.addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator.at(variant_type_name); + deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, istr, [&settings](const ISerialization & serialization, IColumn & variant, ReadBuffer & buf){ serialization.deserializeBinary(variant, buf, settings); }); + return; + } + + /// We reached maximum number of variants and couldn't add new variant. + /// This case should be really rare in real use cases. + /// We should always be able to add String variant and insert value as String. + dynamic_column.addStringVariant(); + auto tmp_variant_column = variant_type->createColumn(); + variant_type->getDefaultSerialization()->deserializeBinary(*tmp_variant_column, istr, settings); + auto string_column = castColumn(ColumnWithTypeAndName(tmp_variant_column->getPtr(), variant_type, ""), std::make_shared()); + auto & variant_column = dynamic_column.getVariantColumn(); + variant_column.insertIntoVariantFrom(variant_info.variant_name_to_discriminator.at("String"), *string_column, 0); +} + +void SerializationDynamic::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextCSV(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +template +static void deserializeTextImpl( + IColumn & column, + ReadBuffer & istr, + const FormatSettings & settings, + ReadFieldFunc read_field, + FormatSettings::EscapingRule escaping_rule, + TryDeserializeVariantFunc try_deserialize_variant, + DeserializeVariant deserialize_variant) +{ + auto & dynamic_column = assert_cast(column); + auto & variant_column = dynamic_column.getVariantColumn(); + const auto & variant_info = dynamic_column.getVariantInfo(); + String field = read_field(istr); + auto field_buf = std::make_unique(field); + JSONInferenceInfo json_info; + auto variant_type = tryInferDataTypeByEscapingRule(field, settings, escaping_rule, &json_info); + if (escaping_rule == FormatSettings::EscapingRule::JSON) + transformFinalInferredJSONTypeIfNeeded(variant_type, settings, &json_info); + + if (checkIfTypeIsComplete(variant_type) && dynamic_column.addNewVariant(variant_type)) + { + auto discr = variant_info.variant_name_to_discriminator.at(variant_type->getName()); + deserializeVariant(dynamic_column.getVariantColumn(), variant_type, discr, *field_buf, deserialize_variant); + return; + } + + /// We couldn't infer type or add new variant. Try to insert field into current variants. + field_buf = std::make_unique(field); + if (try_deserialize_variant(*variant_info.variant_type->getDefaultSerialization(), variant_column, *field_buf)) + return; + + /// We couldn't insert field into any existing variant, add String variant and read value as String. 
+ dynamic_column.addStringVariant(); + + if (escaping_rule == FormatSettings::EscapingRule::Quoted && (field.size() < 2 || field.front() != '\'' || field.back() != '\'')) + field = "'" + field + "'"; + + field_buf = std::make_unique(field); + auto string_discr = variant_info.variant_name_to_discriminator.at("String"); + deserializeVariant(dynamic_column.getVariantColumn(), std::make_shared(), string_discr, *field_buf, deserialize_variant); +} + +void SerializationDynamic::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [&settings](ReadBuffer & buf) + { + String field; + readCSVField(field, buf, settings.csv); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextCSV(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextCSV(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::CSV, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextCSV(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextEscaped(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readEscapedString(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextEscaped(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextEscaped(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Escaped, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextEscaped(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextQuoted(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readQuotedField(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return 
serialization.tryDeserializeTextQuoted(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextQuoted(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Quoted, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextQuoted(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextQuoted(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextJSON(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [&settings](ReadBuffer & buf) + { + String field; + readJSONField(field, buf, settings.json); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextJSON(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextJSON(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::JSON, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextJSON(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextRaw(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readString(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeTextRaw(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeTextRaw(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Raw, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeTextRaw(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeTextRaw(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + 
dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeText(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +void SerializationDynamic::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + auto read_field = [](ReadBuffer & buf) + { + String field; + readStringUntilEOF(field, buf); + return field; + }; + + auto try_deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + return serialization.tryDeserializeWholeText(col, buf, settings); + }; + + auto deserialize_variant = [&settings](const ISerialization & serialization, IColumn & col, ReadBuffer & buf) + { + serialization.deserializeWholeText(col, buf, settings); + }; + + deserializeTextImpl(column, istr, settings, read_field, FormatSettings::EscapingRule::Raw, try_deserialize_variant, deserialize_variant); +} + +bool SerializationDynamic::tryDeserializeWholeText(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +{ + deserializeWholeText(column, istr, settings); + return true; +} + +void SerializationDynamic::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + const auto & dynamic_column = assert_cast(column); + dynamic_column.getVariantInfo().variant_type->getDefaultSerialization()->serializeTextXML(dynamic_column.getVariantColumn(), row_num, ostr, settings); +} + +} diff --git a/src/DataTypes/Serializations/SerializationDynamic.h b/src/DataTypes/Serializations/SerializationDynamic.h new file mode 100644 index 00000000000..4803bc25d18 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDynamic.h @@ -0,0 +1,116 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class SerializationDynamicElement; + +class SerializationDynamic : public ISerialization +{ +public: + SerializationDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) + { + } + + struct DynamicStructureSerializationVersion + { + enum Value + { + VariantTypeName = 1, + }; + + Value value; + + static void checkVersion(UInt64 version); + + explicit DynamicStructureSerializationVersion(UInt64 version); + }; + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + static DeserializeBinaryBulkStatePtr deserializeDynamicStructureStatePrefix( + DeserializeBinaryBulkSettings & settings, + SubstreamsDeserializeStatesCache * cache); + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; + + void serializeBinary(const Field & field, WriteBuffer & ostr, const 
FormatSettings & settings) const override; + void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextRaw(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + + void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; + +private: + friend SerializationDynamicElement; + + struct DeserializeBinaryBulkStateDynamicStructure : public ISerialization::DeserializeBinaryBulkState + { + DynamicStructureSerializationVersion structure_version; + DataTypePtr variant_type; + ColumnDynamic::Statistics statistics = {.source = ColumnDynamic::Statistics::Source::READ}; + + explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) : structure_version(structure_version_) {} + }; + + size_t max_dynamic_types; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp new file mode 100644 index 00000000000..386a6579519 --- /dev/null +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -0,0 +1,99 @@ +#include +#include +#include 
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+}
+
+void SerializationDynamicElement::enumerateStreams(
+    DB::ISerialization::EnumerateStreamsSettings & settings,
+    const DB::ISerialization::StreamCallback & callback,
+    const DB::ISerialization::SubstreamData &) const
+{
+    settings.path.push_back(Substream::DynamicStructure);
+    callback(settings.path);
+    settings.path.pop_back();
+
+    /// We don't know if we actually have this variant in the Dynamic column,
+    /// so we cannot enumerate variant streams.
+}
+
+void SerializationDynamicElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
+{
+    throw Exception(
+        ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationDynamicElement");
+}
+
+void SerializationDynamicElement::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
+{
+    throw Exception(
+        ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationDynamicElement");
+}
+
+struct DeserializeBinaryBulkStateDynamicElement : public ISerialization::DeserializeBinaryBulkState
+{
+    ISerialization::DeserializeBinaryBulkStatePtr structure_state;
+    SerializationPtr variant_serialization;
+    ISerialization::DeserializeBinaryBulkStatePtr variant_element_state;
+};
+
+void SerializationDynamicElement::deserializeBinaryBulkStatePrefix(
+    DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const
+{
+    DeserializeBinaryBulkStatePtr structure_state = SerializationDynamic::deserializeDynamicStructureStatePrefix(settings, cache);
+    if (!structure_state)
+        return;
+
+    auto dynamic_element_state = std::make_shared();
+    dynamic_element_state->structure_state = std::move(structure_state);
+    const auto & variant_type = checkAndGetState(dynamic_element_state->structure_state)->variant_type;
+    /// Check if we actually have the required element in the Variant.
+    if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name))
+    {
+        settings.path.push_back(Substream::DynamicData);
+        dynamic_element_state->variant_serialization = std::make_shared(nested_serialization, dynamic_element_name, *global_discr);
+        dynamic_element_state->variant_serialization->deserializeBinaryBulkStatePrefix(settings, dynamic_element_state->variant_element_state, cache);
+        settings.path.pop_back();
+    }
+
+    state = std::move(dynamic_element_state);
+}
+
+void SerializationDynamicElement::serializeBinaryBulkWithMultipleStreams(const IColumn &, size_t, size_t, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const
+{
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkWithMultipleStreams is not implemented for SerializationDynamicElement");
+}
+
+void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams(
+    ColumnPtr & result_column,
+    size_t limit,
+    DeserializeBinaryBulkSettings & settings,
+    DeserializeBinaryBulkStatePtr & state,
+    SubstreamsCache * cache) const
+{
+    auto * dynamic_element_state = checkAndGetState(state);
+
+    if (dynamic_element_state->variant_serialization)
+    {
+        settings.path.push_back(Substream::DynamicData);
+        dynamic_element_state->variant_serialization->deserializeBinaryBulkWithMultipleStreams(result_column, limit, settings, dynamic_element_state->variant_element_state, cache);
+        settings.path.pop_back();
+    }
+    else
+    {
+        auto mutable_column = result_column->assumeMutable();
+        mutable_column->insertManyDefaults(limit);
+        result_column = std::move(mutable_column);
+    }
+}
+
+}
diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h
new file mode 100644
index 00000000000..9e4980e0a27
--- /dev/null
+++ b/src/DataTypes/Serializations/SerializationDynamicElement.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include
+
+namespace DB
+{
+
+
+/// Serialization for Dynamic element when we read it as a subcolumn.
+class SerializationDynamicElement final : public SerializationWrapper
+{
+private:
+    /// To be able to deserialize a Dynamic element as a subcolumn
+    /// we need its type name and global discriminator.
+ String dynamic_element_name; + +public: + SerializationDynamicElement(const SerializationPtr & nested_, const String & dynamic_element_name_) + : SerializationWrapper(nested_) + , dynamic_element_name(dynamic_element_name_) + { + } + + void enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const override; + + void serializeBinaryBulkStatePrefix( + const IColumn & column, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void serializeBinaryBulkStateSuffix( + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; + + void serializeBinaryBulkWithMultipleStreams( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state) const override; + + void deserializeBinaryBulkWithMultipleStreams( + ColumnPtr & column, + size_t limit, + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsCache * cache) const override; +}; + +} diff --git a/src/DataTypes/Serializations/SerializationInterval.cpp b/src/DataTypes/Serializations/SerializationInterval.cpp index 59086d8aef3..2157566895d 100644 --- a/src/DataTypes/Serializations/SerializationInterval.cpp +++ b/src/DataTypes/Serializations/SerializationInterval.cpp @@ -68,9 +68,9 @@ void SerializationInterval::deserializeBinaryBulk(IColumn & column, ReadBuffer & } void SerializationInterval::deserializeBinaryBulkStatePrefix( - DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { - dispatch(&ISerialization::deserializeBinaryBulkStatePrefix, FormatSettings::IntervalOutputFormat::Numeric, settings, state); + dispatch(&ISerialization::deserializeBinaryBulkStatePrefix, FormatSettings::IntervalOutputFormat::Numeric, settings, state, cache); } diff --git a/src/DataTypes/Serializations/SerializationInterval.h b/src/DataTypes/Serializations/SerializationInterval.h index a4e6c204e4f..368aff4f0c3 100644 --- a/src/DataTypes/Serializations/SerializationInterval.h +++ b/src/DataTypes/Serializations/SerializationInterval.h @@ -34,7 +34,10 @@ public: void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; - void deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const override; + void deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index 9efe05042ed..802da263d89 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ 
b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -267,7 +267,8 @@ void SerializationLowCardinality::serializeBinaryBulkStateSuffix( void SerializationLowCardinality::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * /*cache*/) const { settings.path.push_back(Substream::DictionaryKeys); auto * stream = settings.getter(settings.path); diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index d2c3a95c702..aa64e956a64 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -33,7 +33,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 7b6f87baf2e..dac4fbe88e0 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -420,9 +420,10 @@ void SerializationMap::serializeBinaryBulkStateSuffix( void SerializationMap::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { - nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); } diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index 3e27ef1b04a..cfcde445c1f 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -51,7 +51,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationNamed.cpp b/src/DataTypes/Serializations/SerializationNamed.cpp index 2792827e690..07f5f9ea7ed 100644 --- a/src/DataTypes/Serializations/SerializationNamed.cpp +++ b/src/DataTypes/Serializations/SerializationNamed.cpp @@ -54,10 +54,11 @@ void SerializationNamed::serializeBinaryBulkStateSuffix( void SerializationNamed::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { addToPath(settings.path); - nested_serialization->deserializeBinaryBulkStatePrefix(settings, state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationNamed.h b/src/DataTypes/Serializations/SerializationNamed.h index 0633ba2ea6f..bb2161e40e6 100644 --- a/src/DataTypes/Serializations/SerializationNamed.h +++ b/src/DataTypes/Serializations/SerializationNamed.h @@ -36,7 +36,8 @@ public: void 
deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 4d31451f92d..477349f955d 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -95,10 +95,11 @@ void SerializationNullable::serializeBinaryBulkStateSuffix( void SerializationNullable::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { settings.path.push_back(Substream::NullableElements); - nested->deserializeBinaryBulkStatePrefix(settings, state); + nested->deserializeBinaryBulkStatePrefix(settings, state, cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index 37858ccdefd..f7d2d2eadf0 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -29,7 +29,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 67bf7af7799..88244a89204 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -210,7 +210,8 @@ void SerializationObject::serializeBinaryBulkStateSuffix( template void SerializationObject::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { checkSerializationIsSupported(settings); if (state) @@ -258,7 +259,7 @@ void SerializationObject::deserializeBinaryBulkStatePrefix( } settings.path.push_back(Substream::ObjectData); - state_object->nested_serialization->deserializeBinaryBulkStatePrefix(settings, state_object->nested_state); + state_object->nested_serialization->deserializeBinaryBulkStatePrefix(settings, state_object->nested_state, cache); settings.path.pop_back(); state = std::move(state_object); diff --git a/src/DataTypes/Serializations/SerializationObject.h b/src/DataTypes/Serializations/SerializationObject.h index 39e1c514640..4cb7d0ab6a8 100644 --- a/src/DataTypes/Serializations/SerializationObject.h +++ b/src/DataTypes/Serializations/SerializationObject.h @@ -41,7 +41,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationSparse.cpp b/src/DataTypes/Serializations/SerializationSparse.cpp index 
4d7514271ad..f9228069b90 100644 --- a/src/DataTypes/Serializations/SerializationSparse.cpp +++ b/src/DataTypes/Serializations/SerializationSparse.cpp @@ -152,7 +152,7 @@ void SerializationSparse::enumerateStreams( const StreamCallback & callback, const SubstreamData & data) const { - const auto * column_sparse = data.column ? &assert_cast(*data.column) : nullptr; + const auto * column_sparse = data.column ? typeid_cast(data.column.get()) : nullptr; size_t column_size = column_sparse ? column_sparse->size() : 0; settings.path.push_back(Substream::SparseOffsets); @@ -242,12 +242,13 @@ void SerializationSparse::serializeBinaryBulkStateSuffix( void SerializationSparse::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto state_sparse = std::make_shared(); settings.path.push_back(Substream::SparseElements); - nested->deserializeBinaryBulkStatePrefix(settings, state_sparse->nested); + nested->deserializeBinaryBulkStatePrefix(settings, state_sparse->nested, cache); settings.path.pop_back(); state = std::move(state_sparse); diff --git a/src/DataTypes/Serializations/SerializationSparse.h b/src/DataTypes/Serializations/SerializationSparse.h index b1ed7b613f0..a55856bacf0 100644 --- a/src/DataTypes/Serializations/SerializationSparse.h +++ b/src/DataTypes/Serializations/SerializationSparse.h @@ -43,7 +43,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; /// Allows to write ColumnSparse and other columns in sparse serialization. 
void serializeBinaryBulkWithMultipleStreams( diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 632a019d2d9..bb7c19aa78d 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -606,13 +606,14 @@ void SerializationTuple::serializeBinaryBulkStateSuffix( void SerializationTuple::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto tuple_state = std::make_shared(); tuple_state->states.resize(elems.size()); for (size_t i = 0; i < elems.size(); ++i) - elems[i]->deserializeBinaryBulkStatePrefix(settings, tuple_state->states[i]); + elems[i]->deserializeBinaryBulkStatePrefix(settings, tuple_state->states[i], cache); state = std::move(tuple_state); } diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index d9c63a05217..810673d8b21 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -53,7 +53,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 8ca86c63bf6..3fe26b773e3 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -123,7 +123,8 @@ void SerializationVariant::serializeBinaryBulkStateSuffix( void SerializationVariant::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { auto variant_state = std::make_shared(); variant_state->states.resize(variants.size()); @@ -132,7 +133,7 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( for (size_t i = 0; i < variants.size(); ++i) { addVariantElementToPath(settings.path, i); - variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i]); + variants[i]->deserializeBinaryBulkStatePrefix(settings, variant_state->states[i], cache); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index 3f53dcf1339..0de786f5561 100644 --- a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -59,7 +59,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 7d4487fe6da..4f120ecac06 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -2,6 
+2,7 @@ #include #include #include +#include namespace DB { @@ -55,12 +56,13 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; -void SerializationVariantElement::deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const +void SerializationVariantElement::deserializeBinaryBulkStatePrefix( + DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { auto variant_element_state = std::make_shared(); addVariantToPath(settings.path); - nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, variant_element_state->variant_element_state, cache); removeVariantFromPath(settings.path); state = std::move(variant_element_state); @@ -80,6 +82,7 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( { auto * variant_element_state = checkAndGetState(state); + size_t variant_limit = 0; /// First, deserialize discriminators from Variant column. settings.path.push_back(Substream::VariantDiscriminators); if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) @@ -96,17 +99,30 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( if (!variant_element_state->discriminators || result_column->empty()) variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); +// ColumnVariant::Discriminator discr; +// readBinaryLittleEndian(discr, *discriminators_stream); +// if (discr == ColumnVariant::NULL_DISCRIMINATOR) +// { SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); +// } +// else +// { +// auto & discriminators_data = assert_cast(*variant_element_state->discriminators->assumeMutable()).getData(); +// discriminators_data.resize_fill(discriminators_data.size() + limit, discr); +// } + addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators); } settings.path.pop_back(); - /// Iterate through new discriminators to calculate the limit for our variant. const auto & discriminators_data = assert_cast(*variant_element_state->discriminators).getData(); size_t discriminators_offset = variant_element_state->discriminators->size() - limit; - size_t variant_limit = 0; - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) - variant_limit += (discriminators_data[i] == variant_discriminator); + /// Iterate through new discriminators to calculate the limit for our variant. + if (!variant_limit) + { + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + variant_limit += (discriminators_data[i] == variant_discriminator); + } /// Now we know the limit for our variant and can deserialize it. 
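An aside on the SerializationVariantElement.cpp hunk above (illustrative only, not part of the patch): after the discriminators are deserialized, the read limit for a single variant is derived by counting, among the rows appended by the current read, those whose discriminator equals that variant's global discriminator. A minimal self-contained sketch of that counting step follows; the Discriminator alias, the NULL_DISCRIMINATOR constant and the variantLimit helper are assumed names for illustration, not the ClickHouse API.

// Sketch: derive how many values must be read from one variant's data stream.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

using Discriminator = uint8_t;
constexpr Discriminator NULL_DISCRIMINATOR = 255; // stands in for ColumnVariant::NULL_DISCRIMINATOR

// Count how many of the last `limit` rows belong to the variant identified by `target`.
size_t variantLimit(const std::vector<Discriminator> & discriminators, size_t limit, Discriminator target)
{
    size_t offset = discriminators.size() - limit; // rows added by the current read
    size_t variant_limit = 0;
    for (size_t i = offset; i != discriminators.size(); ++i)
        variant_limit += (discriminators[i] == target);
    return variant_limit;
}

int main()
{
    // Five new rows: variant 0, NULL, variant 1, variant 0, variant 1.
    std::vector<Discriminator> discriminators = {0, NULL_DISCRIMINATOR, 1, 0, 1};
    std::cout << variantLimit(discriminators, 5, 1) << '\n'; // prints 2: only two values belong to variant 1
}

Rows carrying NULL_DISCRIMINATOR or another variant's discriminator contribute nothing, so the variant's own substream is advanced only by the returned count.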
diff --git a/src/DataTypes/Serializations/SerializationVariantElement.h b/src/DataTypes/Serializations/SerializationVariantElement.h index aafecf43d39..0ce0a72e250 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.h +++ b/src/DataTypes/Serializations/SerializationVariantElement.h @@ -43,7 +43,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, @@ -59,12 +60,6 @@ public: DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; -private: - friend SerializationVariant; - - void addVariantToPath(SubstreamPath & path) const; - void removeVariantFromPath(SubstreamPath & path) const; - struct VariantSubcolumnCreator : public ISubcolumnCreator { const ColumnPtr local_discriminators; @@ -82,6 +77,11 @@ private: ColumnPtr create(const ColumnPtr & prev) const override; SerializationPtr create(const SerializationPtr & prev) const override; }; +private: + friend SerializationVariant; + + void addVariantToPath(SubstreamPath & path) const; + void removeVariantFromPath(SubstreamPath & path) const; }; } diff --git a/src/DataTypes/Serializations/SerializationWrapper.cpp b/src/DataTypes/Serializations/SerializationWrapper.cpp index bde52bb8096..ecef533d7e0 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.cpp +++ b/src/DataTypes/Serializations/SerializationWrapper.cpp @@ -29,9 +29,10 @@ void SerializationWrapper::serializeBinaryBulkStateSuffix( void SerializationWrapper::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const { - nested_serialization->deserializeBinaryBulkStatePrefix(settings, state); + nested_serialization->deserializeBinaryBulkStatePrefix(settings, state, cache); } void SerializationWrapper::serializeBinaryBulkWithMultipleStreams( diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 6c5e2046062..882f17bba0a 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -36,7 +36,8 @@ public: void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, - DeserializeBinaryBulkStatePtr & state) const override; + DeserializeBinaryBulkStatePtr & state, + SubstreamsDeserializeStatesCache * cache) const override; void serializeBinaryBulkWithMultipleStreams( const IColumn & column, diff --git a/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp b/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp index fc7432d5bf6..c6337a31fce 100644 --- a/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp +++ b/src/DataTypes/Serializations/tests/gtest_object_serialization.cpp @@ -49,7 +49,7 @@ TEST(SerializationObject, FromString) settings.position_independent_encoding = false; settings.getter = [&in](const auto &) { return ∈ }; - serialization->deserializeBinaryBulkStatePrefix(settings, state); + serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization->deserializeBinaryBulkWithMultipleStreams(result_column, column_string->size(), settings, state, nullptr); } diff --git a/src/DataTypes/Utils.cpp 
b/src/DataTypes/Utils.cpp index 2f29d57d454..e7e69e379af 100644 --- a/src/DataTypes/Utils.cpp +++ b/src/DataTypes/Utils.cpp @@ -224,6 +224,7 @@ bool canBeSafelyCasted(const DataTypePtr & from_type, const DataTypePtr & to_typ case TypeIndex::Nothing: case TypeIndex::JSONPaths: case TypeIndex::Variant: + case TypeIndex::Dynamic: return false; } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 59b3e52e139..330bc28be61 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -929,6 +929,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep query_context->setSetting("allow_experimental_hash_functions", 1); query_context->setSetting("allow_experimental_object_type", 1); query_context->setSetting("allow_experimental_variant_type", 1); + query_context->setSetting("allow_experimental_dynamic_type", 1); query_context->setSetting("allow_experimental_annoy_index", 1); query_context->setSetting("allow_experimental_usearch_index", 1); query_context->setSetting("allow_experimental_bigint_types", 1); diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 5b7995e0da2..deff44a0d9b 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -43,9 +43,9 @@ struct FormatSettings String column_names_for_schema_inference{}; String schema_inference_hints{}; - bool try_infer_integers = false; - bool try_infer_dates = false; - bool try_infer_datetimes = false; + bool try_infer_integers = true; + bool try_infer_dates = true; + bool try_infer_datetimes = true; bool try_infer_exponent_floats = false; enum class DateTimeInputFormat diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index 8286b24d0a6..39915b0735e 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -93,7 +93,7 @@ void NativeReader::readData(const ISerialization & serialization, ColumnPtr & co ISerialization::DeserializeBinaryBulkStatePtr state; - serialization.deserializeBinaryBulkStatePrefix(settings, state); + serialization.deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization.deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state, nullptr); if (column->size() != rows) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 7049ca44110..75f8979e727 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +64,7 @@ #include #include +#include namespace DB { @@ -1815,6 +1818,7 @@ struct ConvertImpl /// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. 
+template struct ConvertImplGenericFromString { static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) @@ -1854,29 +1858,34 @@ struct ConvertImplGenericFromString { serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); } - catch (const Exception & e) + catch (const Exception &) { - auto * nullable_column = typeid_cast(&column_to); - if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) - { - auto & col_nullmap = nullable_column->getNullMapData(); - if (col_nullmap.size() != nullable_column->size()) - col_nullmap.resize_fill(nullable_column->size()); - if (nullable_column->size() == (i + 1)) - nullable_column->popBack(1); - nullable_column->insertDefault(); - continue; - } - throw; + if constexpr (throw_on_error) + throw; + /// Check if exception happened after we inserted the value + /// (deserializeWholeText should not do it, but let's check anyway). + if (column_to.size() > i) + column_to.popBack(column_to.size() - i); + column_to.insertDefault(); } + /// Usually deserializeWholeText checks for eof after parsing, but let's check one more time just in case. if (!read_buffer.eof()) { - if (result_type) - throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + if constexpr (throw_on_error) + { + if (result_type) + throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + else + throw Exception( + ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse string to column {}. Expected eof", column_to.getName()); + } else - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, - "Cannot parse string to column {}. Expected eof", column_to.getName()); + { + if (column_to.size() > i) + column_to.popBack(column_to.size() - i); + column_to.insertDefault(); + } } } } @@ -3279,7 +3288,9 @@ private: { if (checkAndGetDataType(from_type.get())) { - return &ConvertImplGenericFromString::execute; + if (cast_type == CastType::accurateOrNull) + return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } return createWrapper(from_type, to_type, requested_result_is_nullable); @@ -3442,7 +3453,7 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) { @@ -3485,7 +3496,7 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } DataTypePtr from_type_holder; @@ -3576,7 +3587,7 @@ private: /// Conversion from String through parsing. 
if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); @@ -3921,7 +3932,7 @@ private: { return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); res->finalize(); return res; }; @@ -4089,7 +4100,7 @@ private: }; } - auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); + auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(removeNullableOrLowCardinalityNullable(from_type)->getName()); if (!variant_discr_opt) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName()); @@ -4197,6 +4208,293 @@ private: return createColumnToVariantWrapper(from_type, assert_cast(*to_type)); } + WrapperType createDynamicToColumnWrapper(const DataTypePtr & to_type) const + { + return [this, to_type] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + { + const auto & column_dynamic = assert_cast(*arguments.front().column.get()); + const auto & variant_info = column_dynamic.getVariantInfo(); + auto variant_wrapper = createVariantToColumnWrapper(assert_cast(*variant_info.variant_type), to_type); + ColumnsWithTypeAndName args = {ColumnWithTypeAndName(column_dynamic.getVariantColumnPtr(), variant_info.variant_type, "")}; + return variant_wrapper(args, result_type, col_nullable, input_rows_count); + }; + } + + WrapperType createStringToDynamicThroughParsingWrapper() const + { + return [&](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + auto column = arguments[0].column->convertToFullColumnIfLowCardinality(); + auto args = arguments; + args[0].column = column; + + const ColumnNullable * column_nullable = nullptr; + if (isColumnNullable(*args[0].column)) + { + column_nullable = assert_cast(args[0].column.get()); + args[0].column = column_nullable->getNestedColumnPtr(); + } + + args[0].type = removeNullable(removeLowCardinality(args[0].type)); + + if (cast_type == CastType::accurateOrNull) + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + }; + } + + std::pair getReducedVariant( + const ColumnVariant & variant_column, + const DataTypePtr & variant_type, + const std::unordered_map & variant_name_to_discriminator, + size_t max_result_num_variants, + const ColumnDynamic::Statistics & statistics = {}) const + { + LOG_DEBUG(getLogger("FunctionsConversion"), "getReducedVariant for variant {} with size {}", variant_type->getName(), variant_column.size()); + + const auto & variant_types = assert_cast(*variant_type).getVariants(); + /// First check if we don't exceed the limit in current Variant column. 
+ if (variant_types.size() < max_result_num_variants || (variant_types.size() == max_result_num_variants && variant_name_to_discriminator.contains("String"))) + return {variant_column.getPtr(), variant_type}; + + /// We want to keep the most frequent variants and convert to string the rarest. + std::vector> variant_sizes; + variant_sizes.reserve(variant_types.size()); + std::optional old_string_discriminator; + /// List of variants that should be converted to a single String variant. + std::vector variants_to_convert_to_string; + for (size_t i = 0; i != variant_types.size(); ++i) + { + /// String variant won't be removed. + String variant_name = variant_types[i]->getName(); + LOG_DEBUG(getLogger("FunctionsConversion"), "Variant {}/{} size: {}, statistics: {}", variant_name, i, variant_column.getVariantByGlobalDiscriminator(i).size(), statistics.data.contains(variant_name) ? toString(statistics.data.at(variant_name)) : "none"); + + if (variant_name == "String") + { + old_string_discriminator = i; + /// For simplicity, add this variant to the list that will be converted string, + /// so we will process it with other variants when constructing the new String variant. + variants_to_convert_to_string.push_back(i); + } + else + { + size_t size = 0; + if (statistics.data.empty()) + size = variant_column.getVariantByGlobalDiscriminator(i).size(); + else + size = statistics.data.at(variant_name); + variant_sizes.emplace_back(size, i); + } + } + + /// Sort variants by sizes, so we will keep the most frequent. + std::sort(variant_sizes.begin(), variant_sizes.end(), std::greater()); + + DataTypes remaining_variants; + remaining_variants.reserve(max_result_num_variants); + /// Add String variant in advance. + remaining_variants.push_back(std::make_shared()); + for (auto [_, discr] : variant_sizes) + { + if (remaining_variants.size() != max_result_num_variants) + remaining_variants.push_back(variant_types[discr]); + else + variants_to_convert_to_string.push_back(discr); + } + + auto reduced_variant = std::make_shared(remaining_variants); + const auto & new_variants = reduced_variant->getVariants(); + /// To construct reduced variant column we will need mapping from old to new discriminators. + std::vector old_to_new_discriminators_mapping; + old_to_new_discriminators_mapping.resize(variant_types.size()); + ColumnVariant::Discriminator string_variant_discriminator = 0; + for (size_t i = 0; i != new_variants.size(); ++i) + { + String variant_name = new_variants[i]->getName(); + if (variant_name == "String") + { + string_variant_discriminator = i; + for (auto discr : variants_to_convert_to_string) + old_to_new_discriminators_mapping[discr] = i; + } + else + { + auto old_discr = variant_name_to_discriminator.at(variant_name); + old_to_new_discriminators_mapping[old_discr] = i; + } + } + + /// Convert all reduced variants to String. 
+ std::unordered_map variants_converted_to_string; + variants_converted_to_string.reserve(variants_to_convert_to_string.size()); + size_t string_variant_size = 0; + for (auto discr : variants_to_convert_to_string) + { + auto string_type = std::make_shared(); + auto string_wrapper = prepareUnpackDictionaries(variant_types[discr], string_type); + LOG_DEBUG(getLogger("FunctionsConversion"), "Convert variant {} with size {} to String", variant_types[discr]->getName(), variant_column.getVariantPtrByGlobalDiscriminator(discr)->size()); + auto column_to_convert = ColumnWithTypeAndName(variant_column.getVariantPtrByGlobalDiscriminator(discr), variant_types[discr], ""); + ColumnsWithTypeAndName args = {column_to_convert}; + auto variant_string_column = string_wrapper(args, string_type, nullptr, column_to_convert.column->size()); + LOG_DEBUG(getLogger("FunctionsConversion"), "Got String column with size {}", variant_string_column->size()); + string_variant_size += variant_string_column->size(); + variants_converted_to_string[discr] = variant_string_column; + } + + /// Create new discriminators and offsets and fill new String variant according to old discriminators. + auto string_variant = ColumnString::create(); + string_variant->reserve(string_variant_size); + auto new_discriminators_column = variant_column.getLocalDiscriminatorsPtr()->cloneEmpty(); + auto & new_discriminators_data = assert_cast(*new_discriminators_column).getData(); + new_discriminators_data.reserve(variant_column.size()); + auto new_offsets = variant_column.getOffsetsPtr()->cloneEmpty(); + auto & new_offsets_data = assert_cast(*new_offsets).getData(); + new_offsets_data.reserve(variant_column.size()); + const auto & old_local_discriminators = variant_column.getLocalDiscriminators(); + const auto & old_offsets = variant_column.getOffsets(); + LOG_DEBUG(getLogger("FunctionsConversion"), "Discriminators size: {}. Offsets size: {}", old_local_discriminators.size(), old_offsets.size()); + for (size_t i = 0; i != old_local_discriminators.size(); ++i) + { + auto old_discr = variant_column.globalDiscriminatorByLocal(old_local_discriminators[i]); + LOG_DEBUG(getLogger("FunctionsConversion"), "Row {}, discriminator {}", i, UInt64(old_discr)); + + if (old_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + new_discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); + new_offsets_data.push_back(0); + continue; + } + + auto new_discr = old_to_new_discriminators_mapping[old_discr]; + new_discriminators_data.push_back(new_discr); + if (new_discr != string_variant_discriminator) + { + LOG_DEBUG(getLogger("FunctionsConversion"), "Keep variant {}", UInt64(old_discr)); + new_offsets_data.push_back(old_offsets[i]); + } + else + { + LOG_DEBUG(getLogger("FunctionsConversion"), "Get string value of variant {} with String column with size {} at offset {}", UInt64(old_discr), variants_converted_to_string[old_discr]->size(), old_offsets[i]); + new_offsets_data.push_back(string_variant->size()); + string_variant->insertFrom(*variants_converted_to_string[old_discr], old_offsets[i]); + } + } + + /// Create new list of variant columns. 
+ Columns new_variant_columns; + new_variant_columns.resize(new_variants.size()); + for (size_t i = 0; i != variant_types.size(); ++i) + { + auto new_discr = old_to_new_discriminators_mapping[i]; + if (new_discr != string_variant_discriminator) + new_variant_columns[new_discr] = variant_column.getVariantPtrByGlobalDiscriminator(i); + } + new_variant_columns[string_variant_discriminator] = std::move(string_variant); + return {ColumnVariant::create(std::move(new_discriminators_column), std::move(new_offsets), new_variant_columns), reduced_variant}; + } + + WrapperType createVariantToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const + { + const auto & from_variant_type = assert_cast(*from_type); + size_t max_dynamic_types = dynamic_type.getMaxDynamicTypes(); + const auto & variants = from_variant_type.getVariants(); + std::unordered_map variant_name_to_discriminator; + variant_name_to_discriminator.reserve(variants.size()); + for (size_t i = 0; i != variants.size(); ++i) + variant_name_to_discriminator[variants[i]->getName()] = i; + + return [from_type, max_dynamic_types, variant_name_to_discriminator, this] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & variant_column = assert_cast(*arguments.front().column); + auto [reduced_variant_column, reduced_variant_type] = getReducedVariant(variant_column, from_type, variant_name_to_discriminator, max_dynamic_types); + return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, max_dynamic_types); + }; + } + + WrapperType createColumnToDynamicWrapper(const DataTypePtr & from_type, const DataTypeDynamic & dynamic_type) const + { + if (const auto * variant_type = typeid_cast(from_type.get())) + return createVariantToDynamicWrapper(from_type, dynamic_type); + + if (dynamic_type.getMaxDynamicTypes() == 1) + { + DataTypePtr string_type = std::make_shared(); + if (from_type->isNullable()) + string_type = makeNullable(string_type); + auto string_wrapper = prepareUnpackDictionaries(from_type, string_type); + auto variant_type = std::make_shared(DataTypes{removeNullable(string_type)}); + auto variant_wrapper = createColumnToVariantWrapper(string_type, *variant_type); + return [string_wrapper, variant_wrapper, string_type, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + { + auto string_column = string_wrapper(arguments, string_type, col_nullable, input_rows_count); + auto column = ColumnWithTypeAndName(string_column, string_type, ""); + ColumnsWithTypeAndName args = {column}; + auto variant_column = variant_wrapper(args, variant_type, nullptr, string_column->size()); + return ColumnDynamic::create(variant_column, variant_type, max_dynamic_types); + }; + } + + if (context && context->getSettingsRef().cast_string_to_dynamic_use_inference && isStringOrFixedString(removeNullable(removeLowCardinality(from_type)))) + return createStringToDynamicThroughParsingWrapper(); + + auto variant_type = std::make_shared(DataTypes{removeNullableOrLowCardinalityNullable(from_type)}); + auto variant_wrapper = createColumnToVariantWrapper(from_type, *variant_type); + return [variant_wrapper, variant_type, max_dynamic_types=dynamic_type.getMaxDynamicTypes()] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr + { + 
auto variant_res = variant_wrapper(arguments, variant_type, col_nullable, input_rows_count); + return ColumnDynamic::create(variant_res, variant_type, max_dynamic_types); + }; + } + + WrapperType createDynamicToDynamicWrapper(const DataTypeDynamic & from_dynamic, const DataTypeDynamic & to_dynamic) const + { + size_t from_max_types = from_dynamic.getMaxDynamicTypes(); + size_t to_max_types = to_dynamic.getMaxDynamicTypes(); + if (from_max_types == to_max_types) + return createIdentityWrapper(from_dynamic.getPtr()); + + if (to_max_types > from_max_types) + { + return [to_max_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & column_dynamic = assert_cast(*arguments[0].column); + return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), to_max_types); + }; + } + + return [to_max_types, this] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & column_dynamic = assert_cast(*arguments[0].column); + auto [reduced_variant_column, reduced_variant_type] = getReducedVariant( + column_dynamic.getVariantColumn(), + column_dynamic.getVariantInfo().variant_type, + column_dynamic.getVariantInfo().variant_name_to_discriminator, + to_max_types, + column_dynamic.getStatistics()); + return ColumnDynamic::create(reduced_variant_column, reduced_variant_type, to_max_types); + }; + } + + /// Wrapper for conversion to/from Dynamic type + WrapperType createDynamicWrapper(const DataTypePtr & from_type, const DataTypePtr & to_type) const + { + if (const auto * from_dynamic = checkAndGetDataType(from_type.get())) + { + if (const auto * to_dynamic = checkAndGetDataType(to_type.get())) + return createDynamicToDynamicWrapper(*from_dynamic, *to_dynamic); + + return createDynamicToColumnWrapper(to_type); + } + + return createColumnToDynamicWrapper(from_type, *checkAndGetDataType(to_type.get())); + } + template WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum * to_type) const { @@ -4376,8 +4674,11 @@ private: WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const { - /// Conversion from/to Variant data type is processed in a special way. + /// Conversion from/to Variant/Dynamic data type is processed in a special way. /// We don't need to remove LowCardinality/Nullable. 
+ if (isDynamic(to_type) || isDynamic(from_type)) + return createDynamicWrapper(from_type, to_type); + if (isVariant(to_type) || isVariant(from_type)) return createVariantWrapper(from_type, to_type); @@ -4691,7 +4992,7 @@ private: if (to_type->getCustomSerialization() && to_type->getCustomName()) { - ret = [requested_result_is_nullable]( + ret = [requested_result_is_nullable, this]( ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, @@ -4700,7 +5001,10 @@ private: auto wrapped_result_type = result_type; if (requested_result_is_nullable) wrapped_result_type = makeNullable(result_type); - return ConvertImplGenericFromString::execute( + if (this->cast_type == CastType::accurateOrNull) + return ConvertImplGenericFromString::execute( + arguments, wrapped_result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute( arguments, wrapped_result_type, column_nullable, input_rows_count); }; return true; diff --git a/src/Functions/dynamicElement.cpp b/src/Functions/dynamicElement.cpp new file mode 100644 index 00000000000..964c058776e --- /dev/null +++ b/src/Functions/dynamicElement.cpp @@ -0,0 +1,172 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +/** Extract element of Dynamic by type name. + * Also the function looks through Arrays: you can get Array of Dynamic elements from Array of Dynamic. + */ +class FunctionDynamicElement : public IFunction +{ +public: + static constexpr auto name = "dynamicElement"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const size_t number_of_arguments = arguments.size(); + + if (number_of_arguments != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2", + getName(), number_of_arguments); + + size_t count_arrays = 0; + const IDataType * input_type = arguments[0].type.get(); + while (const DataTypeArray * array = checkAndGetDataType(input_type)) + { + input_type = array->getNestedType().get(); + ++count_arrays; + } + + if (!isDynamic(*input_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Variant or Array of Variant. 
Actual {}", + getName(), + arguments[0].type->getName()); + + auto return_type = makeNullableOrLowCardinalityNullableSafe(getRequestedElementType(arguments[1].column)); + + for (; count_arrays; --count_arrays) + return_type = std::make_shared(return_type); + + return return_type; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto & input_arg = arguments[0]; + const IDataType * input_type = input_arg.type.get(); + const IColumn * input_col = input_arg.column.get(); + + bool input_arg_is_const = false; + if (typeid_cast(input_col)) + { + input_col = assert_cast(input_col)->getDataColumnPtr().get(); + input_arg_is_const = true; + } + + Columns array_offsets; + while (const DataTypeArray * array_type = checkAndGetDataType(input_type)) + { + const ColumnArray * array_col = assert_cast(input_col); + + input_type = array_type->getNestedType().get(); + input_col = &array_col->getData(); + array_offsets.push_back(array_col->getOffsetsPtr()); + } + + const ColumnDynamic * input_col_as_dynamic = checkAndGetColumn(input_col); + if (!input_col_as_dynamic) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic or array of Dynamics. Actual {}", getName(), input_arg.type->getName()); + + auto element_type = getRequestedElementType(arguments[1].column); + const auto & variant_info = input_col_as_dynamic->getVariantInfo(); + auto it = variant_info.variant_name_to_discriminator.find(element_type->getName()); + if (it == variant_info.variant_name_to_discriminator.end()) + { + auto result_type = makeNullableOrLowCardinalityNullableSafe(element_type); + auto result_column = result_type->createColumn(); + result_column->insertManyDefaults(input_rows_count); + return wrapInArraysAndConstIfNeeded(std::move(result_column), array_offsets, input_arg_is_const, input_rows_count); + } + + const auto & variant_column = input_col_as_dynamic->getVariantColumn(); + auto subcolumn_creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), element_type->getName(), it->second, variant_column.localDiscriminatorByGlobal(it->second)); + auto result_column = subcolumn_creator.create(variant_column.getVariantPtrByGlobalDiscriminator(it->second)); + return wrapInArraysAndConstIfNeeded(std::move(result_column), array_offsets, input_arg_is_const, input_rows_count); + } + +private: + DataTypePtr getRequestedElementType(const ColumnPtr & type_name_column) const + { + const auto * name_col = checkAndGetColumnConst(type_name_column.get()); + if (!name_col) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of {} must be a constant String", getName()); + + String element_type_name = name_col->getValue(); + auto element_type = DataTypeFactory::instance().tryGet(element_type_name); + if (!element_type) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument of {} must be a valid type name. 
Got: {}", getName(), element_type_name); + + return element_type; + } + + ColumnPtr wrapInArraysAndConstIfNeeded(ColumnPtr res, const Columns & array_offsets, bool input_arg_is_const, size_t input_rows_count) const + { + for (auto it = array_offsets.rbegin(); it != array_offsets.rend(); ++it) + res = ColumnArray::create(res, *it); + + if (input_arg_is_const) + res = ColumnConst::create(res, input_rows_count); + + return res; + } +}; + +} + +REGISTER_FUNCTION(DynamicElement) +{ +// factory.registerFunction(FunctionDocumentation{ +// .description = R"( +//Extracts a column with specified type from a `Dynamic` column. +//)", +// .syntax{"dynamicElement(dynamic, type_name)"}, +// .arguments{{ +// {"dynamic", "Dynamic column"}, +// {"type_name", "The name of the variant type to extract"}}}, +// .examples{{{ +// "Example", +// R"( +//)", +// R"( +//)"}}}, +// .categories{"Dynamic"}, +// }); + + factory.registerFunction(); +} + +} diff --git a/src/Functions/dynamicType.cpp b/src/Functions/dynamicType.cpp new file mode 100644 index 00000000000..8fb2974ceff --- /dev/null +++ b/src/Functions/dynamicType.cpp @@ -0,0 +1,104 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + +/// Return enum with type name for each row in Dynamic column. +class FunctionDynamicType : public IFunction +{ +public: + static constexpr auto name = "dynamicType"; + static constexpr auto name_for_null = "None"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.empty() || arguments.size() > 1) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1", + getName(), arguments.empty()); + + if (!isDynamic(arguments[0].type.get())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic, got {} instead", + getName(), arguments[0].type->getName()); + + return std::make_shared(std::make_shared()); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const ColumnDynamic * dynamic_column = checkAndGetColumn(arguments[0].column.get()); + if (!dynamic_column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be Dynamic, got {} instead", + getName(), arguments[0].type->getName()); + + const auto & variant_info = dynamic_column->getVariantInfo(); + const auto & variant_column = dynamic_column->getVariantColumn(); + auto res = result_type->createColumn(); + String element_type; + for (size_t i = 0; i != input_rows_count; ++i) + { + auto global_discr = variant_column.globalDiscriminatorAt(i); + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + 
element_type = name_for_null; + else + element_type = variant_info.variant_names[global_discr]; + + res->insertData(element_type.data(), element_type.size()); + } + + return res; + } +}; + +} + +REGISTER_FUNCTION(DynamicType) +{ + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Returns the variant type name for each row of `Dynamic` column. If row contains NULL, it returns 'None' for it. +)", + .syntax = {"dynamicType(variant)"}, + .arguments = {{"variant", "Variant column"}}, + .examples = {{{ + "Example", + R"( +)", + R"( + +)"}}}, + .categories{"Variant"}, + }); +} + +} diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 4f75042ad8d..d501fa28d4b 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -1157,6 +1158,11 @@ private: variant_column->applyNullMap(assert_cast(*arg_cond.column).getData()); return result_column; } + else if (auto * dynamic_column = typeid_cast(result_column.get())) + { + dynamic_column->applyNullMap(assert_cast(*arg_cond.column).getData()); + return result_column; + } else return ColumnNullable::create(materializeColumnIfConst(result_column), arg_cond.column); } @@ -1200,6 +1206,11 @@ private: variant_column->applyNegatedNullMap(assert_cast(*arg_cond.column).getData()); return result_column; } + else if (auto * dynamic_column = typeid_cast(result_column.get())) + { + dynamic_column->applyNegatedNullMap(assert_cast(*arg_cond.column).getData()); + return result_column; + } else { size_t size = input_rows_count; diff --git a/src/Functions/isNotNull.cpp b/src/Functions/isNotNull.cpp index dd5182aeade..f0afc0d5ba3 100644 --- a/src/Functions/isNotNull.cpp +++ b/src/Functions/isNotNull.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -44,9 +45,10 @@ public: { const ColumnWithTypeAndName & elem = arguments[0]; - if (isVariant(elem.type)) + if (isVariant(elem.type) || isDynamic(elem.type)) { - const auto & discriminators = checkAndGetColumn(*elem.column)->getLocalDiscriminators(); + const auto & column_variant = isVariant(elem.type) ? assert_cast(*elem.column) : assert_cast(*elem.column).getVariantColumn(); + const auto & discriminators = column_variant.getLocalDiscriminators(); auto res = DataTypeUInt8().createColumn(); auto & data = typeid_cast(*res).getData(); data.resize(discriminators.size()); diff --git a/src/Functions/isNull.cpp b/src/Functions/isNull.cpp index 4bf4e44f866..7ed4fa7a813 100644 --- a/src/Functions/isNull.cpp +++ b/src/Functions/isNull.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -46,9 +47,10 @@ public: { const ColumnWithTypeAndName & elem = arguments[0]; - if (isVariant(elem.type)) + if (isVariant(elem.type) || isDynamic(elem.type)) { - const auto & discriminators = checkAndGetColumn(*elem.column)->getLocalDiscriminators(); + const auto & column_variant = isVariant(elem.type) ? 
assert_cast(*elem.column) : assert_cast(*elem.column).getVariantColumn(); + const auto & discriminators = column_variant.getLocalDiscriminators(); auto res = DataTypeUInt8().createColumn(); auto & data = typeid_cast(*res).getData(); data.reserve(discriminators.size()); diff --git a/src/Functions/variantElement.cpp b/src/Functions/variantElement.cpp index 2744a0dabb8..b57ccb6fee1 100644 --- a/src/Functions/variantElement.cpp +++ b/src/Functions/variantElement.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -116,55 +117,12 @@ public: if (!variant_global_discr.has_value()) return arguments[2].column; + auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr); const auto & variant_type = input_type_as_variant->getVariant(*variant_global_discr); const auto & variant_column = input_col_as_variant->getVariantPtrByGlobalDiscriminator(*variant_global_discr); - - /// If Variant has only NULLs or our variant doesn't have any real values, - /// just create column with default values and create null mask with 1. - if (input_col_as_variant->hasOnlyNulls() || variant_column->empty()) - { - auto res = variant_type->createColumn(); - - if (variant_type->lowCardinality()) - assert_cast(*res).nestedToNullable(); - - res->insertManyDefaults(input_col_as_variant->size()); - if (!variant_type->canBeInsideNullable()) - return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count); - - auto null_map = ColumnUInt8::create(); - auto & null_map_data = null_map->getData(); - null_map_data.resize_fill(input_col_as_variant->size(), 1); - return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(res), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count); - } - - /// If we extract single non-empty column and have no NULLs, then just return this variant. - if (auto non_empty_local_discr = input_col_as_variant->getLocalDiscriminatorOfOneNoneEmptyVariantNoNulls()) - { - /// If we were trying to extract some other variant, - /// it would be empty and we would already processed this case above. 
- chassert(input_col_as_variant->globalDiscriminatorByLocal(*non_empty_local_discr) == variant_global_discr); - return wrapInArraysAndConstIfNeeded(makeNullableOrLowCardinalityNullableSafe(variant_column), array_offsets, input_arg_is_const, input_rows_count); - } - - /// In general case we should calculate null-mask for variant - /// according to the discriminators column and expand - /// variant column by this mask to get a full column (with default values on NULLs) - const auto & local_discriminators = input_col_as_variant->getLocalDiscriminators(); - auto null_map = ColumnUInt8::create(); - auto & null_map_data = null_map->getData(); - null_map_data.reserve(local_discriminators.size()); - auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr); - for (auto local_discr : local_discriminators) - null_map_data.push_back(local_discr != variant_local_discr); - - auto expanded_variant_column = IColumn::mutate(variant_column); - if (variant_type->lowCardinality()) - expanded_variant_column = assert_cast(*expanded_variant_column).cloneNullable(); - expanded_variant_column->expand(null_map_data, /*inverted = */ true); - if (variant_type->canBeInsideNullable()) - return wrapInArraysAndConstIfNeeded(ColumnNullable::create(std::move(expanded_variant_column), std::move(null_map)), array_offsets, input_arg_is_const, input_rows_count); - return wrapInArraysAndConstIfNeeded(std::move(expanded_variant_column), array_offsets, input_arg_is_const, input_rows_count); + auto subcolumn_creator = SerializationVariantElement::VariantSubcolumnCreator(input_col_as_variant->getLocalDiscriminatorsPtr(), variant_type->getName(), *variant_global_discr, variant_local_discr); + auto res = subcolumn_creator.create(variant_column); + return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count); } private: std::optional getVariantGlobalDiscriminator(const ColumnPtr & index_column, const DataTypeVariant & variant_type, size_t argument_size) const diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7c3bed7388c..739d0f17078 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1496,7 +1496,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, validateVirtualColumns(*res); - if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) + if (!res->supportsDynamicSubcolumnsDeprecated() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column of type Object, " diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index fc58f7b5098..a1cede5ae95 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -554,7 +554,7 @@ BlockIO InterpreterInsertQuery::execute() { /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. 
- if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && !isDynamic(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); } } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 5588fc55a64..351189f70ae 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -2,7 +2,7 @@ #include #include -#include +//#include #include #include @@ -1188,6 +1188,38 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } + if (!unknown_required_source_columns.empty()) + { + + for (const NameAndTypePair & pair : source_columns_ordinary) + { +// std::cerr << "Check ordinary column " << pair.name << "\n"; + if (!pair.type->hasDynamicSubcolumns()) + continue; + +// std::cerr << "Check dyamic subcolumns\n"; + + for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) + { + auto [column_name, dynamic_subcolumn_name] = Nested::splitName(*it); +// std::cerr << "Check dyamic subcolumn " << dynamic_subcolumn_name << "\n"; + + if (column_name == pair.name) + { + if (auto dynamic_subcolumn_type = pair.type->tryGetSubcolumnType(dynamic_subcolumn_name)) + { +// std::cerr << "Found\n"; + source_columns.emplace_back(*it, dynamic_subcolumn_type); + it = unknown_required_source_columns.erase(it); + continue; + } + } + + ++it; + } + } + } + if (!unknown_required_source_columns.empty()) { constexpr auto format_string = "Missing columns: {} while processing query: '{}', required columns:{}{}"; diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 25085ff4823..30b7de409f1 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -26,6 +27,7 @@ #include #include #include +#include namespace DB @@ -165,6 +167,8 @@ Field convertDecimalType(const Field & from, const To & type) Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const IDataType * from_type_hint) { + checkStackSize(); + if (from_type_hint && from_type_hint->equals(type)) { return src; @@ -504,7 +508,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID else if (const DataTypeVariant * type_variant = typeid_cast(&type)) { /// If we have type hint and Variant contains such type, no need to convert field. - if (from_type_hint && type_variant->tryGetVariantDiscriminator(*from_type_hint)) + if (from_type_hint && type_variant->tryGetVariantDiscriminator(from_type_hint->getName())) return src; /// Create temporary column and check if we can insert this field to the variant. @@ -513,6 +517,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (col->tryInsert(src)) return src; } + else if (isDynamic(type)) + { + /// We can insert any field to Dynamic column. 
+ return src; + } /// Conversion from string by parsing. if (src.getType() == Field::Types::String) diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 27c364073ae..3529863a623 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -40,7 +40,7 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio if (!settings.allow_experimental_object_type) { - if (data_type.hasDynamicSubcolumns()) + if (data_type.hasDynamicSubcolumnsDeprecated()) { throw Exception( ErrorCodes::ILLEGAL_COLUMN, @@ -107,6 +107,18 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio } } } + + if (!settings.allow_experimental_dynamic_type) + { + if (data_type.hasDynamicSubcolumns()) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because experimental Dynamic type is not allowed. " + "Set setting allow_experimental_dynamic_type = 1 in order to allow it", + data_type.getName()); + } + } }; validate_callback(*type_to_check); diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index ffb59bfa457..e2d2bc97ff7 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -21,6 +21,7 @@ struct DataTypeValidationSettings , allow_experimental_variant_type(settings.allow_experimental_variant_type) , allow_suspicious_variant_types(settings.allow_suspicious_variant_types) , validate_nested_types(settings.validate_experimental_and_suspicious_types_inside_nested_types) + , allow_experimental_dynamic_type(settings.allow_experimental_dynamic_type) { } @@ -30,6 +31,7 @@ struct DataTypeValidationSettings bool allow_experimental_variant_type = true; bool allow_suspicious_variant_types = true; bool validate_nested_types = true; + bool allow_experimental_dynamic_type = true; }; void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings); diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index fcf189e51f4..747a9a6f7ba 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -1,18 +1,47 @@ #include #include +#include #include #include #include #include - namespace DB { namespace { +class DynamicArgumentsParser : public IParserBase +{ +private: + const char * getName() const override { return "Dynamic data type optional argument"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + ASTPtr identifier; + ParserIdentifier identifier_parser; + if (!identifier_parser.parse(pos, identifier, expected)) + return false; + + if (pos->type != TokenType::Equals) + { + expected.add(pos, "equals operator"); + return false; + } + + ++pos; + + ASTPtr number; + ParserNumber number_parser; + if (!number_parser.parse(pos, number, expected)) + return false; + + node = makeASTFunction("equals", identifier, number); + return true; + } +}; + /// Wrapper to allow mixed lists of nested and normal types. 
/// Parameters are either: /// - Nested table elements; @@ -21,10 +50,21 @@ namespace /// - another data type (or identifier) class ParserDataTypeArgument : public IParserBase { +public: + ParserDataTypeArgument(std::string_view type_name_) : type_name(type_name_) + { + } + private: const char * getName() const override { return "data type argument"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override { + if (type_name == "Dynamic") + { + DynamicArgumentsParser parser; + return parser.parse(pos, node, expected); + } + ParserNestedTable nested_parser; ParserDataType data_type_parser; ParserAllCollectionsOfLiterals literal_parser(false); @@ -39,6 +79,8 @@ private: || literal_parser.parse(pos, node, expected) || data_type_parser.parse(pos, node, expected); } + + std::string_view type_name; }; } @@ -140,7 +182,7 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ++pos; /// Parse optional parameters - ParserList args_parser(std::make_unique(), std::make_unique(TokenType::Comma)); + ParserList args_parser(std::make_unique(type_name), std::make_unique(TokenType::Comma)); ASTPtr expr_list_args; if (!args_parser.parse(pos, expr_list_args, expected)) diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index cae2ab7691e..9996bedb20e 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -105,6 +105,8 @@ public: } } + virtual void finalizeBuffers() {} + protected: friend class ParallelFormattingOutputFormat; @@ -122,7 +124,6 @@ protected: virtual void consumeTotals(Chunk) {} virtual void consumeExtremes(Chunk) {} virtual void finalizeImpl() {} - virtual void finalizeBuffers() {} virtual void writePrefix() {} virtual void writeSuffix() {} virtual void resetFormatterImpl() {} diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index 3bd0b532d90..857f5040b79 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -70,25 +70,6 @@ static AggregatingSortedAlgorithm::ColumnsDefinition defineColumns( return def; } -static MutableColumns getMergedColumns(const Block & header, const AggregatingSortedAlgorithm::ColumnsDefinition & def) -{ - MutableColumns columns; - columns.resize(header.columns()); - - for (const auto & desc : def.columns_to_simple_aggregate) - { - const auto & type = desc.nested_type ? 
desc.nested_type - : desc.real_type; - columns[desc.column_number] = type->createColumn(); - } - - for (size_t i = 0; i < columns.size(); ++i) - if (!columns[i]) - columns[i] = header.getByPosition(i).type->createColumn(); - - return columns; -} - /// Remove constants and LowCardinality for SimpleAggregateFunction static void preprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::ColumnsDefinition & def) { @@ -159,12 +140,24 @@ AggregatingSortedAlgorithm::SimpleAggregateDescription::~SimpleAggregateDescript AggregatingSortedAlgorithm::AggregatingMergedData::AggregatingMergedData( - MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_) - : MergedData(std::move(columns_), false, max_block_size_rows_, max_block_size_bytes_), def(def_) + : MergedData(false, max_block_size_rows_, max_block_size_bytes_), def(def_) { +} + +void AggregatingSortedAlgorithm::AggregatingMergedData::initialize(const DB::Block & header, const IMergingAlgorithm::Inputs & inputs) +{ + MergedData::initialize(header, inputs); + + for (const auto & desc : def.columns_to_simple_aggregate) + { + const auto & type = desc.nested_type ? desc.nested_type + : desc.real_type; + columns[desc.column_number] = type->createColumn(); + } + initAggregateDescription(); /// Just to make startGroup() simpler. @@ -267,12 +260,14 @@ AggregatingSortedAlgorithm::AggregatingSortedAlgorithm( size_t max_block_size_bytes_) : IMergingAlgorithmWithDelayedChunk(header_, num_inputs, description_) , columns_definition(defineColumns(header_, description_)) - , merged_data(getMergedColumns(header_, columns_definition), max_block_size_rows_, max_block_size_bytes_, columns_definition) + , merged_data(max_block_size_rows_, max_block_size_bytes_, columns_definition) { } void AggregatingSortedAlgorithm::initialize(Inputs inputs) { + merged_data.initialize(header, inputs); + for (auto & input : inputs) if (input.chunk) preprocessChunk(input.chunk, columns_definition); diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h index aa221573151..9ab800058b1 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h @@ -101,11 +101,12 @@ private: public: AggregatingMergedData( - MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_); + void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) override; + /// Group is a group of rows with the same sorting key. It represents single row in result. /// Algorithm is: start group, add several rows, finish group. /// Then pull chunk when enough groups were added. 
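Usage sketch (not part of the patch): the Dynamic type, the allow_experimental_dynamic_type setting, and the dynamicType/dynamicElement functions introduced in the hunks above could be exercised roughly as follows. The table and column names (t, d) are illustrative assumptions, and the exact result types are as documented in the function registrations above.

    SET allow_experimental_dynamic_type = 1;

    CREATE TABLE t (d Dynamic) ENGINE = Memory;
    INSERT INTO t VALUES (42), ('Hello'), ([1, 2, 3]), (NULL);

    -- dynamicType() returns the stored type name for each row, 'None' for NULL.
    SELECT d, dynamicType(d) FROM t;

    -- dynamicElement() extracts values of one concrete type; rows stored with
    -- a different type come back as NULL.
    SELECT dynamicElement(d, 'String') FROM t;
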
diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index 8948cee217c..f5e4c88fcd0 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -31,8 +31,7 @@ CollapsingSortedAlgorithm::CollapsingSortedAlgorithm( LoggerPtr log_, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_) + : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs, std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)) , sign_column_number(header_.getPositionByName(sign_column)) , only_positive_sign(only_positive_sign_) , log(log_) @@ -65,7 +64,7 @@ void CollapsingSortedAlgorithm::reportIncorrectData() void CollapsingSortedAlgorithm::insertRow(RowRef & row) { - merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); + merged_data->insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); } std::optional CollapsingSortedAlgorithm::insertRows() @@ -90,8 +89,8 @@ std::optional CollapsingSortedAlgorithm::insertRows() if (count_positive >= count_negative) { - if (merged_data.hasEnoughRows()) - res = merged_data.pull(); + if (merged_data->hasEnoughRows()) + res = merged_data->pull(); insertRow(last_positive_row); @@ -121,8 +120,8 @@ std::optional CollapsingSortedAlgorithm::insertRows() IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() { /// Rare case, which may happen when index_granularity is 1, but we needed to insert 2 rows inside insertRows(). - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// Take rows in required order and put them into `merged_data`, while the rows are no more than `max_block_size` while (queue.isValid()) @@ -148,8 +147,8 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() if (key_differs) { /// if there are enough rows and the last one is calculated completely - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// We write data for the previous primary key. 
auto res = insertRows(); @@ -220,7 +219,7 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() return Status(std::move(*res)); } - return Status(merged_data.pull(), true); + return Status(merged_data->pull(), true); } } diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h index be1a3a3bf33..99fd95d82d9 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.h @@ -42,8 +42,6 @@ public: Status merge() override; private: - MergedData merged_data; - const size_t sign_column_number; const bool only_positive_sign; diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index 814625d7aee..2b891592b20 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -46,8 +46,8 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm( size_t max_block_size_bytes_, Graphite::Params params_, time_t time_of_merge_) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), nullptr, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), false, max_block_size_rows_, max_block_size_bytes_) + : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), nullptr, max_row_refs, std::make_unique(false, max_block_size_rows_, max_block_size_bytes_)) + , graphite_rollup_merged_data(assert_cast(*merged_data)) , params(std::move(params_)) , time_of_merge(time_of_merge_) { @@ -63,7 +63,7 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm( } } - merged_data.allocMemForAggregates(max_size_of_aggregate_state, max_alignment_of_aggregate_state); + graphite_rollup_merged_data.allocMemForAggregates(max_size_of_aggregate_state, max_alignment_of_aggregate_state); columns_definition = defineColumns(header_, params); } @@ -113,7 +113,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() const DateLUTImpl & date_lut = timezone ? timezone->getTimeZone() : DateLUT::instance(); - /// Take rows in needed order and put them into `merged_data` until we get `max_block_size` rows. + /// Take rows in needed order and put them into `graphite_rollup_merged_data` until we get `max_block_size` rows. /// /// Variables starting with current_* refer to the rows previously popped from the queue that will /// contribute towards current output row. @@ -142,10 +142,10 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() if (is_new_key) { /// Accumulate the row that has maximum version in the previous group of rows with the same key: - if (merged_data.wasGroupStarted()) + if (graphite_rollup_merged_data.wasGroupStarted()) accumulateRow(current_subgroup_newest_row); - Graphite::RollupRule next_rule = merged_data.currentRule(); + Graphite::RollupRule next_rule = graphite_rollup_merged_data.currentRule(); if (new_path) next_rule = selectPatternForPath(this->params, next_path); @@ -167,15 +167,15 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() if (will_be_new_key) { - if (merged_data.wasGroupStarted()) + if (graphite_rollup_merged_data.wasGroupStarted()) { finishCurrentGroup(); /// We have enough rows - return, but don't advance the loop. 
At the beginning of the /// next call to merge() the same next_cursor will be processed once more and /// the next output row will be created from it. - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (graphite_rollup_merged_data.hasEnoughRows()) + return Status(graphite_rollup_merged_data.pull()); } /// At this point previous row has been fully processed, so we can advance the loop @@ -218,28 +218,28 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() } /// Write result row for the last group. - if (merged_data.wasGroupStarted()) + if (graphite_rollup_merged_data.wasGroupStarted()) { accumulateRow(current_subgroup_newest_row); finishCurrentGroup(); } - return Status(merged_data.pull(), true); + return Status(graphite_rollup_merged_data.pull(), true); } void GraphiteRollupSortedAlgorithm::startNextGroup(SortCursor & cursor, Graphite::RollupRule next_rule) { - merged_data.startNextGroup(cursor->all_columns, cursor->getRow(), next_rule, columns_definition); + graphite_rollup_merged_data.startNextGroup(cursor->all_columns, cursor->getRow(), next_rule, columns_definition); } void GraphiteRollupSortedAlgorithm::finishCurrentGroup() { - merged_data.insertRow(current_time_rounded, current_subgroup_newest_row, columns_definition); + graphite_rollup_merged_data.insertRow(current_time_rounded, current_subgroup_newest_row, columns_definition); } void GraphiteRollupSortedAlgorithm::accumulateRow(RowRef & row) { - merged_data.accumulateRow(row, columns_definition); + graphite_rollup_merged_data.accumulateRow(row, columns_definition); } void GraphiteRollupSortedAlgorithm::GraphiteRollupMergedData::startNextGroup( diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h index a20a6eaf11f..aaa3859efb6 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h @@ -53,7 +53,7 @@ public: { public: using MergedData::MergedData; - ~GraphiteRollupMergedData(); + ~GraphiteRollupMergedData() override; void startNextGroup(const ColumnRawPtrs & raw_columns, size_t row, Graphite::RollupRule next_rule, ColumnsDefinition & def); @@ -72,7 +72,7 @@ public: }; private: - GraphiteRollupMergedData merged_data; + GraphiteRollupMergedData & graphite_rollup_merged_data; const Graphite::Params params; ColumnsDefinition columns_definition; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h index b8e73aec0dc..cf4b8589441 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h @@ -34,9 +34,9 @@ protected: return !lhs.hasEqualSortColumnsWith(rhs); } -private: Block header; +private: /// Inputs currently being merged. 
Inputs current_inputs; SortCursorImpls cursors; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp index c8b69382e89..fe5186736b5 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp @@ -5,7 +5,7 @@ namespace DB { IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks( - Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs) + Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs, std::unique_ptr merged_data_) : header(std::move(header_)) , description(std::move(description_)) , chunk_allocator(num_inputs + max_row_refs) @@ -13,6 +13,7 @@ IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks( , sources(num_inputs) , sources_origin_merge_tree_part_level(num_inputs) , out_row_sources_buf(out_row_sources_buf_) + , merged_data(std::move(merged_data_)) { } @@ -28,6 +29,8 @@ static void prepareChunk(Chunk & chunk) void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs) { + merged_data->initialize(header, inputs); + for (size_t source_num = 0; source_num < inputs.size(); ++source_num) { if (!inputs[source_num].chunk) diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h index 3b4f9e92c5d..bc1aafe93f7 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include namespace DB @@ -10,7 +11,7 @@ class IMergingAlgorithmWithSharedChunks : public IMergingAlgorithm { public: IMergingAlgorithmWithSharedChunks( - Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs); + Block header_, size_t num_inputs, SortDescription description_, WriteBuffer * out_row_sources_buf_, size_t max_row_refs, std::unique_ptr merged_data_); void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; @@ -25,7 +26,6 @@ private: SortCursorImpls cursors; protected: - struct Source { detail::SharedChunkPtr chunk; @@ -43,6 +43,8 @@ protected: /// If it is not nullptr then it should be populated during execution WriteBuffer * out_row_sources_buf = nullptr; + std::unique_ptr merged_data; + using RowRef = detail::RowRefWithOwnedChunk; void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, sources[cursor.impl->order].chunk); } bool skipLastRowFor(size_t input_number) const { return sources[input_number].skip_last_row; } diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index 7ffde835ad0..95f915e4478 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -1,7 +1,9 @@ #pragma once #include +#include #include +#include #include #include @@ -19,17 +21,40 @@ namespace ErrorCodes class MergedData { public: - explicit MergedData(MutableColumns columns_, bool use_average_block_size_, UInt64 max_block_size_, UInt64 max_block_size_bytes_) - : columns(std::move(columns_)), max_block_size(max_block_size_), max_block_size_bytes(max_block_size_bytes_), 
use_average_block_size(use_average_block_size_) + explicit MergedData(bool use_average_block_size_, UInt64 max_block_size_, UInt64 max_block_size_bytes_) + : max_block_size(max_block_size_), max_block_size_bytes(max_block_size_bytes_), use_average_block_size(use_average_block_size_) { } + virtual void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) + { + columns = header.cloneEmptyColumns(); + std::vector source_columns; + source_columns.resize(columns.size()); + for (const auto & input : inputs) + { + if (!input.chunk) + continue; + + const auto & input_columns = input.chunk.getColumns(); + for (size_t i = 0; i != input_columns.size(); ++i) + source_columns[i].push_back(input_columns[i]); + } + + for (size_t i = 0; i != columns.size(); ++i) + { + if (columns[i]->hasDynamicStructure()) + columns[i]->takeDynamicStructureFromSourceColumns(source_columns[i]); + } + } + /// Pull will be called at next prepare call. void flush() { need_flush = true; } void insertRow(const ColumnRawPtrs & raw_columns, size_t row, size_t block_size) { size_t num_columns = raw_columns.size(); + chassert(columns.size() == num_columns); for (size_t i = 0; i < num_columns; ++i) columns[i]->insertFrom(*raw_columns[i], row); @@ -41,6 +66,7 @@ public: void insertRows(const ColumnRawPtrs & raw_columns, size_t start_index, size_t length, size_t block_size) { size_t num_columns = raw_columns.size(); + chassert(columns.size() == num_columns); for (size_t i = 0; i < num_columns; ++i) { if (length == 1) @@ -61,6 +87,7 @@ public: UInt64 num_rows = chunk.getNumRows(); UInt64 num_columns = chunk.getNumColumns(); + chassert(columns.size() == num_columns); auto chunk_columns = chunk.mutateColumns(); /// Here is a special code for constant columns. @@ -69,9 +96,18 @@ public: for (size_t i = 0; i < num_columns; ++i) { if (isColumnConst(*columns[i])) + { columns[i] = columns[i]->cloneResized(num_rows); + } + else if (columns[i]->hasDynamicStructure()) + { + columns[i] = columns[i]->cloneEmpty(); + columns[i]->insertRangeFrom(*chunk_columns[i], 0, num_rows); + } else + { columns[i] = std::move(chunk_columns[i]); + } } if (rows_size < num_rows) @@ -144,6 +180,8 @@ public: UInt64 totalAllocatedBytes() const { return total_allocated_bytes; } UInt64 maxBlockSize() const { return max_block_size; } + virtual ~MergedData() = default; + protected: MutableColumns columns; diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp index 1debfcec8e0..75a6ddec682 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp @@ -18,7 +18,7 @@ MergingSortedAlgorithm::MergingSortedAlgorithm( WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) : header(std::move(header_)) - , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size_, max_block_size_bytes_) + , merged_data(use_average_block_sizes, max_block_size_, max_block_size_bytes_) , description(description_) , limit(limit_) , out_row_sources_buf(out_row_sources_buf_) @@ -59,6 +59,7 @@ static void prepareChunk(Chunk & chunk) void MergingSortedAlgorithm::initialize(Inputs inputs) { + merged_data.initialize(header, inputs); current_inputs = std::move(inputs); for (size_t source_num = 0; source_num < current_inputs.size(); ++source_num) diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 
9e5c1249c4e..7b2c7d82a01 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -41,9 +41,8 @@ ReplacingSortedAlgorithm::ReplacingSortedAlgorithm( bool use_average_block_sizes, bool cleanup_, bool enable_vertical_final_) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes), cleanup(cleanup_) - , enable_vertical_final(enable_vertical_final_) + : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs, std::make_unique(use_average_block_sizes, max_block_size_rows, max_block_size_bytes)) + , cleanup(cleanup_), enable_vertical_final(enable_vertical_final_) { if (!is_deleted_column.empty()) is_deleted_column_number = header_.getPositionByName(is_deleted_column); @@ -75,7 +74,7 @@ void ReplacingSortedAlgorithm::insertRow() to_be_emitted.push(std::move(selected_row.owned_chunk)); } else - merged_data.insertRow(*selected_row.all_columns, selected_row.row_num, selected_row.owned_chunk->getNumRows()); + merged_data->insertRow(*selected_row.all_columns, selected_row.row_num, selected_row.owned_chunk->getNumRows()); selected_row.clear(); } @@ -109,8 +108,8 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() if (key_differs) { /// If there are enough rows and the last one is calculated completely - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// Write the data for the previous primary key. if (!selected_row.empty()) @@ -168,8 +167,8 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() } /// If have enough rows, return block, because it prohibited to overflow requested number of rows. - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); /// We will write the data for the last primary key. 
if (!selected_row.empty()) @@ -193,7 +192,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() return emitChunk(chunk, to_be_emitted.empty()); } - return Status(merged_data.pull(), true); + return Status(merged_data->pull(), true); } void ReplacingSortedAlgorithm::saveChunkForSkippingFinalFromSelectedRow() diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2fbd73c9072..a3ccccf0845 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -44,8 +44,6 @@ public: Status merge() override; private: - MergedData merged_data; - ssize_t is_deleted_column_number = -1; ssize_t version_column_number = -1; bool cleanup = false; diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 28160b18269..49a417e7df2 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -382,39 +382,6 @@ static SummingSortedAlgorithm::ColumnsDefinition defineColumns( return def; } -static MutableColumns getMergedDataColumns( - const Block & header, - const SummingSortedAlgorithm::ColumnsDefinition & def) -{ - MutableColumns columns; - size_t num_columns = def.column_numbers_not_to_aggregate.size() + def.columns_to_aggregate.size(); - columns.reserve(num_columns); - - for (const auto & desc : def.columns_to_aggregate) - { - // Wrap aggregated columns in a tuple to match function signature - if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType())) - { - size_t tuple_size = desc.column_numbers.size(); - MutableColumns tuple_columns(tuple_size); - for (size_t i = 0; i < tuple_size; ++i) - tuple_columns[i] = header.safeGetByPosition(desc.column_numbers[i]).column->cloneEmpty(); - - columns.emplace_back(ColumnTuple::create(std::move(tuple_columns))); - } - else - { - const auto & type = desc.nested_type ? 
desc.nested_type : desc.real_type; - columns.emplace_back(type->createColumn()); - } - } - - for (const auto & column_number : def.column_numbers_not_to_aggregate) - columns.emplace_back(header.safeGetByPosition(column_number).type->createColumn()); - - return columns; -} - static void preprocessChunk(Chunk & chunk, const SummingSortedAlgorithm::ColumnsDefinition & def) { auto num_rows = chunk.getNumRows(); @@ -504,11 +471,44 @@ static void setRow(Row & row, const ColumnRawPtrs & raw_columns, size_t row_num, } -SummingSortedAlgorithm::SummingMergedData::SummingMergedData( - MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_) - : MergedData(std::move(columns_), false, max_block_size_rows_, max_block_size_bytes_) +SummingSortedAlgorithm::SummingMergedData::SummingMergedData(UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_) + : MergedData(false, max_block_size_rows_, max_block_size_bytes_) , def(def_) { +} + +void SummingSortedAlgorithm::SummingMergedData::initialize(const DB::Block & header, const IMergingAlgorithm::Inputs & inputs) +{ + MergedData::initialize(header, inputs); + + MutableColumns new_columns; + size_t num_columns = def.column_numbers_not_to_aggregate.size() + def.columns_to_aggregate.size(); + new_columns.reserve(num_columns); + + for (const auto & desc : def.columns_to_aggregate) + { + // Wrap aggregated columns in a tuple to match function signature + if (!desc.is_agg_func_type && !desc.is_simple_agg_func_type && isTuple(desc.function->getResultType())) + { + size_t tuple_size = desc.column_numbers.size(); + MutableColumns tuple_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) + tuple_columns[i] = std::move(columns[desc.column_numbers[i]]); + + new_columns.emplace_back(ColumnTuple::create(std::move(tuple_columns))); + } + else + { + const auto & type = desc.nested_type ? 
desc.nested_type : desc.real_type; + new_columns.emplace_back(type->createColumn()); + } + } + + for (const auto & column_number : def.column_numbers_not_to_aggregate) + new_columns.emplace_back(std::move(columns[column_number])); + + columns = std::move(new_columns); + current_row.resize(def.column_names.size()); initAggregateDescription(); @@ -698,12 +698,14 @@ SummingSortedAlgorithm::SummingSortedAlgorithm( size_t max_block_size_bytes) : IMergingAlgorithmWithDelayedChunk(header_, num_inputs, std::move(description_)) , columns_definition(defineColumns(header_, description, column_names_to_sum, partition_key_columns)) - , merged_data(getMergedDataColumns(header_, columns_definition), max_block_size_rows, max_block_size_bytes, columns_definition) + , merged_data(max_block_size_rows, max_block_size_bytes, columns_definition) { } void SummingSortedAlgorithm::initialize(Inputs inputs) { + merged_data.initialize(header, inputs); + for (auto & input : inputs) if (input.chunk) preprocessChunk(input.chunk, columns_definition); diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h index dbbe4e53a5f..664b171c4b9 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h @@ -65,7 +65,9 @@ public: using MergedData::insertRow; public: - SummingMergedData(MutableColumns columns_, UInt64 max_block_size_rows, UInt64 max_block_size_bytes_, ColumnsDefinition & def_); + SummingMergedData(UInt64 max_block_size_rows, UInt64 max_block_size_bytes_, ColumnsDefinition & def_); + + void initialize(const Block & header, const IMergingAlgorithm::Inputs & inputs) override; void startGroup(ColumnRawPtrs & raw_columns, size_t row); void finishGroup(); diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp index e7a431dc1d0..9f124c6ba18 100644 --- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp @@ -16,8 +16,7 @@ VersionedCollapsingAlgorithm::VersionedCollapsingAlgorithm( size_t max_block_size_bytes_, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_) + : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE, std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)) /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer. 3 is a reasonable minimum size to collapse anything. 
, max_rows_in_queue(std::min(std::max(3, max_block_size_rows_), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 1) , current_keys(max_rows_in_queue) @@ -47,7 +46,7 @@ void VersionedCollapsingAlgorithm::insertGap(size_t gap_size) void VersionedCollapsingAlgorithm::insertRow(size_t skip_rows, const RowRef & row) { - merged_data.insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); + merged_data->insertRow(*row.all_columns, row.row_num, row.owned_chunk->getNumRows()); insertGap(skip_rows); @@ -104,8 +103,8 @@ IMergingAlgorithm::Status VersionedCollapsingAlgorithm::merge() --num_rows_to_insert; /// It's ok to return here, because we didn't affect queue. - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); } if (current_keys.empty()) @@ -147,13 +146,13 @@ IMergingAlgorithm::Status VersionedCollapsingAlgorithm::merge() insertRow(gap, row); current_keys.popFront(); - if (merged_data.hasEnoughRows()) - return Status(merged_data.pull()); + if (merged_data->hasEnoughRows()) + return Status(merged_data->pull()); } /// Write information about last collapsed rows. insertGap(current_keys.frontGap()); - return Status(merged_data.pull(), true); + return Status(merged_data->pull(), true); } } diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h index d98529b301c..e6d20ddac75 100644 --- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h +++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h @@ -29,8 +29,6 @@ public: Status merge() override; private: - MergedData merged_data; - size_t sign_column_number = 0; const size_t max_rows_in_queue; diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index b2e8e9bc89e..6736cd59e83 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -32,15 +32,23 @@ ColumnGathererStream::ColumnGathererStream( void ColumnGathererStream::initialize(Inputs inputs) { + Columns source_columns; + source_columns.reserve(inputs.size()); for (size_t i = 0; i < inputs.size(); ++i) { if (inputs[i].chunk) { sources[i].update(inputs[i].chunk.detachColumns().at(0)); - if (!result_column) - result_column = sources[i].column->cloneEmpty(); + source_columns.push_back(sources[i].column); } } + + if (source_columns.empty()) + return; + + result_column = source_columns[0]->cloneEmpty(); + if (result_column->hasDynamicStructure()) + result_column->takeDynamicStructureFromSourceColumns(source_columns); } IMergingAlgorithm::Status ColumnGathererStream::merge() @@ -52,7 +60,16 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() if (source_to_fully_copy) /// Was set on a previous iteration { Chunk res; - res.addColumn(source_to_fully_copy->column); + if (result_column->hasDynamicStructure()) + { + auto col = result_column->cloneEmpty(); + col->insertRangeFrom(*source_to_fully_copy->column, 0, source_to_fully_copy->column->size()); + res.addColumn(std::move(col)); + } + else + { + res.addColumn(source_to_fully_copy->column); + } merged_rows += source_to_fully_copy->size; source_to_fully_copy->pos = source_to_fully_copy->size; source_to_fully_copy = nullptr; @@ -96,7 +113,16 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() Chunk res; merged_rows += source_to_fully_copy->column->size(); merged_bytes += 
source_to_fully_copy->column->allocatedBytes(); - res.addColumn(source_to_fully_copy->column); + if (result_column->hasDynamicStructure()) + { + auto col = result_column->cloneEmpty(); + col->insertRangeFrom(*source_to_fully_copy->column, 0, source_to_fully_copy->column->size()); + res.addColumn(std::move(col)); + } + else + { + res.addColumn(source_to_fully_copy->column); + } source_to_fully_copy->pos = source_to_fully_copy->size; source_to_fully_copy = nullptr; return Status(std::move(res)); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index eae5e1a8a47..db6a4d9f06e 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1288,7 +1288,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const /// Looks like there is something around default expression for this column (method `getDefault` is not implemented for the data type Object). /// But after ALTER TABLE ADD COLUMN we need to fill existing rows with something (exactly the default value). /// So we don't allow to do it for now. - if (command.data_type->hasDynamicSubcolumns()) + if (command.data_type->hasDynamicSubcolumnsDeprecated()) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. It has known bugs"); if (virtuals->tryGet(column_name, VirtualsKind::Persistent)) @@ -1366,8 +1366,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const const GetColumnsOptions options(GetColumnsOptions::All); const auto old_data_type = all_columns.getColumn(options, column_name).type; - bool new_type_has_object = command.data_type->hasDynamicSubcolumns(); - bool old_type_has_object = old_data_type->hasDynamicSubcolumns(); + bool new_type_has_object = command.data_type->hasDynamicSubcolumnsDeprecated(); + bool old_type_has_object = old_data_type->hasDynamicSubcolumnsDeprecated(); if (new_type_has_object || old_type_has_object) throw Exception( diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 16b89f24243..6f844e31970 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -547,7 +547,18 @@ bool ColumnsDescription::hasNested(const String & column_name) const bool ColumnsDescription::hasSubcolumn(const String & column_name) const { - return subcolumns.get<0>().count(column_name); + if (subcolumns.get<0>().count(column_name)) + return true; + + auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + auto it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) + return true; + } + + return false; } const ColumnDescription & ColumnsDescription::get(const String & column_name) const @@ -644,6 +655,14 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns return *jt; } + auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->tryGetSubcolumnType(dynamic_subcolumn_name)) + return NameAndTypePair(ordinary_column_name, dynamic_subcolumn_name, it->type, dynamic_subcolumn_type); + } + return {}; } @@ -730,9 +749,18 @@ bool ColumnsDescription::hasAlias(const String & 
column_name) const bool ColumnsDescription::hasColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const { auto it = columns.get<1>().find(column_name); - return (it != columns.get<1>().end() - && (defaultKindToGetKind(it->default_desc.kind) & kind)) - || hasSubcolumn(column_name); + if ((it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & kind)) || hasSubcolumn(column_name)) + return true; + + auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); + it = columns.get<1>().find(ordinary_column_name); + if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) + { + if (auto dynamic_subcolumn_type = it->type->hasSubcolumn(dynamic_subcolumn_name)) + return true; + } + + return false; } bool ColumnsDescription::hasColumnOrNested(GetColumnsOptions::Kind kind, const String & column_name) const diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index b14bb7f997b..785ddcd18f8 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -79,6 +79,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + static ColumnsDescription getTableStructureFromData( const String & format, const String & uri, diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 26ebc8601ee..448b4be6c96 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -36,6 +36,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 1108eafc6b6..5a23fcceeb9 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -172,8 +172,10 @@ public: /// This method can return true for readonly engines that return the same rows for reading (such as SystemNumbers) virtual bool supportsTransactions() const { return false; } + /// Returns true if the storage supports storing of data type Object. + virtual bool supportsDynamicSubcolumnsDeprecated() const { return false; } + /// Returns true if the storage supports storing of dynamic subcolumns. - /// For now it makes sense only for data type Object. virtual bool supportsDynamicSubcolumns() const { return false; } /// Requires squashing small blocks to large for optimal storage. 
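The ColumnsDescription and TreeRewriter changes above make dynamic subcolumns of a Dynamic column resolvable by name, so a typed subcolumn can be read directly. A minimal sketch, assuming a hypothetical table events with a Dynamic column payload (neither name is part of the patch):

    SET allow_experimental_dynamic_type = 1;

    CREATE TABLE events (id UInt64, payload Dynamic) ENGINE = MergeTree ORDER BY id;
    INSERT INTO events VALUES (1, 'click'), (2, 7), (3, NULL);

    -- payload.String is not a declared column; it is resolved as a dynamic
    -- subcolumn of payload and yields NULL for rows stored with another type.
    SELECT id, payload.String FROM events;
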
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 570175f6614..2e2d1dbed4d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2392,6 +2392,36 @@ void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const exception_code = code; } +ColumnPtr IMergeTreeDataPart::readColumnSample(const NameAndTypePair & column) const +{ + const size_t total_mark = getMarksCount(); + if (!total_mark) + return column.type->createColumn(); + + NamesAndTypesList cols; + cols.emplace_back(column); + + StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr(); + StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr); + + MergeTreeReaderPtr reader = getReader( + cols, + storage_snapshot_ptr, + MarkRanges{MarkRange(0, 1)}, + /*virtual_fields=*/ {}, + /*uncompressed_cache=*/{}, + storage.getContext()->getMarkCache().get(), + std::make_shared(), + MergeTreeReaderSettings{}, + ValueSizeMap{}, + ReadBufferFromFileBase::ProfileCallback{}); + + Columns result; + result.resize(1); + reader->readRows(0, 1, false, 0, result); + return result[0]; +} + bool isCompactPart(const MergeTreeDataPartPtr & data_part) { return (data_part && data_part->getType() == MergeTreeDataPartType::Compact); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 7519980a7a3..78619f216c0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -166,6 +166,8 @@ public: NameAndTypePair getColumn(const String & name) const; std::optional tryGetColumn(const String & column_name) const; + ColumnPtr readColumnSample(const NameAndTypePair & column) const; + const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; } SerializationPtr getSerialization(const String & column_name) const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 08a2ff89e7b..c47297be84d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3660,7 +3660,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts continue; auto storage_column = columns.getPhysical(part_column.name); - if (!storage_column.type->hasDynamicSubcolumns()) + if (!storage_column.type->hasDynamicSubcolumnsDeprecated()) continue; auto concrete_storage_column = object_columns.getPhysical(part_column.name); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 046376be474..089793beab8 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -434,6 +434,7 @@ public: bool supportsTTL() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool supportsLightweightDelete() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 1605e5cdb9a..d0a685d95fc 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -44,21 +44,27 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( marks_source_hashing = std::make_unique(*marks_compressor); } +} + +void 
MergeTreeDataPartWriterCompact::initStreamsIfNeeded(const Block & block) +{ + if (!compressed_streams.empty()) + return; auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); - addStreams(column, compression); + addStreams(column, block.getByName(column.name).column, compression); } } -void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc) +void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc) { ISerialization::StreamCallback callback = [&](const auto & substream_path) { assert(!substream_path.empty()); - String stream_name = ISerialization::getFileNameForStream(column, substream_path); + String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); /// Shared offsets for Nested type. if (compressed_streams.contains(stream_name)) @@ -81,7 +87,7 @@ void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, compressed_streams.emplace(stream_name, stream); }; - data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); + data_part->getSerialization(name_and_type.name)->enumerateStreams(callback, name_and_type.type, column); } namespace @@ -138,6 +144,7 @@ void writeColumnSingleGranule( serialize_settings.getter = stream_getter; serialize_settings.position_independent_encoding = true; serialize_settings.low_cardinality_max_dictionary_size = 0; + serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::PREFIX; serialization->serializeBinaryBulkStatePrefix(*column.column, serialize_settings, state); serialization->serializeBinaryBulkWithMultipleStreams(*column.column, from_row, number_of_rows, serialize_settings, state); @@ -148,6 +155,8 @@ void writeColumnSingleGranule( void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::Permutation * permutation) { + initStreamsIfNeeded(block); + /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, /// but not in case of vertical merge) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index ddb6178dce6..1c748803c52 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -42,7 +42,9 @@ private: void addToChecksums(MergeTreeDataPartChecksums & checksums); - void addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc); + void addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc); + + void initStreamsIfNeeded(const Block & block); Block header; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 6a3b08d4d65..c23a9a81cbc 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -89,16 +89,25 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( indices_to_recalc_, stats_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { +} + +void MergeTreeDataPartWriterWide::initStreamsIfNeeded(const DB::Block & block) +{ + if 
(!column_streams.empty()) + return; + + block_sample = block.cloneEmpty(); auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); - addStreams(column, compression); + addStreams(column, block_sample.getByName(column.name).column, compression); } } void MergeTreeDataPartWriterWide::addStreams( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column, const ASTPtr & effective_codec_desc) { ISerialization::StreamCallback callback = [&](const auto & substream_path) @@ -106,7 +115,7 @@ void MergeTreeDataPartWriterWide::addStreams( assert(!substream_path.empty()); auto storage_settings = storage.getSettings(); - auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); + auto full_stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); String stream_name; if (storage_settings->replace_long_file_name_to_hash && full_stream_name.size() > storage_settings->max_file_name_length) @@ -138,7 +147,7 @@ void MergeTreeDataPartWriterWide::addStreams( auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); - const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage()); + const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), name_and_type.getNameInStorage()); UInt64 max_compress_block_size = 0; if (column_desc) @@ -163,7 +172,7 @@ void MergeTreeDataPartWriterWide::addStreams( }; ISerialization::SubstreamPath path; - data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); + data_part->getSerialization(name_and_type.name)->enumerateStreams(callback, name_and_type.type, column); } const String & MergeTreeDataPartWriterWide::getStreamName( @@ -222,6 +231,8 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Permutation * permutation) { + initStreamsIfNeeded(block); + /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, /// but not in case of vertical part of vertical merge) @@ -302,11 +313,12 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm } void MergeTreeDataPartWriterWide::writeSingleMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns, size_t number_of_rows) { - StreamsWithMarks marks = getCurrentMarksForColumn(column, offset_columns); + auto * sample_column = block_sample.findByName(name_and_type.name); + StreamsWithMarks marks = getCurrentMarksForColumn(name_and_type, sample_column ? 
sample_column->column : nullptr, offset_columns); for (const auto & mark : marks) flushMarkToFile(mark, number_of_rows); } @@ -323,21 +335,22 @@ void MergeTreeDataPartWriterWide::flushMarkToFile(const StreamNameAndMark & stre } StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column_sample, WrittenOffsetColumns & offset_columns) { StreamsWithMarks result; - const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage()); + const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), name_and_type.getNameInStorage()); UInt64 min_compress_block_size = 0; if (column_desc) if (const auto * value = column_desc->settings.tryGet("min_compress_block_size")) min_compress_block_size = value->safeGet(); if (!min_compress_block_size) min_compress_block_size = settings.min_compress_block_size; - data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + data_part->getSerialization(name_and_type.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; - auto stream_name = getStreamName(column, substream_path); + auto stream_name = getStreamName(name_and_type, substream_path); /// Don't write offsets more than one time for Nested type. if (is_offsets && offset_columns.contains(stream_name)) @@ -355,7 +368,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( stream_with_mark.mark.offset_in_decompressed_block = stream.compressed_hashing.offset(); result.push_back(stream_with_mark); - }); + }, name_and_type.type, column_sample); return result; } @@ -382,7 +395,7 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( return; column_streams.at(stream_name)->compressed_hashing.nextIfAtEnd(); - }); + }, name_and_type.type, column.getPtr()); } /// Column must not be empty. (column.size() !== 0) @@ -424,7 +437,7 @@ void MergeTreeDataPartWriterWide::writeColumn( "We have to add new mark for column, but already have non written mark. " "Current mark {}, total marks {}, offset {}", getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark); - last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, offset_columns); + last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, column.getPtr(), offset_columns); } writeSingleGranule( @@ -453,7 +466,7 @@ void MergeTreeDataPartWriterWide::writeColumn( bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) offset_columns.insert(getStreamName(name_and_type, substream_path)); - }); + }, name_and_type.type, column.getPtr()); } @@ -622,6 +635,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum if (!serialization_states.empty()) { serialize_settings.getter = createStreamGetter(*it, written_offset_columns ? 
*written_offset_columns : offset_columns); + serialize_settings.dynamic_write_statistics = ISerialization::SerializeBinaryBulkSettings::DynamicStatisticsMode::SUFFIX; data_part->getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); } @@ -703,17 +717,17 @@ void MergeTreeDataPartWriterWide::finish(bool sync) } void MergeTreeDataPartWriterWide::writeFinalMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns) { - writeSingleMark(column, offset_columns, 0); + writeSingleMark(name_and_type, offset_columns, 0); /// Memoize information about offsets - data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + data_part->getSerialization(name_and_type.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) - offset_columns.insert(getStreamName(column, substream_path)); - }); + offset_columns.insert(getStreamName(name_and_type, substream_path)); + }, name_and_type.type, block_sample.getByName(name_and_type.name).column); } static void fillIndexGranularityImpl( diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index f5ff323563d..ebdd907914f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -63,7 +63,8 @@ private: /// Take offsets from column and return as MarkInCompressed file with stream name StreamsWithMarks getCurrentMarksForColumn( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column_sample, WrittenOffsetColumns & offset_columns); /// Write mark to disk using stream and rows count @@ -73,18 +74,21 @@ private: /// Write mark for column taking offsets from column stream void writeSingleMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns, size_t number_of_rows); void writeFinalMark( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, WrittenOffsetColumns & offset_columns); void addStreams( - const NameAndTypePair & column, + const NameAndTypePair & name_and_type, + const ColumnPtr & column, const ASTPtr & effective_codec_desc); + void initStreamsIfNeeded(const Block & block); + /// Method for self check (used in debug-build only). Checks that written /// data and corresponding marks are consistent. Otherwise throws logical /// errors. @@ -129,6 +133,8 @@ private: /// How many rows we have already written in the current mark. /// More than zero when incoming blocks are smaller then their granularity. 
size_t rows_written_in_last_mark = 0; + + Block block_sample; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index cadd94867ec..ad60e31dddc 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -422,7 +422,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( auto columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); for (auto & column : columns) - if (column.type->hasDynamicSubcolumns()) + if (column.type->hasDynamicSubcolumnsDeprecated()) column.type = block.getByName(column.name).type; auto minmax_idx = std::make_shared(); diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index dba2bc1e56c..02a3f1b1165 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -116,7 +116,7 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd ISerialization::DeserializeBinaryBulkStatePtr state; auto serialization = type->getDefaultSerialization(); - serialization->deserializeBinaryBulkStatePrefix(settings, state); + serialization->deserializeBinaryBulkStatePrefix(settings, state, nullptr); serialization->deserializeBinaryBulkWithMultipleStreams(new_column, rows_to_read, settings, state, nullptr); block.insert(ColumnWithTypeAndName(new_column, type, column.name)); diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index a22bff6b8d2..7504ce3cc5f 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -195,7 +195,7 @@ void MergeTreeReaderCompact::readPrefix( deserialize_settings.getter = buffer_getter_for_prefix; ISerialization::DeserializeBinaryBulkStatePtr state_for_prefix; - serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix); + serialization_for_prefix->deserializeBinaryBulkStatePrefix(deserialize_settings, state_for_prefix, nullptr); } SerializationPtr serialization; @@ -206,7 +206,8 @@ void MergeTreeReaderCompact::readPrefix( deserialize_settings.getter = buffer_getter; - serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name]); + deserialize_settings.dynamic_read_statistics = true; + serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name_and_type.name], nullptr); } catch (Exception & e) { diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 394a22835f1..c8bf12436b0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -43,11 +44,13 @@ MergeTreeReaderWide::MergeTreeReaderWide( mark_ranges_, settings_, avg_value_size_hints_) + , profile_callback(profile_callback_) + , clock_type(clock_type_) { try { for (size_t i = 0; i < columns_to_read.size(); ++i) - addStreams(columns_to_read[i], serializations[i], profile_callback_, clock_type_); + addStreams(columns_to_read[i], serializations[i]); } catch (...) 
{ @@ -100,9 +103,10 @@ void MergeTreeReaderWide::prefetchForAllColumns( try { auto & cache = caches[columns_to_read[pos].getNameInStorage()]; + auto & deserialize_states_cache = deserialize_states_caches[columns_to_read[pos].getNameInStorage()]; prefetchForColumn( priority, columns_to_read[pos], serializations[pos], from_mark, continue_reading, - current_task_last_mark, cache); + current_task_last_mark, cache, deserialize_states_cache); } catch (Exception & e) { @@ -147,11 +151,12 @@ size_t MergeTreeReaderWide::readRows( { size_t column_size_before_reading = column->size(); auto & cache = caches[column_to_read.getNameInStorage()]; + auto & deserialize_states_cache = deserialize_states_caches[column_to_read.getNameInStorage()]; readData( column_to_read, serializations[pos], column, from_mark, continue_reading, current_task_last_mark, - max_rows_to_read, cache, /* was_prefetched =*/ !prefetched_streams.empty()); + max_rows_to_read, cache, deserialize_states_cache, /* was_prefetched =*/ !prefetched_streams.empty()); /// For elements of Nested, column_size_before_reading may be greater than column size /// if offsets are not empty and were already read, but elements are empty. @@ -199,9 +204,7 @@ size_t MergeTreeReaderWide::readRows( void MergeTreeReaderWide::addStreams( const NameAndTypePair & name_and_type, - const SerializationPtr & serialization, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, - clockid_t clock_type) + const SerializationPtr & serialization) { bool has_any_stream = false; bool has_all_streams = true; @@ -225,29 +228,8 @@ void MergeTreeReaderWide::addStreams( return; } - auto context = data_part_info_for_read->getContext(); - auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? &context->getLoadMarksThreadpool() : nullptr; - - auto marks_loader = std::make_shared( - data_part_info_for_read, - mark_cache, - data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(*stream_name), - data_part_info_for_read->getMarksCount(), - data_part_info_for_read->getIndexGranularityInfo(), - settings.save_marks_in_cache, - settings.read_settings, - load_marks_threadpool, - /*num_columns_in_mark=*/ 1); - + addStream(substream_path, *stream_name); has_any_stream = true; - auto stream_settings = settings; - stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; - - streams.emplace(*stream_name, std::make_unique( - data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, - data_part_info_for_read->getMarksCount(), all_mark_ranges, stream_settings, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), - std::move(marks_loader), profile_callback, clock_type)); }; serialization->enumerateStreams(callback); @@ -256,11 +238,36 @@ void MergeTreeReaderWide::addStreams( partially_read_columns.insert(name_and_type.name); } -static ReadBuffer * getStream( +MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const ISerialization::SubstreamPath & substream_path, const String & stream_name) +{ + auto context = data_part_info_for_read->getContext(); + auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; + + auto marks_loader = std::make_shared( + data_part_info_for_read, + mark_cache, + data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(stream_name), + data_part_info_for_read->getMarksCount(), + data_part_info_for_read->getIndexGranularityInfo(), + settings.save_marks_in_cache, + settings.read_settings, + load_marks_threadpool, + /*num_columns_in_mark=*/ 1); + + auto stream_settings = settings; + stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; + + return streams.emplace(stream_name, std::make_unique( + data_part_info_for_read->getDataPartStorage(), stream_name, DATA_FILE_EXTENSION, + data_part_info_for_read->getMarksCount(), all_mark_ranges, stream_settings, + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION), + std::move(marks_loader), profile_callback, clock_type)).first; +} + +ReadBuffer * MergeTreeReaderWide::getStream( bool seek_to_start, const ISerialization::SubstreamPath & substream_path, const MergeTreeDataPartChecksums & checksums, - MergeTreeReaderWide::FileStreams & streams, const NameAndTypePair & name_and_type, size_t from_mark, bool seek_to_mark, @@ -277,7 +284,13 @@ static ReadBuffer * getStream( auto it = streams.find(*stream_name); if (it == streams.end()) - return nullptr; + { + /// If we didn't create requested stream, but file with this path exists, create a stream for it. + /// It may happen during reading of columns with dynamic subcolumns, because all streams are known + /// only after deserializing of binary bulk prefix. + + it = addStream(substream_path, *stream_name); + } MergeTreeReaderStream & stream = *it->second; stream.adjustRightMark(current_task_last_mark); @@ -294,17 +307,19 @@ void MergeTreeReaderWide::deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache) + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache) { const auto & name = name_and_type.name; if (!deserialize_binary_bulk_state_map.contains(name)) { ISerialization::DeserializeBinaryBulkSettings deserialize_settings; + deserialize_settings.dynamic_read_statistics = true; deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { - return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); + return getStream(/* seek_to_start = */true, substream_path, data_part_info_for_read->getChecksums(), name_and_type, 0, /* seek_to_mark = */false, current_task_last_mark, cache); }; - serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]); + serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name], &deserialize_states_cache); } } @@ -315,9 +330,10 @@ void MergeTreeReaderWide::prefetchForColumn( size_t from_mark, bool continue_reading, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache) + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache) { - deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); + 
deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { @@ -326,7 +342,7 @@ void MergeTreeReaderWide::prefetchForColumn( if (stream_name && !prefetched_streams.contains(*stream_name)) { bool seek_to_mark = !continue_reading; - if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), streams, name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) + if (ReadBuffer * buf = getStream(false, substream_path, data_part_info_for_read->getChecksums(), name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache)) { buf->prefetch(priority); prefetched_streams.insert(*stream_name); @@ -337,15 +353,22 @@ void MergeTreeReaderWide::prefetchForColumn( void MergeTreeReaderWide::readData( - const NameAndTypePair & name_and_type, const SerializationPtr & serialization, ColumnPtr & column, - size_t from_mark, bool continue_reading, size_t current_task_last_mark, - size_t max_rows_to_read, ISerialization::SubstreamsCache & cache, bool was_prefetched) + const NameAndTypePair & name_and_type, + const SerializationPtr & serialization, + ColumnPtr & column, + size_t from_mark, + bool continue_reading, + size_t current_task_last_mark, + size_t max_rows_to_read, + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache, + bool was_prefetched) { double & avg_value_size_hint = avg_value_size_hints[name_and_type.name]; ISerialization::DeserializeBinaryBulkSettings deserialize_settings; deserialize_settings.avg_value_size_hint = avg_value_size_hint; - deserializePrefix(serialization, name_and_type, current_task_last_mark, cache); + deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) { @@ -353,7 +376,7 @@ void MergeTreeReaderWide::readData( return getStream( /* seek_to_start = */false, substream_path, - data_part_info_for_read->getChecksums(), streams, + data_part_info_for_read->getChecksums(), name_and_type, from_mark, seek_to_mark, current_task_last_mark, cache); }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index a9a5526dd65..1eef21b455b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -45,14 +45,31 @@ private: void addStreams( const NameAndTypePair & name_and_type, - const SerializationPtr & serialization, - const ReadBufferFromFileBase::ProfileCallback & profile_callback, - clockid_t clock_type); + const SerializationPtr & serialization); + + ReadBuffer * getStream( + bool seek_to_start, + const ISerialization::SubstreamPath & substream_path, + const MergeTreeDataPartChecksums & checksums, + const NameAndTypePair & name_and_type, + size_t from_mark, + bool seek_to_mark, + size_t current_task_last_mark, + ISerialization::SubstreamsCache & cache); + + FileStreams::iterator addStream(const ISerialization::SubstreamPath & substream_path, const String & stream_name); void readData( - const NameAndTypePair & name_and_type, const SerializationPtr & serialization, ColumnPtr & column, - size_t from_mark, bool continue_reading, size_t current_task_last_mark, size_t max_rows_to_read, - ISerialization::SubstreamsCache & cache, bool was_prefetched); + const NameAndTypePair & name_and_type, + const 
SerializationPtr & serialization, + ColumnPtr & column, + size_t from_mark, + bool continue_reading, + size_t current_task_last_mark, + size_t max_rows_to_read, + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache, + bool was_prefetched); /// Make next readData more simple by calling 'prefetch' of all related ReadBuffers (column streams). void prefetchForColumn( @@ -62,17 +79,22 @@ private: size_t from_mark, bool continue_reading, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache); + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache); void deserializePrefix( const SerializationPtr & serialization, const NameAndTypePair & name_and_type, size_t current_task_last_mark, - ISerialization::SubstreamsCache & cache); + ISerialization::SubstreamsCache & cache, + ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache); std::unordered_map caches; + std::unordered_map deserialize_states_caches; std::unordered_set prefetched_streams; ssize_t prefetched_from_mark = -1; + ReadBufferFromFileBase::ProfileCallback profile_callback; + clockid_t clock_type; }; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 9c67a86997b..3ddd6b21ffb 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,6 +43,7 @@ struct Settings; M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ + /** M(UInt64, max_types_for_dynamic_serialization, 32, "The maximum number of different types in Dynamic column stored separately in MergeTree tables in wide format. If exceeded, new types will be converted to String", 0) */ \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f67e9484598..b2817b386fa 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -777,7 +777,13 @@ static NameToNameVector collectFilesForRenames( }; if (auto serialization = source_part->tryGetSerialization(command.column_name)) - serialization->enumerateStreams(callback); + { + auto name_and_type = source_part->getColumn(command.column_name); + ColumnPtr column_sample; + if (name_and_type.type->hasDynamicSubcolumns()) + column_sample = source_part->readColumnSample(name_and_type); + serialization->enumerateStreams(callback, name_and_type.type, column_sample); + } /// if we drop a column with statistic, we should also drop the stat file. 
if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) @@ -813,7 +819,13 @@ static NameToNameVector collectFilesForRenames( }; if (auto serialization = source_part->tryGetSerialization(command.column_name)) - serialization->enumerateStreams(callback); + { + auto name_and_type = source_part->getColumn(command.column_name); + ColumnPtr column_sample; + if (name_and_type.type->hasDynamicSubcolumns()) + column_sample = source_part->readColumnSample(name_and_type); + serialization->enumerateStreams(callback, name_and_type.type, column_sample); + } /// if we rename a column with statistic, we should also rename the stat file. if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index ca8ed9abdb5..a94508ad41f 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -87,6 +87,7 @@ public: bool supportsPrewhere() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool supportsSubcolumns() const override { return true; } diff --git a/src/Storages/S3Queue/StorageS3Queue.h b/src/Storages/S3Queue/StorageS3Queue.h index 1f735b47819..fce6736aa07 100644 --- a/src/Storages/S3Queue/StorageS3Queue.h +++ b/src/Storages/S3Queue/StorageS3Queue.h @@ -81,6 +81,7 @@ private: void drop() override; bool supportsSubsetOfColumns(const ContextPtr & context_) const; bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } std::shared_ptr createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate); std::shared_ptr createSource( diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 27ac7a5c368..be0e88b9b6d 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -98,6 +98,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsSubsetOfColumns(const ContextPtr & context) const; bool supportsTrivialCountOptimization() const override { return true; } diff --git a/src/Storages/StorageAzureBlobCluster.h b/src/Storages/StorageAzureBlobCluster.h index 545e568a772..9521ae4d24e 100644 --- a/src/Storages/StorageAzureBlobCluster.h +++ b/src/Storages/StorageAzureBlobCluster.h @@ -35,6 +35,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 6c15c7e0238..cd6dd7b933f 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -89,6 +89,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool /*async_insert*/) override; void startup() override; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 12c2ad331ad..5d499fb319b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp 
@@ -712,7 +712,7 @@ static bool requiresObjectColumns(const ColumnsDescription & all_columns, ASTPtr auto name_in_storage = Nested::splitName(required_column).first; auto column_in_storage = all_columns.tryGetPhysical(name_in_storage); - if (column_in_storage && column_in_storage->type->hasDynamicSubcolumns()) + if (column_in_storage && column_in_storage->type->hasDynamicSubcolumnsDeprecated()) return true; } diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 3a7e63aef50..85a8de86953 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -85,6 +85,7 @@ public: bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } StoragePolicyPtr getStoragePolicy() const override; diff --git a/src/Storages/StorageDummy.h b/src/Storages/StorageDummy.h index e9d8f90f755..a07a5600870 100644 --- a/src/Storages/StorageDummy.h +++ b/src/Storages/StorageDummy.h @@ -20,6 +20,7 @@ public: bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool canMoveConditionsToPrewhere() const override { diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 93c263008a6..566c407a798 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -90,6 +90,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool prefersLargeBlocks() const override; bool parallelizeOutputAfterReading(ContextPtr context) const override; diff --git a/src/Storages/StorageFileCluster.h b/src/Storages/StorageFileCluster.h index 3acbc71ba7e..b8bb3fd5ea1 100644 --- a/src/Storages/StorageFileCluster.h +++ b/src/Storages/StorageFileCluster.h @@ -32,6 +32,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 1ac739f03fd..fcd14fb8ec1 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -628,7 +628,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns) const auto * available_type = it->getMapped(); - if (!available_type->hasDynamicSubcolumns() + if (!available_type->hasDynamicSubcolumnsDeprecated() && !column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( @@ -676,7 +676,7 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, const auto * provided_column_type = it->getMapped(); const auto * available_column_type = jt->getMapped(); - if (!provided_column_type->hasDynamicSubcolumns() + if (!provided_column_type->hasDynamicSubcolumnsDeprecated() && !provided_column_type->equals(*available_column_type) && !isCompatibleEnumTypes(available_column_type, provided_column_type)) throw Exception( @@ -720,7 +720,7 @@ void 
StorageInMemoryMetadata::check(const Block & block, bool need_all) const listOfColumns(available_columns)); const auto * available_type = it->getMapped(); - if (!available_type->hasDynamicSubcolumns() + if (!available_type->hasDynamicSubcolumnsDeprecated() && !column.type->equals(*available_type) && !isCompatibleEnumTypes(available_type, column.type.get())) throw Exception( diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 549cfca1b6c..7f09236454c 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -252,7 +252,7 @@ void LogSource::readData(const NameAndTypePair & name_and_type, ColumnPtr & colu if (!deserialize_states.contains(name)) { settings.getter = create_stream_getter(true); - serialization->deserializeBinaryBulkStatePrefix(settings, deserialize_states[name]); + serialization->deserializeBinaryBulkStatePrefix(settings, deserialize_states[name], nullptr); } settings.getter = create_stream_getter(false); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 198b7a642ee..0d906a933f7 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -32,6 +32,7 @@ public: bool supportsFinal() const override { return getTargetTable()->supportsFinal(); } bool supportsParallelInsert() const override { return getTargetTable()->supportsParallelInsert(); } bool supportsSubcolumns() const override { return getTargetTable()->supportsSubcolumns(); } + bool supportsDynamicSubcolumns() const override { return getTargetTable()->supportsDynamicSubcolumns(); } bool supportsTransactions() const override { return getTargetTable()->supportsTransactions(); } SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 13f1c971d82..ef422a6c872 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -58,6 +58,7 @@ public: bool supportsParallelInsert() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumnsDeprecated() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } /// Smaller blocks (e.g. 64K rows) are better for CPU cache. 
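Most of the storages touched above and below only need to advertise the new capability: once supportsDynamicSubcolumns() returns true, a Dynamic column can be used outside MergeTree and is read through the same subcolumn resolution. A minimal sketch with the Memory engine, which the tests in this series also use (assuming the experimental Dynamic type; the table name and the d.Int64 subcolumn access are illustrative):

set allow_experimental_dynamic_type = 1;
create table t_mem (d Dynamic) engine=Memory;
insert into t_mem values (42), ('str'), ([1, 2, 3]), (NULL);
-- dynamicType reports the type stored in each row; subcolumn reads behave the
-- same way as for MergeTree tables.
select d, dynamicType(d), d.Int64 from t_mem;
drop table t_mem;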
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index c049d50f3b4..b08bef0a143 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -49,6 +49,7 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } bool supportsPrewhere() const override { return tableSupportsPrewhere(); } std::optional supportedPrewhereColumns() const override; diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index f7ee936db8d..74abf931f8f 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -48,6 +48,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + SinkToStoragePtr write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, bool) override { return std::make_shared(metadata_snapshot->getSampleBlock()); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index d1f15edfd6d..3a20872bbe4 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -385,6 +385,8 @@ private: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsSubsetOfColumns(const ContextPtr & context) const; bool prefersLargeBlocks() const override; diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 6a5b03e682f..3ec84b363fb 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -32,6 +32,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization() const override { return true; } protected: diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 8b087a4a2bc..aada25168f8 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -115,7 +115,7 @@ std::optional StorageSnapshot::tryGetColumn(const GetColumnsOpt { const auto & columns = getMetadataForQuery()->getColumns(); auto column = columns.tryGetColumn(options, column_name); - if (column && (!column->type->hasDynamicSubcolumns() || !options.with_extended_objects)) + if (column && (!column->type->hasDynamicSubcolumnsDeprecated() || !options.with_extended_objects)) return column; if (options.with_extended_objects) diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 842cfd5b627..3fd7a7f097f 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -295,6 +295,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); struct Configuration : public StatelessTableEngineConfiguration diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h index dce2e0106ea..ad8113517c5 100644 --- a/src/Storages/StorageURLCluster.h +++ b/src/Storages/StorageURLCluster.h @@ -35,6 +35,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization() const override { return true; } private: diff --git a/src/Storages/getStructureOfRemoteTable.cpp 
b/src/Storages/getStructureOfRemoteTable.cpp index 26e953c0578..6ea7bdc312d 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -210,7 +210,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( auto type_name = type_col[i].get(); auto storage_column = storage_columns.tryGetPhysical(name); - if (storage_column && storage_column->type->hasDynamicSubcolumns()) + if (storage_column && storage_column->type->hasDynamicSubcolumnsDeprecated()) res.add(ColumnDescription(std::move(name), DataTypeFactory::instance().get(type_name))); } } diff --git a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh index b816a20c818..6bbd127d933 100755 --- a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh +++ b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1 --max_insert_threads 4 --group_by_two_level_threshold 752249 --group_by_two_level_threshold_bytes 15083870 --distributed_aggregation_memory_efficient 1 --fsync_metadata 1 --output_format_parallel_formatting 0 --input_format_parallel_parsing 0 --min_chunk_bytes_for_parallel_parsing 6583861 --max_read_buffer_size 640584 --prefer_localhost_replica 1 --max_block_size 38844 --max_threads 48 --optimize_append_index 0 --optimize_if_chain_to_multiif 1 --optimize_if_transform_strings_to_enum 0 --optimize_read_in_order 1 --optimize_or_like_chain 0 --optimize_substitute_columns 1 --enable_multiple_prewhere_read_steps 1 --read_in_order_two_level_merge_threshold 4 --optimize_aggregation_in_order 0 --aggregation_in_order_max_block_bytes 18284646 --use_uncompressed_cache 1 --min_bytes_to_use_direct_io 10737418240 --min_bytes_to_use_mmap_io 10737418240 --local_filesystem_read_method pread --remote_filesystem_read_method read --local_filesystem_read_prefetch 1 --filesystem_cache_segments_batch_size 0 --read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 --throw_on_error_from_cache_on_write_operations 1 --remote_filesystem_read_prefetch 0 --allow_prefetched_read_pool_for_remote_filesystem 0 --filesystem_prefetch_max_memory_usage 128Mi --filesystem_prefetches_limit 0 --filesystem_prefetch_min_bytes_for_single_read_task 16Mi --filesystem_prefetch_step_marks 50 --filesystem_prefetch_step_bytes 0 --compile_aggregate_expressions 1 --compile_sort_description 0 --merge_tree_coarse_index_granularity 31 --optimize_distinct_in_order 1 --max_bytes_before_external_sort 1 --max_bytes_before_external_group_by 1 --max_bytes_before_remerge_sort 2640239625 --min_compress_block_size 3114155 --max_compress_block_size 226550 --merge_tree_compact_parts_min_granules_to_multibuffer_read 118 --optimize_sorting_by_input_stream_properties 0 --http_response_buffer_size 543038 --http_wait_end_of_query False --enable_memory_bound_merging_of_aggregation_results 1 --min_count_to_compile_expression 3 --min_count_to_compile_aggregate_expression 3 --min_count_to_compile_sort_description 0 --session_timezone America/Mazatlan --prefer_warmed_unmerged_parts_seconds 8 --use_page_cache_for_disks_without_file_cache False --page_cache_inject_eviction True 
--merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.82 " function test() diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.reference b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference new file mode 100644 index 00000000000..d965245266c --- /dev/null +++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference @@ -0,0 +1,55 @@ +JSON +{"d":"42","dynamicType(d)":"Int64"} +{"d":42.42,"dynamicType(d)":"Float64"} +{"d":"str","dynamicType(d)":"String"} +{"d":["1","2","3"],"dynamicType(d)":"Array(Int64)"} +{"d":"2020-01-01","dynamicType(d)":"Date"} +{"d":"2020-01-01 10:00:00.000000000","dynamicType(d)":"DateTime64(9)"} +{"d":{"a":"42","b":"str"},"dynamicType(d)":"Tuple(a Int64, b String)"} +{"d":{"a":"43"},"dynamicType(d)":"Tuple(a Int64)"} +{"d":{"a":"44","c":["1","2","3"]},"dynamicType(d)":"Tuple(a Int64, c Array(Int64))"} +{"d":["1","str",["1","2","3"]],"dynamicType(d)":"Tuple(Int64, String, Array(Int64))"} +{"d":null,"dynamicType(d)":"None"} +{"d":true,"dynamicType(d)":"Bool"} +{"d":"42","dynamicType(d)":"Int64"} +{"d":"42.42","dynamicType(d)":"String"} +{"d":"str","dynamicType(d)":"String"} +{"d":null,"dynamicType(d)":"None"} +{"d":"1","dynamicType(d)":"Int64"} +CSV +42,"Int64" +42.42,"Float64" +"str","String" +"[1,2,3]","Array(Int64)" +"2020-01-01","Date" +"2020-01-01 10:00:00.000000000","DateTime64(9)" +"[1, 'str', [1, 2, 3]]","String" +\N,"None" +true,"Bool" +TSV +42 Int64 +42.42 Float64 +str String +[1,2,3] Array(Int64) +2020-01-01 Date +2020-01-01 10:00:00.000000000 DateTime64(9) +[1, \'str\', [1, 2, 3]] String +\N None +true Bool +Values +(42,'Int64'),(42.42,'Float64'),('str','String'),([1,2,3],'Array(Int64)'),('2020-01-01','Date'),('2020-01-01 10:00:00.000000000','DateTime64(9)'),(NULL,'None'),(true,'Bool') +Cast using parsing +42 Int64 +42.42 Float64 +[1,2,3] Array(Int64) +2020-01-01 Date +2020-01-01 10:00:00.000000000 DateTime64(9) +\N None +true Bool +42 Int64 +42.42 Float64 +[1, 2, 3] String +2020-01-01 String +2020-01-01 10:00:00 String +\N None +true String diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.sql b/tests/queries/0_stateless/03033_dynamic_text_serialization.sql new file mode 100644 index 00000000000..d12d110fe28 --- /dev/null +++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.sql @@ -0,0 +1,74 @@ +set allow_experimental_dynamic_type = 1; + +select 'JSON'; +select d, dynamicType(d) from format(JSONEachRow, 'd Dynamic', $$ +{"d" : 42} +{"d" : 42.42} +{"d" : "str"} +{"d" : [1, 2, 3]} +{"d" : "2020-01-01"} +{"d" : "2020-01-01 10:00:00"} +{"d" : {"a" : 42, "b" : "str"}} +{"d" : {"a" : 43}} +{"d" : {"a" : 44, "c" : [1, 2, 3]}} +{"d" : [1, "str", [1, 2, 3]]} +{"d" : null} +{"d" : true} +$$) format JSONEachRow; + +select d, dynamicType(d) from format(JSONEachRow, 'd Dynamic(max_types=2)', $$ +{"d" : 42} +{"d" : 42.42} +{"d" : "str"} +{"d" : null} +{"d" : true} +$$) format JSONEachRow; + +select 'CSV'; +select d, dynamicType(d) from format(CSV, 'd Dynamic', +$$42 +42.42 +"str" +"[1, 2, 3]" +"2020-01-01" +"2020-01-01 10:00:00" +"[1, 'str', [1, 2, 3]]" +\N +true +$$) format CSV; + +select 'TSV'; +select d, dynamicType(d) from format(TSV, 'd Dynamic', +$$42 +42.42 +str +[1, 2, 3] +2020-01-01 +2020-01-01 10:00:00 +[1, 'str', [1, 2, 3]] +\N +true +$$) format TSV; + +select 'Values'; +select d, dynamicType(d) from format(Values, 'd Dynamic', $$ +(42) +(42.42) +('str') +([1, 2, 3]) +('2020-01-01') +('2020-01-01 10:00:00') +(NULL) +(true) +$$) format 
Values; +select ''; + +select 'Cast using parsing'; +drop table if exists test; +create table test (s String) engine=Memory; +insert into test values ('42'), ('42.42'), ('[1, 2, 3]'), ('2020-01-01'), ('2020-01-01 10:00:00'), ('NULL'), ('true'); +set cast_string_to_dynamic_use_inference=1; +select s::Dynamic as d, dynamicType(d) from test; +select s::Dynamic(max_types=3) as d, dynamicType(d) from test; +drop table test; + diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.reference b/tests/queries/0_stateless/03034_dynamic_conversions.reference new file mode 100644 index 00000000000..af91add9ddd --- /dev/null +++ b/tests/queries/0_stateless/03034_dynamic_conversions.reference @@ -0,0 +1,63 @@ +0 UInt64 +1 UInt64 +2 UInt64 +0 String +1 String +2 String +0 +1 +2 +0 +1 +2 +1970-01-01 +1970-01-02 +1970-01-03 +0 UInt64 +1 UInt64 +2 UInt64 +0 UInt64 +\N None +2 UInt64 +0 UInt64 +str_1 String +[0,1] Array(UInt64) +\N None +4 UInt64 +str_5 String +0 String +str_1 String +[0,1] String +\N None +4 String +str_5 String +0 UInt64 +str_1 String +[0,1] String +\N None +4 UInt64 +str_5 String +0 UInt64 +str_1 String +[0,1] Array(UInt64) +\N None +4 UInt64 +str_5 String +0 +1 +2 +0 +1 +2 +0 UInt64 +str_1 String +[0,1] String +\N None +4 UInt64 +str_5 String +0 UInt64 +1970-01-02 Date +[0,1] String +\N None +4 UInt64 +1970-01-06 Date diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.sql b/tests/queries/0_stateless/03034_dynamic_conversions.sql new file mode 100644 index 00000000000..e9b4944f5d8 --- /dev/null +++ b/tests/queries/0_stateless/03034_dynamic_conversions.sql @@ -0,0 +1,24 @@ +set allow_experimental_dynamic_type=1; +set allow_experimental_variant_type=1; +set use_variant_as_common_type=1; + +select number::Dynamic as d, dynamicType(d) from numbers(3); +select number::Dynamic(max_types=1) as d, dynamicType(d) from numbers(3); +select number::Dynamic::UInt64 as v from numbers(3); +select number::Dynamic::String as v from numbers(3); +select number::Dynamic::Date as v from numbers(3); +select number::Dynamic::Array(UInt64) as v from numbers(3); -- {serverError TYPE_MISMATCH} +select number::Dynamic::Variant(UInt64, String) as v, variantType(v) from numbers(3); +select (number % 2 ? 
NULL : number)::Dynamic as d, dynamicType(d) from numbers(3); + +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=1) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=2) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); + +select number::Dynamic(max_types=2)::Dynamic(max_types=3) as d from numbers(3); +select number::Dynamic(max_types=2)::Dynamic(max_types=1) as d from numbers(3); +select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=3)::Dynamic(max_types=2) as d, dynamicType(d) from numbers(6); +select multiIf(number % 4 == 0, number, number % 4 == 1, toDate(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=4)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); + + diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.reference b/tests/queries/0_stateless/03035_dynamic_sorting.reference new file mode 100644 index 00000000000..9b8df11c7a9 --- /dev/null +++ b/tests/queries/0_stateless/03035_dynamic_sorting.reference @@ -0,0 +1,299 @@ +order by d1 nulls first +\N None +\N None +\N None +\N None +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +order by d1 nulls last +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +\N None +\N None +\N None +\N None +order by d2 nulls first +\N None +\N None +\N None +\N None +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +order by d2 nulls last +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,3] Array(Int64) +[1,2,4] Array(Int64) +42 Int64 +42 Int64 +42 Int64 +42 Int64 +42 Int64 +43 Int64 +abc String +abc String +abc String +abc String +abc String +abd String +\N None +\N None +\N None +\N None +order by d1, d2 nulls first +[1,2,3] \N Array(Int64) None +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +[1,2,3] abc Array(Int64) String +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 \N Int64 None +42 [1,2,3] Int64 Array(Int64) +42 42 Int64 Int64 +42 43 Int64 Int64 +42 abc Int64 String +43 42 Int64 Int64 +abc \N String None +abc [1,2,3] String Array(Int64) +abc 42 String Int64 +abc abc String String +abc abd String String +abd abc String String +\N \N None None +\N [1,2,3] None Array(Int64) +\N 42 None Int64 +\N abc None String +order by d1, 
d2 nulls last +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +[1,2,3] abc Array(Int64) String +[1,2,3] \N Array(Int64) None +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +42 42 Int64 Int64 +42 43 Int64 Int64 +42 abc Int64 String +42 \N Int64 None +43 42 Int64 Int64 +abc [1,2,3] String Array(Int64) +abc 42 String Int64 +abc abc String String +abc abd String String +abc \N String None +abd abc String String +\N [1,2,3] None Array(Int64) +\N 42 None Int64 +\N abc None String +\N \N None None +order by d2, d1 nulls first +\N [1,2,3] None Array(Int64) +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +abc [1,2,3] String Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +\N 42 None Int64 +[1,2,3] 42 Array(Int64) Int64 +42 42 Int64 Int64 +43 42 Int64 Int64 +abc 42 String Int64 +42 43 Int64 Int64 +\N abc None String +[1,2,3] abc Array(Int64) String +42 abc Int64 String +abc abc String String +abd abc String String +abc abd String String +\N \N None None +[1,2,3] \N Array(Int64) None +42 \N Int64 None +abc \N String None +order by d2, d1 nulls last +[1,2,3] [1,2,3] Array(Int64) Array(Int64) +[1,2,4] [1,2,3] Array(Int64) Array(Int64) +42 [1,2,3] Int64 Array(Int64) +abc [1,2,3] String Array(Int64) +\N [1,2,3] None Array(Int64) +[1,2,3] [1,2,4] Array(Int64) Array(Int64) +[1,2,3] 42 Array(Int64) Int64 +42 42 Int64 Int64 +43 42 Int64 Int64 +abc 42 String Int64 +\N 42 None Int64 +42 43 Int64 Int64 +[1,2,3] abc Array(Int64) String +42 abc Int64 String +abc abc String String +abd abc String String +\N abc None String +abc abd String String +[1,2,3] \N Array(Int64) None +42 \N Int64 None +abc \N String None +\N \N None None +d1 = d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64) +[1,2,3] 42 0 Array(Int64) Int64 +[1,2,3] abc 0 Array(Int64) String +[1,2,3] \N 0 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 0 Int64 Int64 +42 abc 0 Int64 String +42 \N 0 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 1 String String +abc abd 0 String String +abc \N 0 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 1 None None +d1 < d2 +[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 0 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 0 String String +abc abd 1 String String +abc \N 1 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 0 None None +d1 <= d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 0 Array(Int64) Array(Int64) +42 [1,2,3] 0 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 0 Int64 Int64 +abc [1,2,3] 0 String Array(Int64) +abc 42 0 String Int64 +abc abc 1 String String +abc abd 1 String 
String +abc \N 1 String None +abd abc 0 String String +\N [1,2,3] 0 None Array(Int64) +\N 42 0 None Int64 +\N abc 0 None String +\N \N 1 None None +d1 > d2 +[1,2,3] [1,2,3] 0 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 0 Array(Int64) Array(Int64) +[1,2,3] 42 0 Array(Int64) Int64 +[1,2,3] abc 0 Array(Int64) String +[1,2,3] \N 0 Array(Int64) None +[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64) +42 [1,2,3] 1 Int64 Array(Int64) +42 42 0 Int64 Int64 +42 43 0 Int64 Int64 +42 abc 0 Int64 String +42 \N 0 Int64 None +43 42 1 Int64 Int64 +abc [1,2,3] 1 String Array(Int64) +abc 42 1 String Int64 +abc abc 0 String String +abc abd 0 String String +abc \N 0 String None +abd abc 1 String String +\N [1,2,3] 1 None Array(Int64) +\N 42 1 None Int64 +\N abc 1 None String +\N \N 0 None None +d1 >= d2 +[1,2,3] [1,2,3] 1 Array(Int64) Array(Int64) +[1,2,3] [1,2,4] 1 Array(Int64) Array(Int64) +[1,2,3] 42 1 Array(Int64) Int64 +[1,2,3] abc 1 Array(Int64) String +[1,2,3] \N 1 Array(Int64) None +[1,2,4] [1,2,3] 1 Array(Int64) Array(Int64) +42 [1,2,3] 1 Int64 Array(Int64) +42 42 1 Int64 Int64 +42 43 1 Int64 Int64 +42 abc 1 Int64 String +42 \N 1 Int64 None +43 42 1 Int64 Int64 +abc [1,2,3] 1 String Array(Int64) +abc 42 1 String Int64 +abc abc 1 String String +abc abd 1 String String +abc \N 1 String None +abd abc 1 String String +\N [1,2,3] 1 None Array(Int64) +\N 42 1 None Int64 +\N abc 1 None String +\N \N 1 None None diff --git a/tests/queries/0_stateless/03035_dynamic_sorting.sql b/tests/queries/0_stateless/03035_dynamic_sorting.sql new file mode 100644 index 00000000000..0487fafc955 --- /dev/null +++ b/tests/queries/0_stateless/03035_dynamic_sorting.sql @@ -0,0 +1,80 @@ +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (d1 Dynamic, d2 Dynamic) engine=Memory; + +insert into test values (42, 42); +insert into test values (42, 43); +insert into test values (43, 42); + +insert into test values ('abc', 'abc'); +insert into test values ('abc', 'abd'); +insert into test values ('abd', 'abc'); + +insert into test values ([1,2,3], [1,2,3]); +insert into test values ([1,2,3], [1,2,4]); +insert into test values ([1,2,4], [1,2,3]); + +insert into test values (NULL, NULL); + +insert into test values (42, 'abc'); +insert into test values ('abc', 42); + +insert into test values (42, [1,2,3]); +insert into test values ([1,2,3], 42); + +insert into test values (42, NULL); +insert into test values (NULL, 42); + +insert into test values ('abc', [1,2,3]); +insert into test values ([1,2,3], 'abc'); + +insert into test values ('abc', NULL); +insert into test values (NULL, 'abc'); + +insert into test values ([1,2,3], NULL); +insert into test values (NULL, [1,2,3]); + + +select 'order by d1 nulls first'; +select d1, dynamicType(d1) from test order by d1 nulls first; + +select 'order by d1 nulls last'; +select d1, dynamicType(d1) from test order by d1 nulls last; + +select 'order by d2 nulls first'; +select d2, dynamicType(d2) from test order by d2 nulls first; + +select 'order by d2 nulls last'; +select d2, dynamicType(d2) from test order by d2 nulls last; + + +select 'order by d1, d2 nulls first'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls first; + +select 'order by d1, d2 nulls last'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2 nulls last; + +select 'order by d2, d1 nulls first'; +select d1, d2, dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls first; + +select 'order by d2, d1 nulls last'; +select d1, d2, 
dynamicType(d1), dynamicType(d2) from test order by d2, d1 nulls last; + +select 'd1 = d2'; +select d1, d2, d1 = d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 < d2'; +select d1, d2, d1 < d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 <= d2'; +select d1, d2, d1 <= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 > d2'; +select d1, d2, d1 > d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +select 'd1 >= d2'; +select d1, d2, d2 >= d2, dynamicType(d1), dynamicType(d2) from test order by d1, d2; + +drop table test; + diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference new file mode 100644 index 00000000000..36984bc8b9b --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference @@ -0,0 +1,57 @@ +Memory +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 +MergeTree compact +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 +MergeTree wide +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh new file mode 100755 index 00000000000..65517061b99 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000" + $CH_CLIENT -q "insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000" + + $CH_CLIENT -q "select distinct dynamicType(d) as type from test order by type" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'UInt64'" + $CH_CLIENT -q "select count() from test where d.UInt64 is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'String'" + $CH_CLIENT -q "select count() from test where d.String is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Date'" + $CH_CLIENT -q "select count() from test where d.Date is not NULL" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Variant(String, UInt64))\`)" + $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'" + $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Array(Dynamic))\`)" + $CH_CLIENT -q "select count() from test where d is NULL" + $CH_CLIENT -q "select count() from test where not empty(d.\`Tuple(a Array(Dynamic))\`.a.String)" + + $CH_CLIENT -q "select d, d.UInt64, d.String, d.\`Array(Variant(String, UInt64))\` from test format Null" + $CH_CLIENT -q "select d.UInt64, d.String, d.\`Array(Variant(String, UInt64))\` from test format Null" + $CH_CLIENT -q "select d.Int8, d.Date, d.\`Array(String)\` from test format Null" + $CH_CLIENT -q "select d, d.UInt64, d.Date, d.\`Array(Variant(String, UInt64))\`, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select d.UInt64, d.Date, d.\`Array(Variant(String, UInt64))\`, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64, d.\`Array(Variant(String, UInt64))\`.String from test format Null" + $CH_CLIENT -q "select d, d.\`Tuple(a UInt64, b String)\`.a, d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" + $CH_CLIENT -q "select 
d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.reference b/tests/queries/0_stateless/03037_dynamic_merges_1.reference new file mode 100644 index 00000000000..fff812f0396 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1.reference @@ -0,0 +1,120 @@ +MergeTree compact + horizontal merge +test1 +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 UInt64 +100000 None +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide + horizontal merge +test1 +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 UInt64 +100000 None +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 UInt64 +100000 None +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree compact + vertical merge +test1 +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 UInt64 +100000 None +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 UInt64 +100000 None +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide + vertical merge +test1 +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.sh b/tests/queries/0_stateless/03037_dynamic_merges_1.sh new file mode 100755 index 00000000000..cf524fb9393 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# 
reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" + $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" + $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.sh b/tests/queries/0_stateless/03037_dynamic_merges_2.sh new file mode 100755 index 00000000000..e9d571c2104 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment 
+CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(1000000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000)" + + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference new file mode 100644 index 00000000000..f8118ce8b95 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference @@ -0,0 +1,92 @@ +MergeTree compact + horizontal merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +50000 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree wide + horizontal merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +50000 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a 
Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree compact + vertical merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None +MergeTree wide + vertical merge +test +16667 Tuple(a Dynamic(max_types=3)):Date +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 UInt64:None +16667 Tuple(a Dynamic(max_types=3)):DateTime +33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) +50000 Tuple(a Dynamic(max_types=3)):UInt64 +66667 Tuple(a Dynamic(max_types=3)):String +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None +133333 Tuple(a Dynamic(max_types=3)):None +50000 Tuple(a Dynamic(max_types=3)):UInt64 +100000 UInt64:None +100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +116667 Tuple(a Dynamic(max_types=3)):String +133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh new file mode 100755 index 00000000000..afb167ec20d --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + + $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" + + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + horizontal merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide + vertical merge" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference new file mode 100644 index 00000000000..a7fbbabcd46 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference @@ -0,0 +1,88 @@ +MergeTree compact + horizontal merge +ReplacingMergeTree +100000 UInt64 +100000 String +50000 UInt64 +100000 String +SummingMergeTree +100000 UInt64 +100000 String +200000 1 +50000 String +100000 UInt64 
+50000 2 +100000 1 +AggregatingMergeTree +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +MergeTree wide + horizontal merge +ReplacingMergeTree +100000 UInt64 +100000 String +50000 UInt64 +100000 String +SummingMergeTree +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +AggregatingMergeTree +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +MergeTree compact + vertical merge +ReplacingMergeTree +100000 String +100000 UInt64 +50000 UInt64 +100000 String +SummingMergeTree +100000 UInt64 +100000 String +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +AggregatingMergeTree +100000 UInt64 +100000 String +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +MergeTree wide + vertical merge +ReplacingMergeTree +100000 UInt64 +100000 String +50000 UInt64 +100000 String +SummingMergeTree +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 +AggregatingMergeTree +100000 UInt64 +100000 String +200000 1 +50000 String +100000 UInt64 +50000 2 +100000 1 diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh new file mode 100755 index 00000000000..3384a135307 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "ReplacingMergeTree" + $CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=ReplacingMergeTree order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "drop table test" + + echo "SummingMergeTree" + $CH_CLIENT -q "create table test (id UInt64, sum UInt64, d Dynamic) engine=SummingMergeTree(sum) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), sum from test group by sum" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), sum from test group by sum" + $CH_CLIENT -q "drop table test" + + echo "AggregatingMergeTree" + $CH_CLIENT -q "create table test (id UInt64, sum AggregateFunction(sum, UInt64), d Dynamic) engine=AggregatingMergeTree() order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), number from numbers(100000) group by number" 
+ $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference new file mode 100644 index 00000000000..03c8b4564fa --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference @@ -0,0 +1,44 @@ +MergeTree compact + horizontal merge +CollapsingMergeTree +100000 String +100000 UInt64 +50000 UInt64 +50000 String +VersionedCollapsingMergeTree +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree wide + horizontal merge +CollapsingMergeTree +100000 UInt64 +100000 String +50000 String +50000 UInt64 +VersionedCollapsingMergeTree +100000 UInt64 +100000 String +75000 String +75000 UInt64 +MergeTree compact + vertical merge +CollapsingMergeTree +100000 UInt64 +100000 String +50000 UInt64 +50000 String +VersionedCollapsingMergeTree +100000 UInt64 +100000 String +75000 UInt64 +75000 String +MergeTree wide + vertical merge +CollapsingMergeTree +100000 UInt64 +100000 String +50000 String +50000 UInt64 +VersionedCollapsingMergeTree +100000 UInt64 +100000 String +75000 UInt64 +75000 String diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh new file mode 100755 index 00000000000..5dae9228d0a --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" + + +function test() +{ + echo "CollapsingMergeTree" + $CH_CLIENT -q "create table test (id UInt64, sign Int8, d Dynamic) engine=CollapsingMergeTree(sign) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "drop table test" + + echo "VersionedCollapsingMergeTree" + $CH_CLIENT -q "create table test (id UInt64, sign Int8, version UInt8, d Dynamic) engine=VersionedCollapsingMergeTree(sign, version) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, number >= 75000 ? 2 : 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.reference b/tests/queries/0_stateless/03040_dynamic_type_alters.reference new file mode 100644 index 00000000000..ca98ec0963c --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters.reference @@ -0,0 +1,526 @@ +Memory +initial insert +alter add column 1 +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N 
\N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +4 UInt64 +7 String +8 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +5 UInt64 +8 String +9 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +5 UInt64 +8 String +9 None +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +5 UInt64 +8 String +12 None +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N \N 3 \N \N +4 4 4 \N \N \N 4 \N \N +5 5 5 \N \N \N 5 \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N \N 12 \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N +MergeTree compact +initial insert +alter add column 1 +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N 
+alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N +MergeTree wide +initial insert +alter add column 1 
+3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 3 \N \N \N +4 4 4 4 \N \N \N +5 5 5 5 \N \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 12 \N \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +15 15 \N \N \N \N \N +16 16 16 16 \N \N \N +17 17 str_17 str_17 \N \N \N +18 18 1970-01-19 1970-01-19 \N \N \N +19 19 \N \N \N \N \N +20 20 20 \N 20 \N \N +21 21 str_21 str_21 \N \N \N +22 22 1970-01-23 \N \N 1970-01-23 \N +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N \N \N \N \N \N +1 1 1 \N \N \N \N \N \N +2 2 2 \N \N \N \N \N \N +3 3 3 \N \N 3 \N \N \N +4 4 4 \N \N 4 \N \N \N +5 5 5 \N \N 5 \N \N \N +6 6 6 \N \N str_6 \N \N \N +7 7 7 \N \N str_7 \N \N \N +8 8 8 \N \N str_8 \N \N \N +9 9 9 \N \N \N \N \N \N +10 10 10 \N \N \N \N \N \N +11 11 11 \N \N \N \N \N \N +12 12 12 \N \N 12 \N \N \N +13 
13 13 \N \N str_13 \N \N \N +14 14 14 \N \N \N \N \N \N +15 15 15 \N \N \N \N \N \N +16 16 16 \N \N 16 \N \N \N +17 17 17 \N \N str_17 \N \N \N +18 18 18 \N \N 1970-01-19 \N \N \N +19 19 19 \N \N \N \N \N \N +20 20 20 \N \N \N 20 \N \N +21 21 21 \N \N str_21 \N \N \N +22 22 22 \N \N \N \N 1970-01-23 \N +23 \N \N \N \N \N \N \N \N +24 24 24 \N \N \N \N \N \N +25 str_25 \N str_25 \N \N \N \N \N diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.sh b/tests/queries/0_stateless/03040_dynamic_type_alters.sh new file mode 100755 index 00000000000..a20a92712e0 --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --stacktrace --max_insert_threads 3 --group_by_two_level_threshold 1000000 --group_by_two_level_threshold_bytes 42526602 --distributed_aggregation_memory_efficient 1 --fsync_metadata 1 --output_format_parallel_formatting 0 --input_format_parallel_parsing 0 --min_chunk_bytes_for_parallel_parsing 8125230 --max_read_buffer_size 859505 --prefer_localhost_replica 1 --max_block_size 34577 --max_threads 41 --optimize_append_index 0 --optimize_if_chain_to_multiif 1 --optimize_if_transform_strings_to_enum 1 --optimize_read_in_order 1 --optimize_or_like_chain 0 --optimize_substitute_columns 1 --enable_multiple_prewhere_read_steps 1 --read_in_order_two_level_merge_threshold 99 --optimize_aggregation_in_order 1 --aggregation_in_order_max_block_bytes 27635208 --use_uncompressed_cache 0 --min_bytes_to_use_direct_io 10737418240 --min_bytes_to_use_mmap_io 6451111320 --local_filesystem_read_method pread --remote_filesystem_read_method read --local_filesystem_read_prefetch 1 --filesystem_cache_segments_batch_size 50 --read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 --throw_on_error_from_cache_on_write_operations 0 --remote_filesystem_read_prefetch 1 --allow_prefetched_read_pool_for_remote_filesystem 0 --filesystem_prefetch_max_memory_usage 64Mi --filesystem_prefetches_limit 10 --filesystem_prefetch_min_bytes_for_single_read_task 16Mi --filesystem_prefetch_step_marks 0 --filesystem_prefetch_step_bytes 100Mi --compile_aggregate_expressions 0 --compile_sort_description 1 --merge_tree_coarse_index_granularity 32 --optimize_distinct_in_order 0 --max_bytes_before_external_sort 10737418240 --max_bytes_before_external_group_by 10737418240 --max_bytes_before_remerge_sort 1374192967 --min_compress_block_size 2152247 --max_compress_block_size 1830907 --merge_tree_compact_parts_min_granules_to_multibuffer_read 79 --optimize_sorting_by_input_stream_properties 1 --http_response_buffer_size 106072 --http_wait_end_of_query True --enable_memory_bound_merging_of_aggregation_results 0 --min_count_to_compile_expression 0 --min_count_to_compile_aggregate_expression 3 --min_count_to_compile_sort_description 3 --session_timezone Africa/Khartoum --prefer_warmed_unmerged_parts_seconds 4 --use_page_cache_for_disks_without_file_cache False --page_cache_inject_eviction True --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.03 --ratio_of_defaults_for_sparse_serialization 0.9779014012142565 --prefer_fetch_merged_part_size_threshold 4254002758 
--vertical_merge_algorithm_min_rows_to_activate 1 --vertical_merge_algorithm_min_columns_to_activate 1 --allow_vertical_merges_from_compact_to_wide_parts 1 --min_merge_bytes_to_use_direct_io 1 --index_granularity_bytes 4982992 --merge_max_block_size 16662 --index_granularity 22872 --min_bytes_for_wide_part 1073741824 --compress_marks 0 --compress_primary_key 0 --marks_compress_block_size 86328 --primary_key_compress_block_size 64101 --replace_long_file_name_to_hash 0 --max_file_name_length 81 --min_bytes_for_full_part_storage 536870912 --compact_parts_max_bytes_to_buffer 480908080 --compact_parts_max_granules_to_buffer 1 --compact_parts_merge_max_bytes_to_prefetch_part 4535313 --cache_populated_by_fetch 0" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column 1" + $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column 1" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 1" + $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 1" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 2" + $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 2" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter modify column 3" + $CH_CLIENT 
-q "alter table test modify column y Dynamic settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter modify column 3" + $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=Memory" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" From 18e4c0f1da79fc458707c5557b9e611a1fe916bd Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 26 Apr 2024 13:35:18 +0200 Subject: [PATCH 083/392] Fix remaining integration test --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 4 ++-- src/IO/S3/getObjectInfo.cpp | 2 +- .../ObjectStorage/HDFS/ReadBufferFromHDFS.cpp | 1 - .../ObjectStorage/ReadBufferIterator.cpp | 4 ++-- .../ObjectStorage/StorageObjectStorageSource.cpp | 16 +++++++++++----- .../ObjectStorage/StorageObjectStorageSource.h | 7 ++----- 6 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index a2522212f90..507e9dbafcb 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -447,7 +447,7 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s ObjectMetadata result; result.size_bytes = object_info.size; - result.last_modified = object_info.last_modification_time; + result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time); result.attributes = object_info.metadata; return result; @@ -462,7 +462,7 @@ ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) cons ObjectMetadata result; result.size_bytes = object_info.size; - result.last_modified = object_info.last_modification_time; + result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time); result.attributes = object_info.metadata; return result; diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp index 88f79f8d8d5..c294e7905bd 100644 --- a/src/IO/S3/getObjectInfo.cpp +++ b/src/IO/S3/getObjectInfo.cpp @@ -53,7 +53,7 @@ namespace const auto & result = outcome.GetResult(); ObjectInfo object_info; object_info.size = static_cast(result.GetContentLength()); - object_info.last_modification_time = result.GetLastModified().Millis() / 1000; + object_info.last_modification_time = result.GetLastModified().Seconds(); if (with_metadata) object_info.metadata = result.GetMetadata(); diff --git 
a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index eeb553e0d62..b37b9de746b 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -116,7 +116,6 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory= file_size) // { - // LOG_TEST(log, "KSSENII 1 2"); // return false; // } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index f8ce90a2b1f..9c1d3f79c2b 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -67,11 +67,11 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( auto get_last_mod_time = [&] -> std::optional { if (object_info->metadata) - return object_info->metadata->last_modified.epochMicroseconds(); + return object_info->metadata->last_modified.epochTime(); else { object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - return object_info->metadata->last_modified.epochMicroseconds(); + return object_info->metadata->last_modified.epochTime(); } }; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 3101a7ebf51..4551c2df7c3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -76,6 +76,11 @@ StorageObjectStorageSource::~StorageObjectStorageSource() create_reader_pool->wait(); } +void StorageObjectStorageSource::setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) +{ + setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); +} + std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, @@ -213,9 +218,11 @@ std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const O auto get_last_mod_time = [&]() -> std::optional { - return object_info->metadata - ? object_info->metadata->last_modified.epochMicroseconds() - : 0; + if (object_info->metadata) + { + return object_info->metadata->last_modified.epochTime(); + } + return std::nullopt; }; return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); } @@ -260,7 +267,6 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade const auto max_parsing_threads = need_only_count ? 
std::optional(1) : std::nullopt; read_buf = createReadBuffer(object_info->relative_path, object_info->metadata->size_bytes); - LOG_TEST(&Poco::Logger::get("KSSENII"), "KSSENII HEADER: {}", read_from_format_info.format_header.dumpStructure()); auto input_format = FormatFactory::instance().getInput( configuration->format, *read_buf, read_from_format_info.format_header, getContext(), max_block_size, format_settings, max_parsing_threads, @@ -354,7 +360,7 @@ ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) if (object_info) { - LOG_TEST(&Poco::Logger::get("KeysIterator"), "Next key: {}", object_info->relative_path); + LOG_TEST(logger, "Next key: {}", object_info->relative_path); } return object_info; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 3c2cc3f80cd..0afbf77db2b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -38,10 +38,7 @@ public: String getName() const override { return name; } - void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override - { - setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); - } + void setKeyCondition(const ActionsDAGPtr & filter_actions_dag, ContextPtr context_) override; Chunk generate() override; @@ -65,11 +62,11 @@ protected: const bool need_only_count; const ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; + ColumnsDescription columns_desc; std::shared_ptr file_iterator; SchemaCache & schema_cache; bool initialized = false; - size_t total_rows_in_file = 0; LoggerPtr log = getLogger("StorageObjectStorageSource"); From a4ed164074fcd96fc198000722563da70f6a31bf Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 26 Apr 2024 13:38:38 +0200 Subject: [PATCH 084/392] Fix clang tidy --- src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp | 2 +- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 2 +- src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index c6590ba8d43..571e14325bb 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -136,7 +136,7 @@ struct DeltaLakeMetadata::Impl * \"nullCount\":{\"col-6c990940-59bb-4709-8f2e-17083a82c01a\":0,\"col-763cd7e2-7627-4d8e-9fb7-9e85d0c8845b\":0}}"}} * " */ - void processMetadataFile(const String & key, std::set & result) + void processMetadataFile(const String & key, std::set & result) const { auto read_settings = context->getReadSettings(); auto buf = object_storage->readObject(StoredObject(key), read_settings); diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index f5bfb9d2a65..c5565d8b0e8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -61,7 +61,7 @@ StorageObjectStorage::StorageObjectStorage( objects.emplace_back(key); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); - setInMemoryMetadata(std::move(metadata)); + setInMemoryMetadata(metadata); } String StorageObjectStorage::getName() const diff --git 
a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index f98fc32a3cc..1a1df399626 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -47,7 +47,7 @@ StorageObjectStorageCluster::StorageObjectStorageCluster( metadata.setConstraints(constraints_); setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); - setInMemoryMetadata(std::move(metadata)); + setInMemoryMetadata(metadata); } std::string StorageObjectStorageCluster::getName() const From 434d2d16f1056977dd80f47d0b687151ac9d16f2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 26 Apr 2024 16:34:12 +0200 Subject: [PATCH 085/392] Cleanuo --- src/Backups/BackupIO_AzureBlobStorage.cpp | 4 +- src/Backups/BackupIO_AzureBlobStorage.h | 10 +- .../registerBackupEngineAzureBlobStorage.cpp | 4 +- src/CMakeLists.txt | 4 +- src/Core/Settings.h | 4 + src/Core/SettingsChangesHistory.h | 4 + .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 78 +++---- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 17 +- .../ObjectStorages/ObjectStorageFactory.cpp | 3 +- .../ObjectStorages/S3/S3ObjectStorage.cpp | 18 -- src/Disks/ObjectStorages/S3/diskSettings.cpp | 10 +- src/Interpreters/InterpreterSystemQuery.cpp | 4 +- .../{AzureBlob => Azure}/Configuration.cpp | 33 +-- .../{AzureBlob => Azure}/Configuration.h | 16 +- .../ObjectStorage/DataLakes/Common.cpp | 4 +- src/Storages/ObjectStorage/DataLakes/Common.h | 4 +- .../DataLakes/DeltaLakeMetadata.cpp | 12 +- .../DataLakes/DeltaLakeMetadata.h | 5 +- .../ObjectStorage/DataLakes/HudiMetadata.h | 4 +- .../DataLakes/IStorageDataLake.h | 2 +- .../DataLakes/IcebergMetadata.cpp | 6 +- .../ObjectStorage/DataLakes/IcebergMetadata.h | 4 +- .../DataLakes/registerDataLakeStorages.cpp | 6 +- .../ObjectStorage/HDFS/Configuration.cpp | 32 +-- .../ObjectStorage/HDFS/Configuration.h | 12 +- .../ObjectStorage/HDFS/ReadBufferFromHDFS.cpp | 8 +- .../ObjectStorage/ReadBufferIterator.cpp | 53 ++--- .../ObjectStorage/ReadBufferIterator.h | 8 +- .../ReadFromObjectStorageStep.cpp | 87 ------- .../ObjectStorage/ReadFromObjectStorageStep.h | 55 ----- .../ObjectStorage/S3/Configuration.cpp | 21 +- src/Storages/ObjectStorage/S3/Configuration.h | 11 +- .../ObjectStorage/StorageObjectStorage.cpp | 213 ++++++++++++++++-- .../ObjectStorage/StorageObjectStorage.h | 62 ++++- .../StorageObjectStorageCluster.cpp | 20 +- .../StorageObjectStorageCluster.h | 15 +- .../StorageObjectStorageConfiguration.cpp | 74 ------ .../StorageObjectStorageConfiguration.h | 75 ------ .../StorageObjectStorageSink.cpp | 7 +- .../ObjectStorage/StorageObjectStorageSink.h | 16 +- .../StorageObjectStorageSource.cpp | 23 +- .../StorageObjectStorageSource.h | 7 +- .../StorageObjectStorage_fwd_internal.h | 12 - src/Storages/ObjectStorage/Utils.cpp | 7 +- src/Storages/ObjectStorage/Utils.h | 6 +- .../registerStorageObjectStorage.cpp | 22 +- src/Storages/S3Queue/S3QueueTableMetadata.cpp | 3 +- src/Storages/S3Queue/S3QueueTableMetadata.h | 4 +- src/Storages/S3Queue/StorageS3Queue.cpp | 2 +- .../StorageSystemSchemaInferenceCache.cpp | 4 +- src/TableFunctions/ITableFunctionDataLake.h | 2 +- .../TableFunctionObjectStorage.cpp | 73 ++---- .../TableFunctionObjectStorage.h | 33 ++- .../TableFunctionObjectStorageCluster.cpp | 4 +- .../TableFunctionObjectStorageCluster.h | 4 +- src/TableFunctions/registerTableFunctions.cpp | 12 - .../configs/inf_s3_retries.xml | 1 + .../configs/s3_retries.xml | 1 + 58 files 
changed, 555 insertions(+), 690 deletions(-) rename src/Storages/ObjectStorage/{AzureBlob => Azure}/Configuration.cpp (93%) rename src/Storages/ObjectStorage/{AzureBlob => Azure}/Configuration.h (78%) delete mode 100644 src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp delete mode 100644 src/Storages/ObjectStorage/ReadFromObjectStorageStep.h delete mode 100644 src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp delete mode 100644 src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h delete mode 100644 src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 673930b5976..f00da686c18 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -28,7 +28,7 @@ namespace ErrorCodes } BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( - const StorageAzureBlobConfiguration & configuration_, + const StorageAzureConfiguration & configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) @@ -112,7 +112,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( - const StorageAzureBlobConfiguration & configuration_, + const StorageAzureConfiguration & configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index 25c52f9b0d3..4643c103fd5 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB @@ -17,7 +17,7 @@ class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: BackupReaderAzureBlobStorage( - const StorageAzureBlobConfiguration & configuration_, + const StorageAzureConfiguration & configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); @@ -39,7 +39,7 @@ public: private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlobConfiguration configuration; + StorageAzureConfiguration configuration; std::unique_ptr object_storage; std::shared_ptr settings; }; @@ -48,7 +48,7 @@ class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: BackupWriterAzureBlobStorage( - const StorageAzureBlobConfiguration & configuration_, + const StorageAzureConfiguration & configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, @@ -85,7 +85,7 @@ private: const DataSourceDescription data_source_description; std::shared_ptr client; - StorageAzureBlobConfiguration configuration; + StorageAzureConfiguration configuration; std::unique_ptr object_storage; std::shared_ptr settings; }; diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 049a4b1a338..1e3b3759257 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #endif @@ -49,7 +49,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) const String & id_arg = params.backup_info.id_arg; const auto & args = params.backup_info.args; - 
StorageAzureBlobConfiguration configuration; + StorageAzureConfiguration configuration; if (!id_arg.empty()) { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c26c40d4b87..d5d17f992dc 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -103,7 +103,6 @@ add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhous add_headers_and_sources(dbms Disks/IO) add_headers_and_sources(dbms Disks/ObjectStorages) -add_headers_and_sources(dbms Disks/ObjectStorages) if (TARGET ch_contrib::sqlite) add_headers_and_sources(dbms Databases/SQLite) endif() @@ -117,7 +116,7 @@ if (TARGET ch_contrib::nats_io) endif() add_headers_and_sources(dbms Storages/ObjectStorage) -add_headers_and_sources(dbms Storages/ObjectStorage/AzureBlob) +add_headers_and_sources(dbms Storages/ObjectStorage/Azure) add_headers_and_sources(dbms Storages/ObjectStorage/S3) add_headers_and_sources(dbms Storages/ObjectStorage/HDFS) add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes) @@ -148,7 +147,6 @@ if (TARGET ch_contrib::azure_sdk) endif() if (TARGET ch_contrib::hdfs) - add_headers_and_sources(dbms Storages/ObjectStorage/HDFS) add_headers_and_sources(dbms Disks/ObjectStorages/HDFS) endif() diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ff7a9089327..bf558d7b1ba 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -113,9 +113,12 @@ class IColumn; M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \ M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ + M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ + M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, s3_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageS3", 0) \ M(Bool, hdfs_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageHDFS", 0) \ M(Bool, azure_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageAzure", 0) \ + M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. 
Fail if a single TCP read or write call blocks for this long.", 0) \ @@ -128,6 +131,7 @@ class IColumn; M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \ M(Bool, hdfs_skip_empty_files, false, "Allow to skip empty files in hdfs table engine", 0) \ + M(Bool, azure_skip_empty_files, false, "Allow to skip empty files in azure table engine", 0) \ M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \ M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \ M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index cfe3c290d83..4954fa5d996 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -90,6 +90,10 @@ static std::map sett {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, + {"hdfs_throw_on_zero_files_match", false, false, "Throw an error, when ListObjects request cannot match any files"}, + {"azure_throw_on_zero_files_match", false, false, "Throw an error, when ListObjects request cannot match any files"}, + {"s3_validate_request_settings", true, true, "Validate S3 request settings"}, + {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index ed63795cb05..6c2f310a7d1 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -23,15 +23,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -void HDFSObjectStorage::shutdown() -{ -} - -void HDFSObjectStorage::startup() -{ -} - -void HDFSObjectStorage::initializeHDFS() const +void HDFSObjectStorage::initializeHDFSFS() const { if (initialized) return; @@ -45,9 +37,25 @@ void HDFSObjectStorage::initializeHDFS() const initialized = true; } +std::string HDFSObjectStorage::extractObjectKeyFromURL(const StoredObject & object) const +{ + /// This is very unfortunate, but for disk HDFS we made a mistake + /// and now its behaviour is inconsistent with S3 and Azure disks. + /// The mistake is that for HDFS we write into metadata files whole URL + data directory + key, + /// while for S3 and Azure we write there only data_directory + key. + /// This leads us into ambiguity that for StorageHDFS we have just key in object.remote_path, + /// but for DiskHDFS we have there URL as well. 
+ auto path = object.remote_path; + if (path.starts_with(url)) + path = path.substr(url.size()); + if (path.starts_with("/")) + path.substr(1); + return path; +} + ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { - initializeHDFS(); + initializeHDFSFS(); /// what ever data_source_description.description value is, consider that key as relative key chassert(data_directory.starts_with("/")); return ObjectStorageKey::createAsRelative( @@ -56,14 +64,11 @@ ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & bool HDFSObjectStorage::exists(const StoredObject & object) const { - initializeHDFS(); + initializeHDFSFS(); std::string path = object.remote_path; if (path.starts_with(url_without_path)) path = path.substr(url_without_path.size()); - // const auto & path = object.remote_path; - // const size_t begin_of_path = path.find('/', path.find("//") + 2); - // const String remote_fs_object_path = path.substr(begin_of_path); return (0 == hdfsExists(hdfs_fs.get(), path.c_str())); } @@ -73,13 +78,8 @@ std::unique_ptr HDFSObjectStorage::readObject( /// NOLIN std::optional, std::optional) const { - initializeHDFS(); - std::string path = object.remote_path; - if (path.starts_with(url)) - path = path.substr(url.size()); - if (path.starts_with("/")) - path.substr(1); - + initializeHDFSFS(); + auto path = extractObjectKeyFromURL(object); return std::make_unique( fs::path(url_without_path) / "", fs::path(data_directory) / path, config, patchSettings(read_settings)); } @@ -90,21 +90,13 @@ std::unique_ptr HDFSObjectStorage::readObjects( /// NOLI std::optional, std::optional) const { - initializeHDFS(); + initializeHDFSFS(); auto disk_read_settings = patchSettings(read_settings); auto read_buffer_creator = [this, disk_read_settings] (bool /* restricted_seek */, const StoredObject & object_) -> std::unique_ptr { - // size_t begin_of_path = path.find('/', path.find("//") + 2); - // auto hdfs_path = path.substr(begin_of_path); - // auto hdfs_uri = path.substr(0, begin_of_path); - - std::string path = object_.remote_path; - if (path.starts_with(url)) - path = path.substr(url.size()); - if (path.starts_with("/")) - path.substr(1); + auto path = extractObjectKeyFromURL(object_); return std::make_unique( fs::path(url_without_path) / "", fs::path(data_directory) / path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true); }; @@ -120,7 +112,7 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL size_t buf_size, const WriteSettings & write_settings) { - initializeHDFS(); + initializeHDFSFS(); if (attributes.has_value()) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, @@ -142,7 +134,7 @@ std::unique_ptr HDFSObjectStorage::writeObject( /// NOL /// Remove file. Throws exception if file doesn't exists or it's a directory. 
void HDFSObjectStorage::removeObject(const StoredObject & object) { - initializeHDFS(); + initializeHDFSFS(); auto path = object.remote_path; if (path.starts_with(url_without_path)) path = path.substr(url_without_path.size()); @@ -156,28 +148,28 @@ void HDFSObjectStorage::removeObject(const StoredObject & object) void HDFSObjectStorage::removeObjects(const StoredObjects & objects) { - initializeHDFS(); + initializeHDFSFS(); for (const auto & object : objects) removeObject(object); } void HDFSObjectStorage::removeObjectIfExists(const StoredObject & object) { - initializeHDFS(); + initializeHDFSFS(); if (exists(object)) removeObject(object); } void HDFSObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { - initializeHDFS(); + initializeHDFSFS(); for (const auto & object : objects) removeObjectIfExists(object); } ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) const { - initializeHDFS(); + initializeHDFSFS(); auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data()); if (!file_info) throw Exception(ErrorCodes::HDFS_ERROR, @@ -185,7 +177,7 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co ObjectMetadata metadata; metadata.size_bytes = static_cast(file_info->mSize); - metadata.last_modified = file_info->mLastMod; + metadata.last_modified = Poco::Timestamp::fromEpochTime(file_info->mLastMod); hdfsFreeFileInfo(file_info, 1); return metadata; @@ -193,9 +185,9 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { - initializeHDFS(); + initializeHDFSFS(); auto * log = &Poco::Logger::get("HDFSObjectStorage"); - LOG_TRACE(log, "Trying to list files for {}", path); + LOG_TEST(log, "Trying to list files for {}", path); HDFSFileInfo ls; ls.file_info = hdfsListDirectory(hdfs_fs.get(), path.data(), &ls.length); @@ -213,7 +205,7 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null"); } - LOG_TRACE(log, "Listed {} files for {}", ls.length, path); + LOG_TEST(log, "Listed {} files for {}", ls.length, path); for (int i = 0; i < ls.length; ++i) { @@ -228,8 +220,6 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM } else { - LOG_TEST(log, "Found file: {}", file_path); - children.emplace_back(std::make_shared( String(file_path), ObjectMetadata{ @@ -247,7 +237,7 @@ void HDFSObjectStorage::copyObject( /// NOLINT const WriteSettings & write_settings, std::optional object_to_attributes) { - initializeHDFS(); + initializeHDFSFS(); if (object_to_attributes.has_value()) throw Exception( ErrorCodes::UNSUPPORTED_METHOD, diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index b626d3dc779..e747b283400 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -35,7 +35,8 @@ public: HDFSObjectStorage( const String & hdfs_root_path_, SettingsPtr settings_, - const Poco::Util::AbstractConfiguration & config_) + const Poco::Util::AbstractConfiguration & config_, + bool lazy_initialize) : config(config_) , settings(std::move(settings_)) { @@ -46,6 +47,9 @@ public: data_directory = url.substr(begin_of_path); else data_directory = "/"; + + if (!lazy_initialize) + initializeHDFSFS(); } std::string getName() const override { return 
"HDFSObjectStorage"; } @@ -98,10 +102,6 @@ public: void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override; - void shutdown() override; - - void startup() override; - String getObjectsNamespace() const override { return ""; } std::unique_ptr cloneObjectStorage( @@ -114,8 +114,13 @@ public: bool isRemote() const override { return true; } + void startup() override { } + + void shutdown() override { } + private: - void initializeHDFS() const; + void initializeHDFSFS() const; + std::string extractObjectKeyFromURL(const StoredObject & object) const; const Poco::Util::AbstractConfiguration & config; diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 67e38d6389a..1a2ea0c2593 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -232,7 +232,8 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) context->getSettingsRef().hdfs_replication ); - return createObjectStorage(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config); + return createObjectStorage( + ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config, /* lazy_initialize */false); }); } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 507e9dbafcb..0801a84ce13 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -582,27 +582,9 @@ void S3ObjectStorage::applyNewSettings( auto new_client = getClient(config, config_prefix, context, *new_s3_settings, for_disk_s3, &uri); client.set(std::move(new_client)); } - s3_settings.set(std::move(new_s3_settings)); } -// void S3ObjectStorage::applyNewSettings(ContextPtr context) -// { -// auto settings = s3_settings.get(); -// if (!endpoint_settings || !settings->auth_settings.hasUpdates(endpoint_settings->auth_settings)) -// return; -// -// const auto & config = context->getConfigRef(); -// auto new_s3_settings = getSettings(uri, config, "s3.", context); -// -// new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); -// -// auto new_client = getClient(config, "s3.", context, *new_s3_settings, false); -// -// s3_settings.set(std::move(new_s3_settings)); -// client.set(std::move(new_client)); -// } - std::unique_ptr S3ObjectStorage::cloneObjectStorage( const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 49300a9cd89..a38c0d3c85f 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -100,11 +100,9 @@ std::unique_ptr getClient( settings.request_settings.put_request_throttler, url.uri.getScheme()); - client_configuration.endpointOverride = url.endpoint; - client_configuration.maxConnections = static_cast(request_settings.max_connections); client_configuration.connectTimeoutMs = config.getUInt64(config_prefix + ".connect_timeout_ms", local_settings.s3_connect_timeout_ms.value); client_configuration.requestTimeoutMs = config.getUInt64(config_prefix + ".request_timeout_ms", local_settings.s3_request_timeout_ms.value); - client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); + client_configuration.maxConnections = config.getUInt(config_prefix 
+ ".max_connections", static_cast(request_settings.max_connections)); client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT); client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS); @@ -112,12 +110,6 @@ std::unique_ptr getClient( client_configuration.s3_use_adaptive_timeouts = config.getBool( config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); - // client_configuration.http_keep_alive_timeout_ms = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT * 1000); - // client_configuration.http_connection_pool_size = config.getUInt( - // config_prefix + ".http_connection_pool_size", static_cast(global_settings.s3_http_connection_pool_size.value)); - // client_configuration.s3_use_adaptive_timeouts = config.getBool(config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); - // client_configuration.wait_on_pool_size_limit = for_disk_s3; - if (for_disk_s3) { /* diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index af9dc08e8c7..56b2904363e 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include #include @@ -502,7 +502,7 @@ BlockIO InterpreterSystemQuery::execute() StorageURL::getSchemaCache(getContext()).clear(); #if USE_AZURE_BLOB_STORAGE if (caches_to_drop.contains("AZURE")) - StorageObjectStorage::getSchemaCache(getContext(), StorageAzureBlobConfiguration::type_name).clear(); + StorageObjectStorage::getSchemaCache(getContext(), StorageAzureConfiguration::type_name).clear(); #endif break; } diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp similarity index 93% rename from src/Storages/ObjectStorage/AzureBlob/Configuration.cpp rename to src/Storages/ObjectStorage/Azure/Configuration.cpp index f268b812c03..43992a81eef 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -1,8 +1,9 @@ -#include +#include #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -65,7 +66,7 @@ namespace } } -void StorageAzureBlobConfiguration::check(ContextPtr context) const +void StorageAzureConfiguration::check(ContextPtr context) const { Poco::URI url_to_check; if (is_connection_string) @@ -77,11 +78,11 @@ void StorageAzureBlobConfiguration::check(ContextPtr context) const url_to_check = Poco::URI(connection_url); context->getGlobalContext()->getRemoteHostFilter().checkURL(url_to_check); - StorageObjectStorageConfiguration::check(context); + Configuration::check(context); } -StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other) - : StorageObjectStorageConfiguration(other) +StorageAzureConfiguration::StorageAzureConfiguration(const StorageAzureConfiguration & other) + : Configuration(other) { connection_url = other.connection_url; is_connection_string = other.is_connection_string; @@ -92,7 +93,7 @@ StorageAzureBlobConfiguration::StorageAzureBlobConfiguration(const StorageAzureB blobs_paths = other.blobs_paths; } -AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(ContextPtr context) 
+AzureObjectStorage::SettingsPtr StorageAzureConfiguration::createSettings(ContextPtr context) { const auto & context_settings = context->getSettingsRef(); auto settings_ptr = std::make_unique(); @@ -102,7 +103,7 @@ AzureObjectStorage::SettingsPtr StorageAzureBlobConfiguration::createSettings(Co return settings_ptr; } -StorageObjectStorage::QuerySettings StorageAzureBlobConfiguration::getQuerySettings(const ContextPtr & context) const +StorageObjectStorage::QuerySettings StorageAzureConfiguration::getQuerySettings(const ContextPtr & context) const { const auto & settings = context->getSettingsRef(); return StorageObjectStorage::QuerySettings{ @@ -110,14 +111,14 @@ StorageObjectStorage::QuerySettings StorageAzureBlobConfiguration::getQuerySetti .create_new_file_on_insert = settings.azure_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_azure, .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.s3_skip_empty_files, /// TODO: add setting for azure + .skip_empty_files = settings.azure_skip_empty_files, .list_object_keys_size = settings.azure_list_object_keys_size, - .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .throw_on_zero_files_match = settings.azure_throw_on_zero_files_match, .ignore_non_existent_file = settings.azure_ignore_file_doesnt_exist, }; } -ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT +ObjectStoragePtr StorageAzureConfiguration::createObjectStorage(ContextPtr context, bool is_readonly) /// NOLINT { assertInitialized(); auto client = createClient(is_readonly, /* attempt_to_create_container */true); @@ -125,7 +126,7 @@ ObjectStoragePtr StorageAzureBlobConfiguration::createObjectStorage(ContextPtr c return std::make_unique("AzureBlobStorage", std::move(client), std::move(settings), container); } -AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) +AzureClientPtr StorageAzureConfiguration::createClient(bool is_read_only, bool attempt_to_create_container) { using namespace Azure::Storage::Blobs; @@ -133,8 +134,8 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only, bo if (is_connection_string) { - std::shared_ptr managed_identity_credential = std::make_shared(); - std::unique_ptr blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); + auto managed_identity_credential = std::make_shared(); + auto blob_service_client = std::make_unique(BlobServiceClient::CreateFromConnectionString(connection_url)); result = std::make_unique(BlobContainerClient::CreateFromConnectionString(connection_url, container)); if (attempt_to_create_container) @@ -243,7 +244,7 @@ AzureClientPtr StorageAzureBlobConfiguration::createClient(bool is_read_only, bo return result; } -void StorageAzureBlobConfiguration::fromNamedCollection(const NamedCollection & collection) +void StorageAzureConfiguration::fromNamedCollection(const NamedCollection & collection) { validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); @@ -275,7 +276,7 @@ void StorageAzureBlobConfiguration::fromNamedCollection(const NamedCollection & blobs_paths = {blob_path}; } -void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) +void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, bool with_structure) { if (engine_args.size() 
< 3 || engine_args.size() > (with_structure ? 8 : 7)) { @@ -396,7 +397,7 @@ void StorageAzureBlobConfiguration::fromAST(ASTs & engine_args, ContextPtr conte blobs_paths = {blob_path}; } -void StorageAzureBlobConfiguration::addStructureAndFormatToArgs( +void StorageAzureConfiguration::addStructureAndFormatToArgs( ASTs & args, const String & structure_, const String & format_, ContextPtr context) { if (tryGetNamedCollectionWithOverrides(args, context)) diff --git a/src/Storages/ObjectStorage/AzureBlob/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h similarity index 78% rename from src/Storages/ObjectStorage/AzureBlob/Configuration.h rename to src/Storages/ObjectStorage/Azure/Configuration.h index 7e105ea82b5..91a9a0bbbd5 100644 --- a/src/Storages/ObjectStorage/AzureBlob/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -5,24 +5,27 @@ #if USE_AZURE_BLOB_STORAGE #include -#include +#include +#include namespace DB { class BackupFactory; -class StorageAzureBlobConfiguration : public StorageObjectStorageConfiguration +class StorageAzureConfiguration : public StorageObjectStorage::Configuration { friend class BackupReaderAzureBlobStorage; friend class BackupWriterAzureBlobStorage; friend void registerBackupEngineAzureBlobStorage(BackupFactory & factory); public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto type_name = "azure"; static constexpr auto engine_name = "Azure"; - StorageAzureBlobConfiguration() = default; - StorageAzureBlobConfiguration(const StorageAzureBlobConfiguration & other); + StorageAzureConfiguration() = default; + StorageAzureConfiguration(const StorageAzureConfiguration & other); std::string getTypeName() const override { return type_name; } std::string getEngineName() const override { return engine_name; } @@ -31,16 +34,15 @@ public: void setPath(const Path & path) override { blob_path = path; } const Paths & getPaths() const override { return blobs_paths; } - Paths & getPaths() override { return blobs_paths; } void setPaths(const Paths & paths) override { blobs_paths = paths; } - String getDataSourceDescription() override { return fs::path(connection_url) / container; } + String getDataSourceDescription() override { return std::filesystem::path(connection_url) / container; } String getNamespace() const override { return container; } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT - StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } + ConfigurationPtr clone() override { return std::make_shared(*this); } void fromNamedCollection(const NamedCollection & collection) override; void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; diff --git a/src/Storages/ObjectStorage/DataLakes/Common.cpp b/src/Storages/ObjectStorage/DataLakes/Common.cpp index 0c9237127b9..4830cc52a90 100644 --- a/src/Storages/ObjectStorage/DataLakes/Common.cpp +++ b/src/Storages/ObjectStorage/DataLakes/Common.cpp @@ -1,6 +1,6 @@ #include "Common.h" #include -#include +#include #include namespace DB @@ -8,7 +8,7 @@ namespace DB std::vector listFiles( const IObjectStorage & object_storage, - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const String & prefix, const String & suffix) { auto key = 
std::filesystem::path(configuration.getPath()) / prefix; diff --git a/src/Storages/ObjectStorage/DataLakes/Common.h b/src/Storages/ObjectStorage/DataLakes/Common.h index ae3767f2eec..db3afa9e4a6 100644 --- a/src/Storages/ObjectStorage/DataLakes/Common.h +++ b/src/Storages/ObjectStorage/DataLakes/Common.h @@ -1,15 +1,15 @@ #pragma once #include +#include namespace DB { class IObjectStorage; -class StorageObjectStorageConfiguration; std::vector listFiles( const IObjectStorage & object_storage, - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const String & prefix, const String & suffix); } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index 571e14325bb..277d07d88ef 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -85,7 +85,7 @@ struct DeltaLakeMetadata::Impl while (true) { const auto filename = withPadding(++current_version) + metadata_file_suffix; - const auto file_path = fs::path(configuration->getPath()) / deltalake_metadata_directory / filename; + const auto file_path = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / filename; if (!object_storage->exists(StoredObject(file_path))) break; @@ -161,12 +161,12 @@ struct DeltaLakeMetadata::Impl if (json.has("add")) { const auto path = json["add"]["path"].getString(); - result.insert(fs::path(configuration->getPath()) / path); + result.insert(std::filesystem::path(configuration->getPath()) / path); } else if (json.has("remove")) { const auto path = json["remove"]["path"].getString(); - result.erase(fs::path(configuration->getPath()) / path); + result.erase(std::filesystem::path(configuration->getPath()) / path); } } } @@ -186,7 +186,7 @@ struct DeltaLakeMetadata::Impl */ size_t readLastCheckpointIfExists() const { - const auto last_checkpoint_file = fs::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; + const auto last_checkpoint_file = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / "_last_checkpoint"; if (!object_storage->exists(StoredObject(last_checkpoint_file))) return 0; @@ -249,7 +249,7 @@ struct DeltaLakeMetadata::Impl return 0; const auto checkpoint_filename = withPadding(version) + ".checkpoint.parquet"; - const auto checkpoint_path = fs::path(configuration->getPath()) / deltalake_metadata_directory / checkpoint_filename; + const auto checkpoint_path = std::filesystem::path(configuration->getPath()) / deltalake_metadata_directory / checkpoint_filename; LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string()); @@ -311,7 +311,7 @@ struct DeltaLakeMetadata::Impl if (filename.empty()) continue; LOG_TEST(log, "Adding {}", filename); - const auto [_, inserted] = result.insert(fs::path(configuration->getPath()) / filename); + const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / filename); if (!inserted) throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename); } diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h index 5050b88d809..e527721b29e 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ 
-12,8 +12,7 @@ namespace DB class DeltaLakeMetadata final : public IDataLakeMetadata { public: - using ConfigurationPtr = StorageObjectStorageConfigurationPtr; - + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; static constexpr auto name = "DeltaLake"; DeltaLakeMetadata( diff --git a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h index 6054c3f15d6..3ab274b1fbf 100644 --- a/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/HudiMetadata.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -13,7 +13,7 @@ namespace DB class HudiMetadata final : public IDataLakeMetadata, private WithContext { public: - using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; static constexpr auto name = "Hudi"; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 144cc16939c..3119b844aaf 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -88,7 +88,7 @@ public: else { ConfigurationPtr configuration = base_configuration->clone(); - configuration->getPaths() = metadata->getDataFiles(); + configuration->setPaths(metadata->getDataFiles()); return Storage::resolveSchemaFromData( object_storage_, configuration, format_settings_, local_context); } diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp index 8ee6f002ca6..591e5ef03f6 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp @@ -45,7 +45,7 @@ namespace ErrorCodes IcebergMetadata::IcebergMetadata( ObjectStoragePtr object_storage_, - StorageObjectStorageConfigurationPtr configuration_, + ConfigurationPtr configuration_, DB::ContextPtr context_, Int32 metadata_version_, Int32 format_version_, @@ -341,7 +341,7 @@ MutableColumns parseAvro( */ std::pair getMetadataFileAndVersion( ObjectStoragePtr object_storage, - const StorageObjectStorageConfiguration & configuration) + const StorageObjectStorage::Configuration & configuration) { const auto metadata_files = listFiles(*object_storage, configuration, "metadata", ".metadata.json"); if (metadata_files.empty()) @@ -378,7 +378,7 @@ std::pair getMetadataFileAndVersion( DataLakeMetadataPtr IcebergMetadata::create( ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, + ConfigurationPtr configuration, ContextPtr local_context) { const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(object_storage, *configuration); diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h index f88e3eecc67..06dbd373bf9 100644 --- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h +++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include namespace DB @@ -61,7 +61,7 @@ namespace DB class IcebergMetadata : public IDataLakeMetadata, private WithContext { public: - using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; static constexpr auto name = "Iceberg"; diff --git a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp 
b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp index a5170e5ed6b..0fa6402e892 100644 --- a/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp +++ b/src/Storages/ObjectStorage/DataLakes/registerDataLakeStorages.cpp @@ -20,7 +20,7 @@ void registerStorageIceberg(StorageFactory & factory) [&](const StorageFactory::Arguments & args) { auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageIceberg::create( configuration, args.getContext(), args.table_id, args.columns, @@ -43,7 +43,7 @@ void registerStorageDeltaLake(StorageFactory & factory) [&](const StorageFactory::Arguments & args) { auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageDeltaLake::create( configuration, args.getContext(), args.table_id, args.columns, @@ -64,7 +64,7 @@ void registerStorageHudi(StorageFactory & factory) [&](const StorageFactory::Arguments & args) { auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getLocalContext(), false); return StorageHudi::create( configuration, args.getContext(), args.table_id, args.columns, diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 12e3f3adb12..a8a9ab5b557 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -1,18 +1,21 @@ #include #if USE_HDFS -#include -#include -#include +#include #include -#include -#include #include +#include + +#include +#include + +#include +#include +#include + #include #include #include -#include - namespace DB { @@ -23,7 +26,7 @@ namespace ErrorCodes } StorageHDFSConfiguration::StorageHDFSConfiguration(const StorageHDFSConfiguration & other) - : StorageObjectStorageConfiguration(other) + : Configuration(other) { url = other.url; path = other.path; @@ -34,7 +37,7 @@ void StorageHDFSConfiguration::check(ContextPtr context) const { context->getRemoteHostFilter().checkURL(Poco::URI(url)); checkHDFSURL(fs::path(url) / path.substr(1)); - StorageObjectStorageConfiguration::check(context); + Configuration::check(context); } ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT @@ -47,10 +50,11 @@ ObjectStoragePtr StorageHDFSConfiguration::createObjectStorage( /// NOLINT settings.remote_read_min_bytes_for_seek, settings.hdfs_replication ); - return std::make_shared(url, std::move(hdfs_settings), context->getConfigRef()); + return std::make_shared( + url, std::move(hdfs_settings), context->getConfigRef(), /* lazy_initialize */true); } -std::string StorageHDFSConfiguration::getPathWithoutGlob() const +std::string StorageHDFSConfiguration::getPathWithoutGlobs() const { /// Unlike s3 and azure, which are object storages, /// hdfs is a filesystem, so it cannot list files by partual prefix, @@ -69,9 +73,9 @@ StorageObjectStorage::QuerySettings StorageHDFSConfiguration::getQuerySettings(c .create_new_file_on_insert = 
settings.hdfs_create_new_file_on_insert, .schema_inference_use_cache = settings.schema_inference_use_cache_for_hdfs, .schema_inference_mode = settings.schema_inference_mode, - .skip_empty_files = settings.hdfs_skip_empty_files, /// TODO: add setting for hdfs - .list_object_keys_size = settings.s3_list_object_keys_size, /// TODO: add a setting for hdfs - .throw_on_zero_files_match = settings.s3_throw_on_zero_files_match, + .skip_empty_files = settings.hdfs_skip_empty_files, + .list_object_keys_size = 0, /// HDFS does not support listing in batches. + .throw_on_zero_files_match = settings.hdfs_throw_on_zero_files_match, .ignore_non_existent_file = settings.hdfs_ignore_file_doesnt_exist, }; } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 0a502857153..cac09ee1d92 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -2,17 +2,18 @@ #include "config.h" #if USE_HDFS -#include +#include #include -#include #include namespace DB { -class StorageHDFSConfiguration : public StorageObjectStorageConfiguration +class StorageHDFSConfiguration : public StorageObjectStorage::Configuration { public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto type_name = "hdfs"; static constexpr auto engine_name = "HDFS"; @@ -26,7 +27,6 @@ public: void setPath(const Path & path_) override { path = path_; } const Paths & getPaths() const override { return paths; } - Paths & getPaths() override { return paths; } void setPaths(const Paths & paths_) override { paths = paths_; } String getNamespace() const override { return ""; } @@ -35,12 +35,12 @@ public: void check(ContextPtr context) const override; ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT - StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } + ConfigurationPtr clone() override { return std::make_shared(*this); } void addStructureAndFormatToArgs( ASTs & args, const String & structure_, const String & format_, ContextPtr context) override; - std::string getPathWithoutGlob() const override; + std::string getPathWithoutGlobs() const override; private: void fromNamedCollection(const NamedCollection &) override; diff --git a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp index b37b9de746b..be339d021dc 100644 --- a/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.cpp @@ -114,10 +114,10 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory= file_size) - // { - // return false; - // } + if (file_size != 0 && file_offset >= file_size) + { + return false; + } ResourceGuard rlock(read_settings.resource_link, num_bytes_to_read); int bytes_read; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 9c1d3f79c2b..3705725ffe1 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -10,7 +10,6 @@ namespace ErrorCodes { extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; extern const int CANNOT_DETECT_FORMAT; - } ReadBufferIterator::ReadBufferIterator( @@ -29,18 +28,19 @@ ReadBufferIterator::ReadBufferIterator( , query_settings(configuration->getQuerySettings(context_)) , schema_cache(schema_cache_) , read_keys(read_keys_) - , 
format(configuration->format == "auto" ? std::nullopt : std::optional(configuration->format)) , prev_read_keys_size(read_keys_.size()) { + if (configuration->format != "auto") + format = configuration->format; } SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const String & path, const String & format_name) const { - auto source = fs::path(configuration->getDataSourceDescription()) / path; + auto source = std::filesystem::path(configuration->getDataSourceDescription()) / path; return DB::getKeyForSchemaCache(source, format_name, format_settings, getContext()); } -SchemaCache::Keys ReadBufferIterator::getPathsForSchemaCache() const +SchemaCache::Keys ReadBufferIterator::getKeysForSchemaCache() const { Strings sources; sources.reserve(read_keys.size()); @@ -49,7 +49,7 @@ SchemaCache::Keys ReadBufferIterator::getPathsForSchemaCache() const std::back_inserter(sources), [&](const auto & elem) { - return fs::path(configuration->getDataSourceDescription()) / elem->relative_path; + return std::filesystem::path(configuration->getDataSourceDescription()) / elem->relative_path; }); return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); } @@ -66,16 +66,14 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( const auto & object_info = (*it); auto get_last_mod_time = [&] -> std::optional { - if (object_info->metadata) - return object_info->metadata->last_modified.epochTime(); - else - { - object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); - return object_info->metadata->last_modified.epochTime(); - } + if (!object_info->metadata) + object_info->metadata = object_storage->tryGetObjectMetadata(object_info->relative_path); + + return object_info->metadata + ? std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; }; - chassert(object_info); if (format) { auto cache_key = getKeyForSchemaCache(object_info->relative_path, *format); @@ -105,14 +103,12 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) { - chassert(current_object_info); if (query_settings.schema_inference_use_cache) schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->relative_path, *format), num_rows); } void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) { - chassert(current_object_info); if (query_settings.schema_inference_use_cache && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) { @@ -125,7 +121,7 @@ void ReadBufferIterator::setResultingSchema(const ColumnsDescription & columns) if (query_settings.schema_inference_use_cache && query_settings.schema_inference_mode == SchemaInferenceMode::DEFAULT) { - schema_cache.addManyColumns(getPathsForSchemaCache(), columns); + schema_cache.addManyColumns(getKeysForSchemaCache(), columns); } } @@ -144,15 +140,11 @@ String ReadBufferIterator::getLastFileName() const std::unique_ptr ReadBufferIterator::recreateLastReadBuffer() { - chassert(current_object_info); - - auto impl = object_storage->readObject( - StoredObject(current_object_info->relative_path), getContext()->getReadSettings()); - - int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - return wrapReadBufferWithCompressionMethod( - std::move(impl), chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), - zstd_window_log_max); + auto context = getContext(); + auto impl = 
object_storage->readObject(StoredObject(current_object_info->relative_path), context->getReadSettings()); + const auto compression_method = chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method); + const auto zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); + return wrapReadBufferWithCompressionMethod(std::move(impl), compression_method, zstd_window_log_max); } ReadBufferIterator::Data ReadBufferIterator::next() @@ -190,16 +182,21 @@ ReadBufferIterator::Data ReadBufferIterator::next() if (first) { if (format.has_value()) + { throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "The table structure cannot be extracted from a {} format file, because there are no files with provided path " + "The table structure cannot be extracted from a {} format file, " + "because there are no files with provided path " "in {} or all files are empty. You can specify table structure manually", *format, object_storage->getName()); + } throw Exception( ErrorCodes::CANNOT_DETECT_FORMAT, - "The data format cannot be detected by the contents of the files, because there are no files with provided path " - "in {} or all files are empty. You can specify the format manually", object_storage->getName()); + "The data format cannot be detected by the contents of the files, " + "because there are no files with provided path " + "in {} or all files are empty. You can specify the format manually", + object_storage->getName()); } return {nullptr, std::nullopt, format}; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 2d58e1c789e..287e316e243 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -1,8 +1,7 @@ #pragma once #include -#include -#include #include +#include namespace DB @@ -12,6 +11,9 @@ class ReadBufferIterator : public IReadBufferIterator, WithContext { public: using FileIterator = std::shared_ptr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + using ObjectInfoPtr = StorageObjectStorage::ObjectInfoPtr; + using ObjectInfos = StorageObjectStorage::ObjectInfos; ReadBufferIterator( ObjectStoragePtr object_storage_, @@ -40,7 +42,7 @@ public: private: SchemaCache::Key getKeyForSchemaCache(const String & path, const String & format_name) const; - SchemaCache::Keys getPathsForSchemaCache() const; + SchemaCache::Keys getKeysForSchemaCache() const; std::optional tryGetColumnsFromCache( const ObjectInfos::iterator & begin, const ObjectInfos::iterator & end); diff --git a/src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp deleted file mode 100644 index f19e01cdc3e..00000000000 --- a/src/Storages/ObjectStorage/ReadFromObjectStorageStep.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -ReadFromObjectStorageStep::ReadFromObjectStorageStep( - ObjectStoragePtr object_storage_, - ConfigurationPtr configuration_, - const String & name_, - const Names & columns_to_read, - const NamesAndTypesList & virtual_columns_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const std::optional & format_settings_, - bool distributed_processing_, - ReadFromFormatInfo info_, - SchemaCache & schema_cache_, - const bool need_only_count_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_) - : SourceStepWithFilter(DataStream{.header = info_.source_header}, 
columns_to_read, query_info_, storage_snapshot_, context_) - , object_storage(object_storage_) - , configuration(configuration_) - , info(std::move(info_)) - , virtual_columns(virtual_columns_) - , format_settings(format_settings_) - , query_settings(configuration->getQuerySettings(context_)) - , schema_cache(schema_cache_) - , name(name_ + "Source") - , need_only_count(need_only_count_) - , max_block_size(max_block_size_) - , num_streams(num_streams_) - , distributed_processing(distributed_processing_) -{ -} - -void ReadFromObjectStorageStep::createIterator(const ActionsDAG::Node * predicate) -{ - if (!iterator_wrapper) - { - auto context = getContext(); - iterator_wrapper = StorageObjectStorageSource::createFileIterator( - configuration, object_storage, distributed_processing, - context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); - } -} - -void ReadFromObjectStorageStep::applyFilters(ActionDAGNodes added_filter_nodes) -{ - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - createIterator(predicate); -} - -void ReadFromObjectStorageStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - createIterator(nullptr); - auto context = getContext(); - - Pipes pipes; - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - getName(), object_storage, configuration, info, format_settings, query_settings, - context, max_block_size, iterator_wrapper, need_only_count, schema_cache); - - source->setKeyCondition(filter_actions_dag, context); - pipes.emplace_back(std::move(source)); - } - - auto pipe = Pipe::unitePipes(std::move(pipes)); - if (pipe.empty()) - pipe = Pipe(std::make_shared(info.source_header)); - - for (const auto & processor : pipe.getProcessors()) - processors.emplace_back(processor); - - pipeline.init(std::move(pipe)); -} - -} diff --git a/src/Storages/ObjectStorage/ReadFromObjectStorageStep.h b/src/Storages/ObjectStorage/ReadFromObjectStorageStep.h deleted file mode 100644 index d98ebfef1f2..00000000000 --- a/src/Storages/ObjectStorage/ReadFromObjectStorageStep.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once -#include -#include - -namespace DB -{ - -class ReadFromObjectStorageStep : public SourceStepWithFilter -{ -public: - using ConfigurationPtr = StorageObjectStorageConfigurationPtr; - - ReadFromObjectStorageStep( - ObjectStoragePtr object_storage_, - ConfigurationPtr configuration_, - const String & name_, - const Names & columns_to_read, - const NamesAndTypesList & virtual_columns_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const std::optional & format_settings_, - bool distributed_processing_, - ReadFromFormatInfo info_, - SchemaCache & schema_cache_, - bool need_only_count_, - ContextPtr context_, - size_t max_block_size_, - size_t num_streams_); - - std::string getName() const override { return name; } - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - -private: - ObjectStoragePtr object_storage; - ConfigurationPtr configuration; - std::shared_ptr iterator_wrapper; - - const ReadFromFormatInfo info; - const NamesAndTypesList virtual_columns; - const std::optional format_settings; - const StorageObjectStorage::QuerySettings query_settings; - SchemaCache & 
schema_cache; - const String name; - const bool need_only_count; - const size_t max_block_size; - const size_t num_streams; - const bool distributed_processing; - - void createIterator(const ActionsDAG::Node * predicate); -}; - -} diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index bfd61c647f8..9fcbc6a6816 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -1,17 +1,23 @@ #include #if USE_AWS_S3 - #include +#include #include + +#include #include -#include + #include #include + #include #include #include +#include +#include + namespace DB { namespace ErrorCodes @@ -46,7 +52,7 @@ static const std::unordered_set optional_configuration_keys = String StorageS3Configuration::getDataSourceDescription() { - return fs::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; + return std::filesystem::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; } void StorageS3Configuration::check(ContextPtr context) const @@ -54,7 +60,7 @@ void StorageS3Configuration::check(ContextPtr context) const validateNamespace(url.bucket); context->getGlobalContext()->getRemoteHostFilter().checkURL(url.uri); context->getGlobalContext()->getHTTPHeaderFilter().checkHeaders(headers_from_ast); - StorageObjectStorageConfiguration::check(context); + Configuration::check(context); } void StorageS3Configuration::validateNamespace(const String & name) const @@ -63,7 +69,7 @@ void StorageS3Configuration::validateNamespace(const String & name) const } StorageS3Configuration::StorageS3Configuration(const StorageS3Configuration & other) - : StorageObjectStorageConfiguration(other) + : Configuration(other) { url = other.url; static_configuration = other.static_configuration; @@ -91,11 +97,12 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, assertInitialized(); const auto & config = context->getConfigRef(); + const auto & settings = context->getSettingsRef(); const std::string config_prefix = "s3."; - auto s3_settings = getSettings(config, config_prefix, context, false); /// FIXME: add a setting + auto s3_settings = getSettings(config, config_prefix, context, settings.s3_validate_request_settings); - request_settings.updateFromSettingsIfChanged(context->getSettingsRef()); + request_settings.updateFromSettingsIfChanged(settings); auth_settings.updateFrom(s3_settings->auth_settings); s3_settings->auth_settings = auth_settings; diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index de4a6d17579..9eb724c4a64 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -4,17 +4,17 @@ #if USE_AWS_S3 -#include #include -#include -#include +#include namespace DB { -class StorageS3Configuration : public StorageObjectStorageConfiguration +class StorageS3Configuration : public StorageObjectStorage::Configuration { public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + static constexpr auto type_name = "s3"; StorageS3Configuration() = default; @@ -27,7 +27,6 @@ public: void setPath(const Path & path) override { url.key = path; } const Paths & getPaths() const override { return keys; } - Paths & getPaths() override { return keys; } void setPaths(const Paths & paths) override { keys = paths; } String getNamespace() const override { return url.bucket; } @@ -37,7 +36,7 @@ public: void check(ContextPtr context) const 
override; void validateNamespace(const String & name) const override; - StorageObjectStorageConfigurationPtr clone() override { return std::make_shared(*this); } + ConfigurationPtr clone() override { return std::make_shared(*this); } bool isStaticConfiguration() const override { return static_configuration; } ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index c5565d8b0e8..2c9831f0d29 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -2,21 +2,25 @@ #include #include -#include #include +#include + +#include #include +#include +#include #include #include -#include + #include +#include #include -#include +#include +#include +#include #include #include -#include #include -#include -#include namespace DB @@ -26,6 +30,7 @@ namespace ErrorCodes { extern const int DATABASE_ACCESS_DENIED; extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } StorageObjectStorage::StorageObjectStorage( @@ -90,6 +95,110 @@ void StorageObjectStorage::updateConfiguration(ContextPtr context) object_storage->applyNewSettings(context->getConfigRef(), "s3.", context); } +namespace +{ +class ReadFromObjectStorageStep : public SourceStepWithFilter +{ +public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + + ReadFromObjectStorageStep( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + const String & name_, + const Names & columns_to_read, + const NamesAndTypesList & virtual_columns_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const std::optional & format_settings_, + bool distributed_processing_, + ReadFromFormatInfo info_, + SchemaCache & schema_cache_, + const bool need_only_count_, + ContextPtr context_, + size_t max_block_size_, + size_t num_streams_) + : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) + , object_storage(object_storage_) + , configuration(configuration_) + , schema_cache(schema_cache_) + , info(std::move(info_)) + , virtual_columns(virtual_columns_) + , format_settings(format_settings_) + , query_settings(configuration->getQuerySettings(context_)) + , name(name_ + "Source") + , need_only_count(need_only_count_) + , max_block_size(max_block_size_) + , num_streams(num_streams_) + , distributed_processing(distributed_processing_) + { + } + + std::string getName() const override { return name; } + + void applyFilters(ActionDAGNodes added_filter_nodes) override + { + filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + createIterator(predicate); + } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + createIterator(nullptr); + Pipes pipes; + auto context = getContext(); + + for (size_t i = 0; i < num_streams; ++i) + { + auto source = std::make_shared( + getName(), object_storage, configuration, info, format_settings, query_settings, + context, max_block_size, iterator_wrapper, need_only_count, schema_cache); + + source->setKeyCondition(filter_actions_dag, context); + pipes.emplace_back(std::move(source)); + } + + auto pipe = Pipe::unitePipes(std::move(pipes)); + if 
(pipe.empty()) + pipe = Pipe(std::make_shared(info.source_header)); + + for (const auto & processor : pipe.getProcessors()) + processors.emplace_back(processor); + + pipeline.init(std::move(pipe)); + } + +private: + ObjectStoragePtr object_storage; + ConfigurationPtr configuration; + std::shared_ptr iterator_wrapper; + SchemaCache & schema_cache; + + const ReadFromFormatInfo info; + const NamesAndTypesList virtual_columns; + const std::optional format_settings; + const StorageObjectStorage::QuerySettings query_settings; + const String name; + const bool need_only_count; + const size_t max_block_size; + const size_t num_streams; + const bool distributed_processing; + + void createIterator(const ActionsDAG::Node * predicate) + { + if (iterator_wrapper) + return; + auto context = getContext(); + iterator_wrapper = StorageObjectStorageSource::createFileIterator( + configuration, object_storage, distributed_processing, + context, predicate, virtual_columns, nullptr, context->getFileProgressCallback()); + } +}; +} + void StorageObjectStorage::read( QueryPlan & query_plan, const Names & column_names, @@ -123,7 +232,7 @@ void StorageObjectStorage::read( storage_snapshot, format_settings, distributed_processing, - std::move(read_from_format_info), + read_from_format_info, getSchemaCache(local_context), need_only_count, local_context, @@ -169,12 +278,13 @@ SinkToStoragePtr StorageObjectStorage::write( getName(), configuration->getPath()); } - auto & paths = configuration->getPaths(); + auto paths = configuration->getPaths(); if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( *object_storage, *configuration, settings, paths.front(), paths.size())) { paths.push_back(*new_key); } + configuration->setPaths(paths); return std::make_shared( object_storage, @@ -185,10 +295,10 @@ SinkToStoragePtr StorageObjectStorage::write( } void StorageObjectStorage::truncate( - const ASTPtr &, - const StorageMetadataPtr &, - ContextPtr, - TableExclusiveLockHolder &) + const ASTPtr & /* query */, + const StorageMetadataPtr & /* metadata_snapshot */, + ContextPtr /* context */, + TableExclusiveLockHolder & /* table_holder */) { if (configuration->withGlobs()) { @@ -233,10 +343,8 @@ ColumnsDescription StorageObjectStorage::resolveSchemaFromData( const ContextPtr & context) { ObjectInfos read_keys; - auto read_buffer_iterator = createReadBufferIterator( - object_storage, configuration, format_settings, read_keys, context); - return readSchemaFromFormat( - configuration->format, format_settings, *read_buffer_iterator, context); + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + return readSchemaFromFormat(configuration->format, format_settings, *iterator, context); } std::string StorageObjectStorage::resolveFormatFromData( @@ -246,10 +354,8 @@ std::string StorageObjectStorage::resolveFormatFromData( const ContextPtr & context) { ObjectInfos read_keys; - auto read_buffer_iterator = createReadBufferIterator( - object_storage, configuration, format_settings, read_keys, context); - return detectFormatAndReadSchema( - format_settings, *read_buffer_iterator, context).second; + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + return detectFormatAndReadSchema(format_settings, *iterator, context).second; } std::pair StorageObjectStorage::resolveSchemaAndFormatFromData( @@ -259,10 +365,8 @@ std::pair StorageObjectStorage::resolveSchemaAn const ContextPtr & context) { ObjectInfos read_keys; - auto 
read_buffer_iterator = createReadBufferIterator( - object_storage, configuration, format_settings, read_keys, context); - - auto [columns, format] = detectFormatAndReadSchema(format_settings, *read_buffer_iterator, context); + auto iterator = createReadBufferIterator(object_storage, configuration, format_settings, read_keys, context); + auto [columns, format] = detectFormatAndReadSchema(format_settings, *iterator, context); configuration->format = format; return std::pair(columns, format); } @@ -302,4 +406,65 @@ SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, c throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported storage type: {}", storage_type_name); } +void StorageObjectStorage::Configuration::initialize( + Configuration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) + configuration.fromNamedCollection(*named_collection); + else + configuration.fromAST(engine_args, local_context, with_table_structure); + + // FIXME: it should be - if (format == "auto" && get_format_from_file) + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); + else + FormatFactory::instance().checkFormatName(configuration.format); + + configuration.initialized = true; +} + +void StorageObjectStorage::Configuration::check(ContextPtr) const +{ + FormatFactory::instance().checkFormatName(format); +} + +StorageObjectStorage::Configuration::Configuration(const Configuration & other) +{ + format = other.format; + compression_method = other.compression_method; + structure = other.structure; +} + +bool StorageObjectStorage::Configuration::withWildcard() const +{ + static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + return getPath().find(PARTITION_ID_WILDCARD) != String::npos + || getNamespace().find(PARTITION_ID_WILDCARD) != String::npos; +} + +bool StorageObjectStorage::Configuration::isPathWithGlobs() const +{ + return getPath().find_first_of("*?{") != std::string::npos; +} + +bool StorageObjectStorage::Configuration::isNamespaceWithGlobs() const +{ + return getNamespace().find_first_of("*?{") != std::string::npos; +} + +std::string StorageObjectStorage::Configuration::getPathWithoutGlobs() const +{ + return getPath().substr(0, getPath().find_first_of("*?{")); +} + +void StorageObjectStorage::Configuration::assertInitialized() const +{ + if (!initialized) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Configuration was not initialized before usage"); + } +} } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index d46a875bf42..46d422b26c2 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -2,15 +2,16 @@ #include #include #include +#include #include #include namespace DB { -class StorageObjectStorageConfiguration; class ReadBufferIterator; class SchemaCache; +class NamedCollection; /** * A general class containing implementation for external table engines @@ -20,7 +21,7 @@ class SchemaCache; class StorageObjectStorage : public IStorage { public: - using Configuration = StorageObjectStorageConfiguration; + class Configuration; using ConfigurationPtr = std::shared_ptr; using ObjectInfo = RelativePathWithMetadata; using ObjectInfoPtr = std::shared_ptr; @@ -134,4 +135,61 @@ protected: std::mutex 
configuration_update_mutex; }; +class StorageObjectStorage::Configuration +{ +public: + Configuration() = default; + Configuration(const Configuration & other); + virtual ~Configuration() = default; + + using Path = std::string; + using Paths = std::vector; + + static void initialize( + Configuration & configuration, + ASTs & engine_args, + ContextPtr local_context, + bool with_table_structure); + + virtual std::string getTypeName() const = 0; + virtual std::string getEngineName() const = 0; + + virtual Path getPath() const = 0; + virtual void setPath(const Path & path) = 0; + + virtual const Paths & getPaths() const = 0; + virtual void setPaths(const Paths & paths) = 0; + + virtual String getDataSourceDescription() = 0; + virtual String getNamespace() const = 0; + virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; + virtual void addStructureAndFormatToArgs( + ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; + + bool withWildcard() const; + bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } + bool isPathWithGlobs() const; + bool isNamespaceWithGlobs() const; + virtual std::string getPathWithoutGlobs() const; + + virtual void check(ContextPtr context) const; + virtual void validateNamespace(const String & /* name */) const {} + + virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT + virtual ConfigurationPtr clone() = 0; + virtual bool isStaticConfiguration() const { return true; } + + String format = "auto"; + String compression_method = "auto"; + String structure = "auto"; + +protected: + virtual void fromNamedCollection(const NamedCollection & collection) = 0; + virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; + + void assertInitialized() const; + + bool initialized = false; +}; + } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 1a1df399626..193894a1d44 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -1,21 +1,15 @@ #include "Storages/ObjectStorage/StorageObjectStorageCluster.h" -#include "config.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include + +#include #include +#include +#include + namespace DB { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 2db8f5c352e..b38eb722df5 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -1,12 +1,10 @@ #pragma once -#include "config.h" - -#include +// #include #include #include #include -#include +// #include namespace DB { @@ -29,17 +27,14 @@ public: std::string getName() const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension( - const ActionsDAG::Node * predicate, - const ContextPtr & context) const override; - bool supportsSubcolumns() const override { return true; } bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } -private: - void updateBeforeRead(const ContextPtr & /* context */) override {} + RemoteQueryExecutor::Extension getTaskIteratorExtension( + const ActionsDAG::Node * predicate, const ContextPtr & context) const 
override; +private: void updateQueryToSendIfNeeded( ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp deleted file mode 100644 index 89c15085274..00000000000 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -void StorageObjectStorageConfiguration::initialize( - StorageObjectStorageConfiguration & configuration, - ASTs & engine_args, - ContextPtr local_context, - bool with_table_structure) -{ - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, local_context)) - configuration.fromNamedCollection(*named_collection); - else - configuration.fromAST(engine_args, local_context, with_table_structure); - - // FIXME: it should be - if (format == "auto" && get_format_from_file) - if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); - else - FormatFactory::instance().checkFormatName(configuration.format); - - configuration.initialized = true; -} - -void StorageObjectStorageConfiguration::check(ContextPtr) const -{ - FormatFactory::instance().checkFormatName(format); -} - -StorageObjectStorageConfiguration::StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other) -{ - format = other.format; - compression_method = other.compression_method; - structure = other.structure; -} - -bool StorageObjectStorageConfiguration::withWildcard() const -{ - static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - return getPath().find(PARTITION_ID_WILDCARD) != String::npos - || getNamespace().find(PARTITION_ID_WILDCARD) != String::npos; -} - -bool StorageObjectStorageConfiguration::isPathWithGlobs() const -{ - return getPath().find_first_of("*?{") != std::string::npos; -} - -bool StorageObjectStorageConfiguration::isNamespaceWithGlobs() const -{ - return getNamespace().find_first_of("*?{") != std::string::npos; -} - -std::string StorageObjectStorageConfiguration::getPathWithoutGlob() const -{ - return getPath().substr(0, getPath().find_first_of("*?{")); -} - -void StorageObjectStorageConfiguration::assertInitialized() const -{ - if (!initialized) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Configuration was not initialized before usage"); - } -} - -} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h b/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h deleted file mode 100644 index c55362aa8bd..00000000000 --- a/src/Storages/ObjectStorage/StorageObjectStorageConfiguration.h +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once -#include -#include -#include "StorageObjectStorage.h" -#include - -namespace fs = std::filesystem; - -namespace DB -{ - -class StorageObjectStorageConfiguration; -using StorageObjectStorageConfigurationPtr = std::shared_ptr; - -class StorageObjectStorageConfiguration -{ -public: - StorageObjectStorageConfiguration() = default; - StorageObjectStorageConfiguration(const StorageObjectStorageConfiguration & other); - virtual ~StorageObjectStorageConfiguration() = default; - - using Path = std::string; - using Paths = std::vector; - - static void initialize( - StorageObjectStorageConfiguration & configuration, - ASTs & engine_args, - ContextPtr local_context, - bool with_table_structure); - - 
virtual std::string getTypeName() const = 0; - virtual std::string getEngineName() const = 0; - - virtual Path getPath() const = 0; - virtual void setPath(const Path & path) = 0; - - virtual const Paths & getPaths() const = 0; - virtual Paths & getPaths() = 0; - virtual void setPaths(const Paths & paths) = 0; - - virtual String getDataSourceDescription() = 0; - virtual String getNamespace() const = 0; - virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; - virtual void addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; - - bool withWildcard() const; - bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } - bool isPathWithGlobs() const; - bool isNamespaceWithGlobs() const; - virtual std::string getPathWithoutGlob() const; - - virtual void check(ContextPtr context) const; - virtual void validateNamespace(const String & /* name */) const {} - - virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT - virtual StorageObjectStorageConfigurationPtr clone() = 0; - virtual bool isStaticConfiguration() const { return true; } - - String format = "auto"; - String compression_method = "auto"; - String structure = "auto"; - -protected: - virtual void fromNamedCollection(const NamedCollection & collection) = 0; - virtual void fromAST(ASTs & args, ContextPtr context, bool with_structure) = 0; - - void assertInitialized() const; - - bool initialized = false; -}; - -using StorageObjectStorageConfigurationPtr = std::shared_ptr; - -} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 62367a6b933..81bdeaa43a3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -14,14 +14,13 @@ namespace ErrorCodes StorageObjectStorageSink::StorageObjectStorageSink( ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, + ConfigurationPtr configuration, std::optional format_settings_, const Block & sample_block_, ContextPtr context, const std::string & blob_path) : SinkToStorage(sample_block_) , sample_block(sample_block_) - , format_settings(format_settings_) { const auto & settings = context->getSettingsRef(); const auto path = blob_path.empty() ? 
configuration->getPaths().back() : blob_path; @@ -37,7 +36,7 @@ StorageObjectStorageSink::StorageObjectStorageSink( static_cast(settings.output_format_compression_zstd_window_log)); writer = FormatFactory::instance().getOutputFormatParallelIfPossible( - configuration->format, *write_buf, sample_block, context, format_settings); + configuration->format, *write_buf, sample_block, context, format_settings_); } void StorageObjectStorageSink::consume(Chunk chunk) @@ -102,7 +101,7 @@ void StorageObjectStorageSink::release() PartitionedStorageObjectStorageSink::PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, - StorageObjectStorageConfigurationPtr configuration_, + ConfigurationPtr configuration_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index 6c2f73e40e3..a3c8ef68cf0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -1,17 +1,18 @@ #pragma once #include -#include #include -#include +#include namespace DB { class StorageObjectStorageSink : public SinkToStorage { public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + StorageObjectStorageSink( ObjectStoragePtr object_storage, - StorageObjectStorageConfigurationPtr configuration, + ConfigurationPtr configuration, std::optional format_settings_, const Block & sample_block_, ContextPtr context, @@ -29,8 +30,6 @@ public: private: const Block sample_block; - const std::optional format_settings; - std::unique_ptr write_buf; OutputFormatPtr writer; bool cancelled = false; @@ -43,9 +42,11 @@ private: class PartitionedStorageObjectStorageSink : public PartitionedSink { public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + PartitionedStorageObjectStorageSink( ObjectStoragePtr object_storage_, - StorageObjectStorageConfigurationPtr configuration_, + ConfigurationPtr configuration_, std::optional format_settings_, const Block & sample_block_, ContextPtr context_, @@ -58,7 +59,8 @@ private: void validateNamespace(const String & str); ObjectStoragePtr object_storage; - StorageObjectStorageConfigurationPtr configuration; + ConfigurationPtr configuration; + const StorageObjectStorage::QuerySettings query_settings; const std::optional format_settings; const Block sample_block; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 4551c2df7c3..b224afb7a58 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -9,10 +9,11 @@ #include #include #include -#include +#include #include #include +namespace fs = std::filesystem; namespace ProfileEvents { @@ -218,11 +219,9 @@ std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const O auto get_last_mod_time = [&]() -> std::optional { - if (object_info->metadata) - { - return object_info->metadata->last_modified.epochTime(); - } - return std::nullopt; + return object_info->metadata + ? 
std::optional(object_info->metadata->last_modified.epochTime()) + : std::nullopt; }; return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); } @@ -354,7 +353,7 @@ StorageObjectStorageSource::IIterator::IIterator(const std::string & logger_name { } -ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::IIterator::next(size_t processor) { auto object_info = nextImpl(processor); @@ -392,7 +391,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( else if (configuration->isPathWithGlobs()) { const auto key_with_globs = configuration_->getPath(); - const auto key_prefix = configuration->getPathWithoutGlob(); + const auto key_prefix = configuration->getPathWithoutGlobs(); object_storage_iterator = object_storage->iterate(key_prefix, list_object_keys_size); matcher = std::make_unique(makeRegexpPatternFromGlobs(key_with_globs)); @@ -423,7 +422,7 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } } -ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processor) +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processor) { std::lock_guard lock(next_mutex); auto object_info = nextImplUnlocked(processor); @@ -439,7 +438,7 @@ ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processo return object_info; } -ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImplUnlocked(size_t /* processor */) +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImplUnlocked(size_t /* processor */) { bool current_batch_processed = object_infos.empty() || index >= object_infos.size(); if (is_finished && current_batch_processed) @@ -533,7 +532,7 @@ StorageObjectStorageSource::KeysIterator::KeysIterator( } } -ObjectInfoPtr StorageObjectStorageSource::KeysIterator::nextImpl(size_t /* processor */) +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::KeysIterator::nextImpl(size_t /* processor */) { while (true) { @@ -614,7 +613,7 @@ StorageObjectStorageSource::ReadTaskIterator::ReadTaskIterator( } } -ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::nextImpl(size_t) +StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator::nextImpl(size_t) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= buffer.size()) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 0afbf77db2b..356478422bc 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -3,8 +3,8 @@ #include #include #include -#include #include +#include namespace DB @@ -16,6 +16,11 @@ class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { friend class StorageS3QueueSource; public: + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; + using ObjectInfo = StorageObjectStorage::ObjectInfo; + using ObjectInfos = StorageObjectStorage::ObjectInfos; + using ObjectInfoPtr = StorageObjectStorage::ObjectInfoPtr; + class IIterator; class ReadTaskIterator; class GlobIterator; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h b/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h deleted file mode 100644 index 241e2f20962..00000000000 --- a/src/Storages/ObjectStorage/StorageObjectStorage_fwd_internal.h +++ /dev/null @@ -1,12 +0,0 @@ 
-#pragma once -#include - -namespace DB -{ - -using ConfigurationPtr = StorageObjectStorageConfigurationPtr; -using ObjectInfo = RelativePathWithMetadata; -using ObjectInfoPtr = std::shared_ptr; -using ObjectInfos = std::vector; - -} diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index 2a7236ab196..bde3cb7e1cb 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { @@ -47,14 +47,15 @@ void resolveSchemaAndFormat( ColumnsDescription & columns, std::string & format, ObjectStoragePtr object_storage, - const StorageObjectStorageConfigurationPtr & configuration, + const StorageObjectStorage::ConfigurationPtr & configuration, std::optional format_settings, const ContextPtr & context) { if (columns.empty()) { if (format == "auto") - std::tie(columns, format) = StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); + std::tie(columns, format) = + StorageObjectStorage::resolveSchemaAndFormatFromData(object_storage, configuration, format_settings, context); else columns = StorageObjectStorage::resolveSchemaFromData(object_storage, configuration, format_settings, context); } diff --git a/src/Storages/ObjectStorage/Utils.h b/src/Storages/ObjectStorage/Utils.h index 3a752e6b8f0..2077999df41 100644 --- a/src/Storages/ObjectStorage/Utils.h +++ b/src/Storages/ObjectStorage/Utils.h @@ -1,14 +1,10 @@ #pragma once -#include #include "StorageObjectStorage.h" namespace DB { class IObjectStorage; -class StorageObjectStorageConfiguration; -using StorageObjectStorageConfigurationPtr = std::shared_ptr; -struct StorageObjectStorageSettings; std::optional checkAndGetNewFileOnInsertIfNeeded( const IObjectStorage & object_storage, @@ -21,7 +17,7 @@ void resolveSchemaAndFormat( ColumnsDescription & columns, std::string & format, ObjectStoragePtr object_storage, - const StorageObjectStorageConfigurationPtr & configuration, + const StorageObjectStorage::ConfigurationPtr & configuration, std::optional format_settings, const ContextPtr & context); diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index 06b8aefb716..c23b180215e 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -1,8 +1,8 @@ -#include +#include #include #include #include -#include +#include #include #include @@ -18,13 +18,15 @@ namespace ErrorCodes static std::shared_ptr createStorageObjectStorage( const StorageFactory::Arguments & args, - typename StorageObjectStorage::ConfigurationPtr configuration, + StorageObjectStorage::ConfigurationPtr configuration, ContextPtr context) { auto & engine_args = args.engine_args; if (engine_args.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, context, false); + // Use format settings from global server context + settings from // the SETTINGS clause of the create query. Settings from current // session and user are ignored. 
@@ -75,10 +77,8 @@ void registerStorageAzure(StorageFactory & factory) { factory.registerStorage("AzureBlobStorage", [](const StorageFactory::Arguments & args) { - auto context = args.getLocalContext(); - auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, context); + auto configuration = std::make_shared(); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); }, { .supports_settings = true, @@ -94,10 +94,8 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) { factory.registerStorage(name, [=](const StorageFactory::Arguments & args) { - auto context = args.getLocalContext(); auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, context); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); }, { .supports_settings = true, @@ -129,10 +127,8 @@ void registerStorageHDFS(StorageFactory & factory) { factory.registerStorage("HDFS", [=](const StorageFactory::Arguments & args) { - auto context = args.getLocalContext(); auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, context, false); - return createStorageObjectStorage(args, configuration, context); + return createStorageObjectStorage(args, configuration, args.getLocalContext()); }, { .supports_settings = true, diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.cpp b/src/Storages/S3Queue/S3QueueTableMetadata.cpp index 8354e6aa2ae..f0b7568ae7f 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.cpp +++ b/src/Storages/S3Queue/S3QueueTableMetadata.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -32,7 +33,7 @@ namespace S3QueueTableMetadata::S3QueueTableMetadata( - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata) { diff --git a/src/Storages/S3Queue/S3QueueTableMetadata.h b/src/Storages/S3Queue/S3QueueTableMetadata.h index 2158b189070..bb8f8ccf2c4 100644 --- a/src/Storages/S3Queue/S3QueueTableMetadata.h +++ b/src/Storages/S3Queue/S3QueueTableMetadata.h @@ -4,7 +4,7 @@ #include #include -#include +#include #include namespace DB @@ -29,7 +29,7 @@ struct S3QueueTableMetadata S3QueueTableMetadata() = default; S3QueueTableMetadata( - const StorageObjectStorageConfiguration & configuration, + const StorageObjectStorage::Configuration & configuration, const S3QueueSettings & engine_settings, const StorageInMemoryMetadata & storage_metadata); diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index e84dabecf3b..38934a7895a 100644 --- a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -591,7 +591,7 @@ void registerStorageS3QueueImpl(const String & name, StorageFactory & factory) throw Exception(ErrorCodes::BAD_ARGUMENTS, "External data source must have arguments"); auto configuration = std::make_shared(); - StorageObjectStorageConfiguration::initialize(*configuration, args.engine_args, args.getContext(), false); + StorageObjectStorage::Configuration::initialize(*configuration, args.engine_args, args.getContext(), false); // Use format settings from global 
server context + settings from // the SETTINGS clause of the create query. Settings from current diff --git a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp index a2d3f342a63..b67a8b23e9d 100644 --- a/src/Storages/System/StorageSystemSchemaInferenceCache.cpp +++ b/src/Storages/System/StorageSystemSchemaInferenceCache.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace DB { @@ -84,7 +84,7 @@ void StorageSystemSchemaInferenceCache::fillData(MutableColumns & res_columns, C #endif fillDataImpl(res_columns, StorageURL::getSchemaCache(context), "URL"); #if USE_AZURE_BLOB_STORAGE - fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageAzureBlobConfiguration::type_name), "Azure"); + fillDataImpl(res_columns, StorageObjectStorage::getSchemaCache(context, StorageAzureConfiguration::type_name), "Azure"); #endif } diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 02c8c623e61..6ad8689a9b4 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 06676a8adfa..a997b34a75c 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -1,23 +1,23 @@ #include "config.h" +#include +#include +#include #include + #include +#include #include #include + #include -#include -#include -#include -#include -#include -#include + #include #include -#include -#include -#include -#include -#include "registerTableFunctions.h" +#include +#include +#include +#include namespace DB @@ -29,8 +29,7 @@ namespace ErrorCodes } template -ObjectStoragePtr TableFunctionObjectStorage< - Definition, Configuration>::getObjectStorage(const ContextPtr & context, bool create_readonly) const +ObjectStoragePtr TableFunctionObjectStorage::getObjectStorage(const ContextPtr & context, bool create_readonly) const { if (!object_storage) object_storage = configuration->createObjectStorage(context, create_readonly); @@ -38,8 +37,7 @@ ObjectStoragePtr TableFunctionObjectStorage< } template -StorageObjectStorageConfigurationPtr TableFunctionObjectStorage< - Definition, Configuration>::getConfiguration() const +StorageObjectStorage::ConfigurationPtr TableFunctionObjectStorage::getConfiguration() const { if (!configuration) configuration = std::make_shared(); @@ -47,8 +45,8 @@ StorageObjectStorageConfigurationPtr TableFunctionObjectStorage< } template -std::vector TableFunctionObjectStorage< - Definition, Configuration>::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const +std::vector TableFunctionObjectStorage::skipAnalysisForArguments( + const QueryTreeNodePtr & query_node_table_function, ContextPtr) const { auto & table_function_node = query_node_table_function->as(); auto & table_function_arguments_nodes = table_function_node.getArguments().getNodes(); @@ -64,19 +62,6 @@ std::vector TableFunctionObjectStorage< return result; } -template -void TableFunctionObjectStorage::updateStructureAndFormatArgumentsIfNeeded( - ASTs & args, const String & structure, const String & format, const ContextPtr & context) -{ - Configuration().addStructureAndFormatToArgs(args, structure, format, context); -} - -template -void 
TableFunctionObjectStorage::parseArgumentsImpl(ASTs & engine_args, const ContextPtr & local_context) -{ - StorageObjectStorageConfiguration::initialize(*getConfiguration(), engine_args, local_context, true); -} - template void TableFunctionObjectStorage::parseArguments(const ASTPtr & ast_function, ContextPtr context) { @@ -94,32 +79,16 @@ template ColumnsDescription TableFunctionObjectStorage< Definition, Configuration>::getActualTableStructure(ContextPtr context, bool is_insert_query) const { - chassert(configuration); if (configuration->structure == "auto") { context->checkAccess(getSourceAccessType()); - auto storage = getObjectStorage(context, !is_insert_query); ColumnsDescription columns; + auto storage = getObjectStorage(context, !is_insert_query); resolveSchemaAndFormat(columns, configuration->format, storage, configuration, std::nullopt, context); return columns; } - - return parseColumnsListFromString(configuration->structure, context); -} - -template -bool TableFunctionObjectStorage< - Definition, Configuration>::supportsReadingSubsetOfColumns(const ContextPtr & context) -{ - chassert(configuration); - return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); -} - -template -std::unordered_set TableFunctionObjectStorage< - Definition, Configuration>::getVirtualsToCheckBeforeUsingStructureHint() const -{ - return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); + else + return parseColumnsListFromString(configuration->structure, context); } template @@ -205,7 +174,7 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) #endif #if USE_AZURE_BLOB_STORAGE - factory.registerFunction>( + factory.registerFunction>( { .documentation = { @@ -229,8 +198,8 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) } #if USE_AZURE_BLOB_STORAGE -template class TableFunctionObjectStorage; -template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; +template class TableFunctionObjectStorage; #endif #if USE_AWS_S3 diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index bd43cae3697..bbc40cc6191 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -1,19 +1,18 @@ #pragma once #include "config.h" - #include -#include -#include +#include #include - +#include +#include namespace DB { class Context; class StorageS3Configuration; -class StorageAzureBlobConfiguration; +class StorageAzureConfiguration; class StorageHDFSConfiguration; struct S3StorageSettings; struct AzureStorageSettings; @@ -104,20 +103,32 @@ public: void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } - bool supportsReadingSubsetOfColumns(const ContextPtr & context) override; + bool supportsReadingSubsetOfColumns(const ContextPtr & context) override + { + return FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); + } - std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override; + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override + { + return VirtualColumnUtils::getVirtualNamesForFileLikeStorage(); + } - virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context) + { + StorageObjectStorage::Configuration::initialize(*getConfiguration(), args, context, true); + } 
static void updateStructureAndFormatArgumentsIfNeeded( ASTs & args, const String & structure, const String & format, - const ContextPtr & context); + const ContextPtr & context) + { + Configuration().addStructureAndFormatToArgs(args, structure, format, context); + } protected: - using ConfigurationPtr = StorageObjectStorageConfigurationPtr; + using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; StoragePtr executeImpl( const ASTPtr & ast_function, @@ -146,7 +157,7 @@ using TableFunctionS3 = TableFunctionObjectStorage; +using TableFunctionAzureBlob = TableFunctionObjectStorage; #endif #if USE_HDFS diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp index ce78076dd21..449bd2c8c49 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB @@ -109,7 +109,7 @@ template class TableFunctionObjectStorageCluster; +template class TableFunctionObjectStorageCluster; #endif #if USE_HDFS diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h index a8bc11b5e40..76786fafe99 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.h +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h @@ -13,7 +13,7 @@ class Context; class StorageS3Settings; class StorageAzureBlobSettings; class StorageS3Configuration; -class StorageAzureBlobConfiguration; +class StorageAzureConfiguration; struct AzureClusterDefinition { @@ -90,7 +90,7 @@ using TableFunctionS3Cluster = TableFunctionObjectStorageCluster; +using TableFunctionAzureBlobCluster = TableFunctionObjectStorageCluster; #endif #if USE_HDFS diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 5e0bc3267d8..26b9a771416 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -29,18 +29,6 @@ void registerTableFunctions() registerTableFunctionFuzzJSON(factory); #endif -#if USE_AWS_S3 - // registerTableFunctionS3Cluster(factory); - // registerTableFunctionHudi(factory); -#if USE_PARQUET - // registerTableFunctionDeltaLake(factory); -#endif -#if USE_AVRO - // registerTableFunctionIceberg(factory); -#endif - -#endif - #if USE_HIVE registerTableFunctionHive(factory); #endif diff --git a/tests/integration/test_checking_s3_blobs_paranoid/configs/inf_s3_retries.xml b/tests/integration/test_checking_s3_blobs_paranoid/configs/inf_s3_retries.xml index 4210c13b727..7df7b56b3b4 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/configs/inf_s3_retries.xml +++ b/tests/integration/test_checking_s3_blobs_paranoid/configs/inf_s3_retries.xml @@ -5,6 +5,7 @@ 1000000 1 + 0 diff --git a/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml b/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml index 95a313ea4f2..c1ca258f6c4 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml +++ b/tests/integration/test_checking_s3_blobs_paranoid/configs/s3_retries.xml @@ -5,6 +5,7 @@ 5 0 + 0 From 0db76bf631475c6a7647096baf26bfdac35cc181 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 26 Apr 2024 18:52:49 +0000 Subject: [PATCH 086/392] Add more tests and docs, fix collecting statistics, fix prefetching columns in wide parts --- src/Columns/ColumnDynamic.cpp | 4 +- 
src/Columns/ColumnNullable.cpp | 19 ++++ src/Columns/ColumnNullable.h | 3 + src/DataTypes/Serializations/ISerialization.h | 37 ++++---- .../Serializations/SerializationArray.cpp | 3 +- .../Serializations/SerializationDynamic.cpp | 88 ++++++++++--------- .../SerializationDynamicElement.cpp | 35 ++++++-- .../Serializations/SerializationMap.cpp | 3 +- .../Serializations/SerializationTuple.cpp | 41 ++++----- .../Serializations/SerializationVariant.cpp | 24 ++--- .../SerializationVariantElement.cpp | 84 ++++++++---------- .../MergeTree/MergeTreeReaderWide.cpp | 9 +- src/Storages/MergeTree/MutateTask.cpp | 16 +--- .../03034_dynamic_conversions.reference | 25 ++++++ .../0_stateless/03034_dynamic_conversions.sql | 10 +++ .../03037_dynamic_merges_1.reference | 18 ++-- .../0_stateless/03037_dynamic_merges_1.sh | 17 ++-- .../03037_dynamic_merges_2.reference | 20 +++++ .../0_stateless/03037_dynamic_merges_2.sh | 2 +- ... => 03040_dynamic_type_alters.sh.disabled} | 0 20 files changed, 275 insertions(+), 183 deletions(-) create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2.reference rename tests/queries/0_stateless/{03040_dynamic_type_alters.sh => 03040_dynamic_type_alters.sh.disabled} (100%) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 293055b43fc..3074504973a 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -687,7 +687,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & so } size_t size = source_statistics.data.empty() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : source_statistics.data.at(variant_name); -// LOG_DEBUG(getLogger("ColumnDynamic"), "Source variant: {}. Variant: {}. Size: {}", source_variant_info.variant_name, variant_name, size); + LOG_DEBUG(getLogger("ColumnDynamic"), "Source variant: {}. Variant: {}. Size: {}", source_variant_info.variant_name, variant_name, size); it->second += size; } } @@ -701,7 +701,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & so variants_with_sizes.reserve(all_variants.size()); for (const auto & variant : all_variants) { -// LOG_DEBUG(getLogger("ColumnDynamic"), "Variant: {}. Size: {}", variant->getName(), total_sizes[variant->getName()]); + LOG_DEBUG(getLogger("ColumnDynamic"), "Variant: {}. 
Size: {}", variant->getName(), total_sizes[variant->getName()]); variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant); } std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 4474816601e..011f3702bdf 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -900,4 +900,23 @@ ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column) return column; } +ColumnPtr removeNullable(const ColumnPtr & column) +{ + if (const auto * column_nullable = typeid_cast(column.get())) + return column_nullable->getNestedColumnPtr(); + return column; +} + +ColumnPtr removeNullableOrLowCardinalityNullable(const ColumnPtr & column) +{ + if (const auto * column_low_cardinality = typeid_cast(column.get())) + { + if (!column_low_cardinality->nestedIsNullable()) + return column; + return column_low_cardinality->cloneWithDefaultOnNull(); + } + + return removeNullable(column); +} + } diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 73bd75527f8..4e6f05b35ec 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -210,4 +210,7 @@ ColumnPtr makeNullableSafe(const ColumnPtr & column); ColumnPtr makeNullableOrLowCardinalityNullable(const ColumnPtr & column); ColumnPtr makeNullableOrLowCardinalityNullableSafe(const ColumnPtr & column); +ColumnPtr removeNullable(const ColumnPtr & column); +ColumnPtr removeNullableOrLowCardinalityNullable(const ColumnPtr & column); + } diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 65493cf6dda..ddbed34f614 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -99,6 +99,19 @@ public: using SubcolumnCreatorPtr = std::shared_ptr; + struct SerializeBinaryBulkState + { + virtual ~SerializeBinaryBulkState() = default; + }; + + struct DeserializeBinaryBulkState + { + virtual ~DeserializeBinaryBulkState() = default; + }; + + using SerializeBinaryBulkStatePtr = std::shared_ptr; + using DeserializeBinaryBulkStatePtr = std::shared_ptr; + struct SubstreamData { SubstreamData() = default; @@ -125,10 +138,17 @@ public: return *this; } + SubstreamData & withDeserializePrefix(DeserializeBinaryBulkStatePtr deserialize_prefix_state_) + { + deserialize_prefix_state = std::move(deserialize_prefix_state_); + return *this; + } + SerializationPtr serialization; DataTypePtr type; ColumnPtr column; SerializationInfoPtr serialization_info; + DeserializeBinaryBulkStatePtr deserialize_prefix_state; }; struct Substream @@ -221,21 +241,6 @@ public: using OutputStreamGetter = std::function; using InputStreamGetter = std::function; - struct SerializeBinaryBulkState - { - virtual ~SerializeBinaryBulkState() = default; - }; - - struct DeserializeBinaryBulkState - { - virtual ~DeserializeBinaryBulkState() = default; - }; - - using SerializeBinaryBulkStatePtr = std::shared_ptr; - using DeserializeBinaryBulkStatePtr = std::shared_ptr; - - using SubstreamsDeserializeStatesCache = std::unordered_map; - struct SerializeBinaryBulkSettings { OutputStreamGetter getter; @@ -285,6 +290,8 @@ public: SerializeBinaryBulkSettings & /*settings*/, SerializeBinaryBulkStatePtr & /*state*/) const {} + using SubstreamsDeserializeStatesCache = std::unordered_map; + /// Call before before deserializeBinaryBulkWithMultipleStreams chain to get DeserializeBinaryBulkStatePtr. 
virtual void deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & /*settings*/, diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index d6546b338b5..6a8555a3714 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -254,7 +254,8 @@ void SerializationArray::enumerateStreams( auto next_data = SubstreamData(nested) .withType(type_array ? type_array->getNestedType() : nullptr) .withColumn(column_array ? column_array->getDataPtr() : nullptr) - .withSerializationInfo(data.serialization_info); + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(data.deserialize_prefix_state); nested->enumerateStreams(settings, callback, next_data); settings.path.pop_back(); diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index c9fe8dd6b29..858445ed257 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -21,45 +21,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -void SerializationDynamic::enumerateStreams( - EnumerateStreamsSettings & settings, - const StreamCallback & callback, - const SubstreamData & data) const -{ - settings.path.push_back(Substream::DynamicStructure); - callback(settings.path); - settings.path.pop_back(); - - const auto * column_dynamic = data.column ? &assert_cast(*data.column) : nullptr; - - /// If column is nullptr, nothing to enumerate as we don't have any variants. - if (!column_dynamic) - return; - - const auto & variant_info = column_dynamic->getVariantInfo(); - auto variant_serialization = variant_info.variant_type->getDefaultSerialization(); - - settings.path.push_back(Substream::DynamicData); - auto variant_data = SubstreamData(variant_serialization) - .withType(variant_info.variant_type) - .withColumn(column_dynamic->getVariantColumnPtr()) - .withSerializationInfo(data.serialization_info); - settings.path.back().data = variant_data; - variant_serialization->enumerateStreams(settings, callback, variant_data); - settings.path.pop_back(); -} - -SerializationDynamic::DynamicStructureSerializationVersion::DynamicStructureSerializationVersion(UInt64 version) : value(static_cast(version)) -{ - checkVersion(version); -} - -void SerializationDynamic::DynamicStructureSerializationVersion::checkVersion(UInt64 version) -{ - if (version != VariantTypeName) - throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Dynamic structure serialization."); -} - struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryBulkState { SerializationDynamic::DynamicStructureSerializationVersion structure_version; @@ -68,10 +29,6 @@ struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryB SerializationPtr variant_serialization; ISerialization::SerializeBinaryBulkStatePtr variant_state; - /// Pointer to currently serialized dynamic column. - /// Used to calculate statistics for the whole column and not for some range. - const ColumnDynamic * current_dynamic_column = nullptr; - /// Variants statistics. Map (Variant name) -> (Variant size). 
ColumnDynamic::Statistics statistics = { .source =ColumnDynamic::Statistics::Source::READ }; @@ -91,6 +48,47 @@ struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBin ISerialization::DeserializeBinaryBulkStatePtr structure_state; }; +void SerializationDynamic::enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const +{ + settings.path.push_back(Substream::DynamicStructure); + callback(settings.path); + settings.path.pop_back(); + + const auto * column_dynamic = data.column ? &assert_cast(*data.column) : nullptr; + const auto * deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + + /// If column is nullptr and we didn't deserizlize prefix yet, nothing to enumerate as we don't have any variants. + if (!column_dynamic && !deserialize_prefix_state) + return; + + const auto & variant_type = column_dynamic ? column_dynamic->getVariantInfo().variant_type : checkAndGetState(deserialize_prefix_state->structure_state)->variant_type; + auto variant_serialization = variant_type->getDefaultSerialization(); + + settings.path.push_back(Substream::DynamicData); + auto variant_data = SubstreamData(variant_serialization) + .withType(variant_type) + .withColumn(column_dynamic ? column_dynamic->getVariantColumnPtr() : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(deserialize_prefix_state ? deserialize_prefix_state->variant_state : nullptr); + settings.path.back().data = variant_data; + variant_serialization->enumerateStreams(settings, callback, variant_data); + settings.path.pop_back(); +} + +SerializationDynamic::DynamicStructureSerializationVersion::DynamicStructureSerializationVersion(UInt64 version) : value(static_cast(version)) +{ + checkVersion(version); +} + +void SerializationDynamic::DynamicStructureSerializationVersion::checkVersion(UInt64 version) +{ + if (version != VariantTypeName) + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid version for Dynamic structure serialization."); +} + void SerializationDynamic::serializeBinaryBulkStatePrefix( const DB::IColumn & column, SerializeBinaryBulkSettings & settings, @@ -245,6 +243,10 @@ void SerializationDynamic::serializeBinaryBulkWithMultipleStreams( if (!variant_info.variant_type->equals(*dynamic_state->variant_type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName()); + /// Update statistics. 
+ if (offset == 0) + dynamic_state->updateStatistics(*variant_column); + settings.path.push_back(Substream::DynamicData); dynamic_state->variant_serialization->serializeBinaryBulkWithMultipleStreams(*variant_column, offset, limit, settings, dynamic_state->variant_state); settings.path.pop_back(); diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index 386a6579519..9be9802d926 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -14,17 +14,41 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } + +struct DeserializeBinaryBulkStateDynamicElement : public ISerialization::DeserializeBinaryBulkState +{ + ISerialization::DeserializeBinaryBulkStatePtr structure_state; + SerializationPtr variant_serialization; + ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; +}; + void SerializationDynamicElement::enumerateStreams( DB::ISerialization::EnumerateStreamsSettings & settings, const DB::ISerialization::StreamCallback & callback, - const DB::ISerialization::SubstreamData &) const + const DB::ISerialization::SubstreamData & data) const { settings.path.push_back(Substream::DynamicStructure); callback(settings.path); settings.path.pop_back(); - /// We don't know if we have actually have this variant in Dynamic column, + /// If we didn't deserialize prefix yet, we don't know if we actually have this variant in Dynamic column, /// so we cannot enumerate variant streams. + if (!data.deserialize_prefix_state) + return; + + auto * deserialize_prefix_state = checkAndGetState(data.deserialize_prefix_state); + /// If we don't have this variant, no need to enumerate streams for it as we won't read from any stream. 
+ if (!deserialize_prefix_state->variant_serialization) + return; + + settings.path.push_back(Substream::DynamicData); + auto variant_data = SubstreamData(deserialize_prefix_state->variant_serialization) + .withType(data.type) + .withColumn(data.column) + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(deserialize_prefix_state->variant_element_state); + deserialize_prefix_state->variant_serialization->enumerateStreams(settings, callback, variant_data); + settings.path.pop_back(); } void SerializationDynamicElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const @@ -39,13 +63,6 @@ void SerializationDynamicElement::serializeBinaryBulkStateSuffix(SerializeBinary ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationDynamicElement"); } -struct DeserializeBinaryBulkStateDynamicElement : public ISerialization::DeserializeBinaryBulkState -{ - ISerialization::DeserializeBinaryBulkStatePtr structure_state; - SerializationPtr variant_serialization; - ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; -}; - void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index dac4fbe88e0..cda82f31820 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -398,7 +398,8 @@ void SerializationMap::enumerateStreams( auto next_data = SubstreamData(nested) .withType(data.type ? assert_cast(*data.type).getNestedType() : nullptr) .withColumn(data.column ? assert_cast(*data.column).getNestedColumnPtr() : nullptr) - .withSerializationInfo(data.serialization_info); + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(data.deserialize_prefix_state); nested->enumerateStreams(settings, callback, next_data); } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index bb7c19aa78d..6e4b4c4c533 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -549,26 +549,6 @@ bool SerializationTuple::tryDeserializeTextCSV(IColumn & column, ReadBuffer & is return tryDeserializeText(column, rb, settings, true); } -void SerializationTuple::enumerateStreams( - EnumerateStreamsSettings & settings, - const StreamCallback & callback, - const SubstreamData & data) const -{ - const auto * type_tuple = data.type ? &assert_cast(*data.type) : nullptr; - const auto * column_tuple = data.column ? &assert_cast(*data.column) : nullptr; - const auto * info_tuple = data.serialization_info ? &assert_cast(*data.serialization_info) : nullptr; - - for (size_t i = 0; i < elems.size(); ++i) - { - auto next_data = SubstreamData(elems[i]) - .withType(type_tuple ? type_tuple->getElement(i) : nullptr) - .withColumn(column_tuple ? column_tuple->getColumnPtr(i) : nullptr) - .withSerializationInfo(info_tuple ? 
info_tuple->getElementInfo(i) : nullptr); - - elems[i]->enumerateStreams(settings, callback, next_data); - } -} - struct SerializeBinaryBulkStateTuple : public ISerialization::SerializeBinaryBulkState { std::vector states; @@ -579,6 +559,27 @@ struct DeserializeBinaryBulkStateTuple : public ISerialization::DeserializeBinar std::vector states; }; +void SerializationTuple::enumerateStreams( + EnumerateStreamsSettings & settings, + const StreamCallback & callback, + const SubstreamData & data) const +{ + const auto * type_tuple = data.type ? &assert_cast(*data.type) : nullptr; + const auto * column_tuple = data.column ? &assert_cast(*data.column) : nullptr; + const auto * info_tuple = data.serialization_info ? &assert_cast(*data.serialization_info) : nullptr; + const auto * tuple_deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + + for (size_t i = 0; i < elems.size(); ++i) + { + auto next_data = SubstreamData(elems[i]) + .withType(type_tuple ? type_tuple->getElement(i) : nullptr) + .withColumn(column_tuple ? column_tuple->getColumnPtr(i) : nullptr) + .withSerializationInfo(info_tuple ? info_tuple->getElementInfo(i) : nullptr) + .withDeserializePrefix(tuple_deserialize_prefix_state ? tuple_deserialize_prefix_state->states[i] : nullptr); + + elems[i]->enumerateStreams(settings, callback, next_data); + } +} void SerializationTuple::serializeBinaryBulkStatePrefix( const IColumn & column, diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 3fe26b773e3..8e0ef112444 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -28,6 +28,16 @@ namespace ErrorCodes extern const int INCORRECT_DATA; } +struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState +{ + std::vector states; +}; + +struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState +{ + std::vector states; +}; + void SerializationVariant::enumerateStreams( EnumerateStreamsSettings & settings, const StreamCallback & callback, @@ -35,6 +45,7 @@ void SerializationVariant::enumerateStreams( { const auto * type_variant = data.type ? &assert_cast(*data.type) : nullptr; const auto * column_variant = data.column ? &assert_cast(*data.column) : nullptr; + const auto * variant_deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; auto discriminators_serialization = std::make_shared(std::make_shared>(), "discr", SubstreamType::NamedVariantDiscriminators); auto local_discriminators = column_variant ? column_variant->getLocalDiscriminatorsPtr() : nullptr; @@ -59,7 +70,8 @@ void SerializationVariant::enumerateStreams( auto variant_data = SubstreamData(variants[i]) .withType(type_variant ? type_variant->getVariant(i) : nullptr) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) - .withSerializationInfo(data.serialization_info); + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(variant_deserialize_prefix_state ? 
variant_deserialize_prefix_state->states[i] : nullptr); addVariantElementToPath(settings.path, i); settings.path.back().data = variant_data; @@ -70,16 +82,6 @@ void SerializationVariant::enumerateStreams( settings.path.pop_back(); } -struct SerializeBinaryBulkStateVariant : public ISerialization::SerializeBinaryBulkState -{ - std::vector states; -}; - -struct DeserializeBinaryBulkStateVariant : public ISerialization::DeserializeBinaryBulkState -{ - std::vector states; -}; - void SerializationVariant::serializeBinaryBulkStatePrefix( const IColumn & column, SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 4f120ecac06..0e1ad81ce5b 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -12,34 +12,6 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -void SerializationVariantElement::enumerateStreams( - DB::ISerialization::EnumerateStreamsSettings & settings, - const DB::ISerialization::StreamCallback & callback, - const DB::ISerialization::SubstreamData & data) const -{ - /// We will need stream for discriminators during deserialization. - settings.path.push_back(Substream::VariantDiscriminators); - callback(settings.path); - settings.path.pop_back(); - - addVariantToPath(settings.path); - settings.path.back().data = data; - nested_serialization->enumerateStreams(settings, callback, data); - removeVariantFromPath(settings.path); -} - -void SerializationVariantElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const -{ - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElement"); -} - -void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const -{ - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElement"); -} - struct DeserializeBinaryBulkStateVariantElement : public ISerialization::DeserializeBinaryBulkState { /// During deserialization discriminators and variant streams can be shared. @@ -56,6 +28,40 @@ struct DeserializeBinaryBulkStateVariantElement : public ISerialization::Deseria ISerialization::DeserializeBinaryBulkStatePtr variant_element_state; }; +void SerializationVariantElement::enumerateStreams( + DB::ISerialization::EnumerateStreamsSettings & settings, + const DB::ISerialization::StreamCallback & callback, + const DB::ISerialization::SubstreamData & data) const +{ + /// We will need stream for discriminators during deserialization. + settings.path.push_back(Substream::VariantDiscriminators); + callback(settings.path); + settings.path.pop_back(); + + const auto * deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + addVariantToPath(settings.path); + auto nested_data = SubstreamData(nested_serialization) + .withType(data.type ? removeNullableOrLowCardinalityNullable(data.type) : nullptr) + .withColumn(data.column ? removeNullableOrLowCardinalityNullable(data.column) : nullptr) + .withSerializationInfo(data.serialization_info) + .withDeserializePrefix(deserialize_prefix_state ? 
deserialize_prefix_state->variant_element_state : nullptr); + settings.path.back().data = data; + nested_serialization->enumerateStreams(settings, callback, data); + removeVariantFromPath(settings.path); +} + +void SerializationVariantElement::serializeBinaryBulkStatePrefix(const IColumn &, SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStatePrefix is not implemented for SerializationVariantElement"); +} + +void SerializationVariantElement::serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings &, SerializeBinaryBulkStatePtr &) const +{ + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method serializeBinaryBulkStateSuffix is not implemented for SerializationVariantElement"); +} + void SerializationVariantElement::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsDeserializeStatesCache * cache) const { @@ -82,7 +88,6 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( { auto * variant_element_state = checkAndGetState(state); - size_t variant_limit = 0; /// First, deserialize discriminators from Variant column. settings.path.push_back(Substream::VariantDiscriminators); if (auto cached_discriminators = getFromSubstreamsCache(cache, settings.path)) @@ -99,30 +104,17 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams( if (!variant_element_state->discriminators || result_column->empty()) variant_element_state->discriminators = ColumnVariant::ColumnDiscriminators::create(); -// ColumnVariant::Discriminator discr; -// readBinaryLittleEndian(discr, *discriminators_stream); -// if (discr == ColumnVariant::NULL_DISCRIMINATOR) -// { SerializationNumber().deserializeBinaryBulk(*variant_element_state->discriminators->assumeMutable(), *discriminators_stream, limit, 0); -// } -// else -// { -// auto & discriminators_data = assert_cast(*variant_element_state->discriminators->assumeMutable()).getData(); -// discriminators_data.resize_fill(discriminators_data.size() + limit, discr); -// } - addToSubstreamsCache(cache, settings.path, variant_element_state->discriminators); } settings.path.pop_back(); + /// Iterate through new discriminators to calculate the limit for our variant. const auto & discriminators_data = assert_cast(*variant_element_state->discriminators).getData(); size_t discriminators_offset = variant_element_state->discriminators->size() - limit; - /// Iterate through new discriminators to calculate the limit for our variant. - if (!variant_limit) - { - for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) - variant_limit += (discriminators_data[i] == variant_discriminator); - } + size_t variant_limit = 0; + for (size_t i = discriminators_offset; i != discriminators_data.size(); ++i) + variant_limit += (discriminators_data[i] == variant_discriminator); /// Now we know the limit for our variant and can deserialize it. 
diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index c8bf12436b0..d18d5eec975 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -334,8 +334,7 @@ void MergeTreeReaderWide::prefetchForColumn( ISerialization::SubstreamsDeserializeStatesCache & deserialize_states_cache) { deserializePrefix(serialization, name_and_type, current_task_last_mark, cache, deserialize_states_cache); - - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) + auto callback = [&](const ISerialization::SubstreamPath & substream_path) { auto stream_name = IMergeTreeDataPart::getStreamNameForColumn(name_and_type, substream_path, data_part_info_for_read->getChecksums()); @@ -348,7 +347,11 @@ void MergeTreeReaderWide::prefetchForColumn( prefetched_streams.insert(*stream_name); } } - }); + }; + + auto data = ISerialization::SubstreamData(serialization).withType(name_and_type.type).withDeserializePrefix(deserialize_binary_bulk_state_map[name_and_type.name]); + ISerialization::EnumerateStreamsSettings settings; + serialization->enumerateStreams(settings, callback, data); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index fb3e318687a..5e388d6a8ac 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -779,13 +779,7 @@ static NameToNameVector collectFilesForRenames( }; if (auto serialization = source_part->tryGetSerialization(command.column_name)) - { - auto name_and_type = source_part->getColumn(command.column_name); - ColumnPtr column_sample; - if (name_and_type.type->hasDynamicSubcolumns()) - column_sample = source_part->readColumnSample(name_and_type); - serialization->enumerateStreams(callback, name_and_type.type, column_sample); - } + serialization->enumerateStreams(callback); /// if we drop a column with statistic, we should also drop the stat file. if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) @@ -821,13 +815,7 @@ static NameToNameVector collectFilesForRenames( }; if (auto serialization = source_part->tryGetSerialization(command.column_name)) - { - auto name_and_type = source_part->getColumn(command.column_name); - ColumnPtr column_sample; - if (name_and_type.type->hasDynamicSubcolumns()) - column_sample = source_part->readColumnSample(name_and_type); - serialization->enumerateStreams(callback, name_and_type.type, column_sample); - } + serialization->enumerateStreams(callback); /// if we rename a column with statistic, we should also rename the stat file. 
if (source_part->checksums.has(STAT_FILE_PREFIX + command.column_name + STAT_FILE_SUFFIX)) diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.reference b/tests/queries/0_stateless/03034_dynamic_conversions.reference index af91add9ddd..45f94f7ecc4 100644 --- a/tests/queries/0_stateless/03034_dynamic_conversions.reference +++ b/tests/queries/0_stateless/03034_dynamic_conversions.reference @@ -61,3 +61,28 @@ str_5 String \N None 4 UInt64 1970-01-06 Date +0 +42 +42.42 +1 +0 +\N +42 +42.42 +1 +0 + +42 +42.42 +true +e10 +\N +42 +42.42 +true +e10 +\N +42 +\N +1 +\N diff --git a/tests/queries/0_stateless/03034_dynamic_conversions.sql b/tests/queries/0_stateless/03034_dynamic_conversions.sql index e9b4944f5d8..ed75fbf2377 100644 --- a/tests/queries/0_stateless/03034_dynamic_conversions.sql +++ b/tests/queries/0_stateless/03034_dynamic_conversions.sql @@ -22,3 +22,13 @@ select multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(numb select multiIf(number % 4 == 0, number, number % 4 == 1, toDate(number), number % 4 == 2, range(number), NULL)::Dynamic(max_types=4)::Dynamic(max_types=3) as d, dynamicType(d) from numbers(6); +create table test (d Dynamic) engine = Memory; +insert into test values (NULL), (42), ('42.42'), (true), ('e10'); +select d::Float64 from test; +select d::Nullable(Float64) from test; +select d::String from test; +select d::Nullable(String) from test; +select d::UInt64 from test; -- {serverError CANNOT_PARSE_TEXT} +select d::Nullable(UInt64) from test; +select d::Date from test; -- {serverError CANNOT_PARSE_DATE} + diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.reference b/tests/queries/0_stateless/03037_dynamic_merges_1.reference index fff812f0396..0a647b41c4b 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1.reference +++ b/tests/queries/0_stateless/03037_dynamic_merges_1.reference @@ -1,5 +1,5 @@ MergeTree compact + horizontal merge -test1 +test 50000 DateTime 60000 Date 70000 Array(UInt16) @@ -20,8 +20,8 @@ test1 200000 Map(UInt64, UInt64) 260000 String 10000 Tuple(UInt64, UInt64) -100000 UInt64 100000 None +100000 UInt64 200000 Map(UInt64, UInt64) 260000 String 100000 None @@ -29,7 +29,7 @@ test1 200000 Map(UInt64, UInt64) 270000 String MergeTree wide + horizontal merge -test1 +test 50000 DateTime 60000 Date 70000 Array(UInt16) @@ -41,8 +41,8 @@ test1 100000 UInt64 190000 String 70000 Array(UInt16) -100000 UInt64 100000 None +100000 UInt64 190000 String 200000 Map(UInt64, UInt64) 100000 None @@ -50,8 +50,8 @@ test1 200000 Map(UInt64, UInt64) 260000 String 10000 Tuple(UInt64, UInt64) -100000 UInt64 100000 None +100000 UInt64 200000 Map(UInt64, UInt64) 260000 String 100000 None @@ -59,7 +59,7 @@ test1 200000 Map(UInt64, UInt64) 270000 String MergeTree compact + vertical merge -test1 +test 50000 DateTime 60000 Date 70000 Array(UInt16) @@ -71,8 +71,8 @@ test1 100000 UInt64 190000 String 70000 Array(UInt16) -100000 UInt64 100000 None +100000 UInt64 190000 String 200000 Map(UInt64, UInt64) 100000 None @@ -84,12 +84,12 @@ test1 100000 UInt64 200000 Map(UInt64, UInt64) 260000 String -100000 UInt64 100000 None +100000 UInt64 200000 Map(UInt64, UInt64) 270000 String MergeTree wide + vertical merge -test1 +test 50000 DateTime 60000 Date 70000 Array(UInt16) diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.sh b/tests/queries/0_stateless/03037_dynamic_merges_1.sh index cf524fb9393..056f6702727 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_1.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_1.sh @@ 
-21,35 +21,36 @@ function test() $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "system stop merges test" $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "system stop merges test" $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" } $CH_CLIENT -q "drop table if exists test;" echo "MergeTree compact + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;" test $CH_CLIENT -q "drop table test;" echo "MergeTree wide + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10;" test $CH_CLIENT -q "drop table test;" + echo "MergeTree compact + vertical merge" $CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" test diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.reference b/tests/queries/0_stateless/03037_dynamic_merges_2.reference new file mode 100644 index 00000000000..420b8185b16 --- /dev/null +++ 
b/tests/queries/0_stateless/03037_dynamic_merges_2.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree wide + horizontal merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree compact + vertical merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 +MergeTree wide + vertical merge +test +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.sh b/tests/queries/0_stateless/03037_dynamic_merges_2.sh index e9d571c2104..40adbdd4262 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_2.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_2.sh @@ -19,7 +19,7 @@ function test() $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000)" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" } $CH_CLIENT -q "drop table if exists test;" diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.sh b/tests/queries/0_stateless/03040_dynamic_type_alters.sh.disabled similarity index 100% rename from tests/queries/0_stateless/03040_dynamic_type_alters.sh rename to tests/queries/0_stateless/03040_dynamic_type_alters.sh.disabled From 671650bd2eaf2a07d5e6f517b40905c71ce798b6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 28 Apr 2024 12:18:24 +0200 Subject: [PATCH 087/392] Cleanup --- src/Backups/BackupIO_AzureBlobStorage.cpp | 4 ++-- src/Storages/ObjectStorage/Azure/Configuration.h | 16 ++++++++++------ .../ObjectStorage/DataLakes/IStorageDataLake.h | 4 +--- src/Storages/ObjectStorage/HDFS/Configuration.h | 11 +++++++---- src/Storages/ObjectStorage/S3/Configuration.h | 10 ++++++---- .../ObjectStorage/StorageObjectStorage.cpp | 16 +++------------- .../ObjectStorage/StorageObjectStorage.h | 5 +---- .../ObjectStorage/StorageObjectStorageSource.cpp | 9 ++++----- .../ObjectStorage/StorageObjectStorageSource.h | 5 +---- .../registerStorageObjectStorage.cpp | 3 +-- src/Storages/S3Queue/StorageS3Queue.cpp | 6 ++---- 11 files changed, 38 insertions(+), 51 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index f00da686c18..3af66e5470f 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -36,7 +36,7 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { - auto client_ptr = configuration.createClient(/* is_read_only */ false, /* attempt_to_create_container */true); + auto client_ptr = configuration.createClient(/* is_readonly */false, /* attempt_to_create_container */true); object_storage = std::make_unique("BackupReaderAzureBlobStorage", std::move(client_ptr), configuration.createSettings(context_), @@ -121,7 +121,7 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} , configuration(configuration_) { - auto client_ptr = configuration.createClient(/* is_read_only */ false, 
attempt_to_create_container); + auto client_ptr = configuration.createClient(/* is_readonly */false, attempt_to_create_container); object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), configuration.createSettings(context_), diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index 91a9a0bbbd5..1591cb42469 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -3,7 +3,6 @@ #include "config.h" #if USE_AZURE_BLOB_STORAGE - #include #include #include @@ -36,20 +35,25 @@ public: const Paths & getPaths() const override { return blobs_paths; } void setPaths(const Paths & paths) override { blobs_paths = paths; } - String getDataSourceDescription() override { return std::filesystem::path(connection_url) / container; } String getNamespace() const override { return container; } + String getDataSourceDescription() override { return std::filesystem::path(connection_url) / container; } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; - ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT ConfigurationPtr clone() override { return std::make_shared(*this); } - void fromNamedCollection(const NamedCollection & collection) override; - void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; + void addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const String & format_, ContextPtr context) override; + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) override; protected: + void fromNamedCollection(const NamedCollection & collection) override; + void fromAST(ASTs & args, ContextPtr context, bool with_structure) override; + using AzureClient = Azure::Storage::Blobs::BlobContainerClient; using AzureClientPtr = std::unique_ptr; diff --git a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h index 3119b844aaf..83865c47eb8 100644 --- a/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h +++ b/src/Storages/ObjectStorage/DataLakes/IStorageDataLake.h @@ -38,7 +38,7 @@ public: std::optional format_settings_, LoadingStrictnessLevel mode) { - auto object_storage = base_configuration->createObjectStorage(context); + auto object_storage = base_configuration->createObjectStorage(context, /* is_readonly */true); DataLakeMetadataPtr metadata; NamesAndTypesList schema_from_metadata; @@ -96,8 +96,6 @@ public: void updateConfiguration(ContextPtr local_context) override { - std::lock_guard lock(Storage::configuration_update_mutex); - Storage::updateConfiguration(local_context); auto new_metadata = DataLakeMetadata::create(Storage::object_storage, base_configuration, local_context); diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index cac09ee1d92..dc06e754c44 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -28,19 +28,22 @@ public: const Paths & getPaths() const override { return paths; } void setPaths(const Paths & paths_) override { paths = paths_; } + std::string getPathWithoutGlobs() const override; String getNamespace() const override { return ""; } String 
getDataSourceDescription() override { return url; } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; - ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT ConfigurationPtr clone() override { return std::make_shared(*this); } - void addStructureAndFormatToArgs( - ASTs & args, const String & structure_, const String & format_, ContextPtr context) override; + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; - std::string getPathWithoutGlobs() const override; + void addStructureAndFormatToArgs( + ASTs & args, + const String & structure_, + const String & format_, + ContextPtr context) override; private: void fromNamedCollection(const NamedCollection &) override; diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index 9eb724c4a64..b28b1c226a7 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -3,7 +3,6 @@ #include "config.h" #if USE_AWS_S3 - #include #include @@ -35,13 +34,16 @@ public: void check(ContextPtr context) const override; void validateNamespace(const String & name) const override; - ConfigurationPtr clone() override { return std::make_shared(*this); } bool isStaticConfiguration() const override { return static_configuration; } - ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) override; /// NOLINT + ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) override; + void addStructureAndFormatToArgs( - ASTs & args, const String & structure, const String & format, ContextPtr context) override; + ASTs & args, + const String & structure, + const String & format, + ContextPtr context) override; private: void fromNamedCollection(const NamedCollection & collection) override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 2c9831f0d29..a187a8fc54d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -91,6 +91,7 @@ bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) c void StorageObjectStorage::updateConfiguration(ContextPtr context) { + /// FIXME: we should be able to update everything apart from client if static_configuration == true. 
if (!configuration->isStaticConfiguration()) object_storage->applyNewSettings(context->getConfigRef(), "s3.", context); } @@ -113,7 +114,6 @@ public: const std::optional & format_settings_, bool distributed_processing_, ReadFromFormatInfo info_, - SchemaCache & schema_cache_, const bool need_only_count_, ContextPtr context_, size_t max_block_size_, @@ -121,11 +121,9 @@ public: : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_) , object_storage(object_storage_) , configuration(configuration_) - , schema_cache(schema_cache_) , info(std::move(info_)) , virtual_columns(virtual_columns_) , format_settings(format_settings_) - , query_settings(configuration->getQuerySettings(context_)) , name(name_ + "Source") , need_only_count(need_only_count_) , max_block_size(max_block_size_) @@ -154,8 +152,8 @@ public: for (size_t i = 0; i < num_streams; ++i) { auto source = std::make_shared( - getName(), object_storage, configuration, info, format_settings, query_settings, - context, max_block_size, iterator_wrapper, need_only_count, schema_cache); + getName(), object_storage, configuration, info, format_settings, + context, max_block_size, iterator_wrapper, need_only_count); source->setKeyCondition(filter_actions_dag, context); pipes.emplace_back(std::move(source)); @@ -175,12 +173,10 @@ private: ObjectStoragePtr object_storage; ConfigurationPtr configuration; std::shared_ptr iterator_wrapper; - SchemaCache & schema_cache; const ReadFromFormatInfo info; const NamesAndTypesList virtual_columns; const std::optional format_settings; - const StorageObjectStorage::QuerySettings query_settings; const String name; const bool need_only_count; const size_t max_block_size; @@ -233,7 +229,6 @@ void StorageObjectStorage::read( format_settings, distributed_processing, read_from_format_info, - getSchemaCache(local_context), need_only_count, local_context, max_block_size, @@ -371,11 +366,6 @@ std::pair StorageObjectStorage::resolveSchemaAn return std::pair(columns, format); } -SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context) -{ - return getSchemaCache(context, configuration->getTypeName()); -} - SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, const std::string & storage_type_name) { if (storage_type_name == "s3") diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 46d422b26c2..3f8ff79ad54 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -92,8 +92,6 @@ public: bool parallelizeOutputAfterReading(ContextPtr context) const override; - SchemaCache & getSchemaCache(const ContextPtr & context); - static SchemaCache & getSchemaCache(const ContextPtr & context, const std::string & storage_type_name); static ColumnsDescription resolveSchemaFromData( @@ -132,7 +130,6 @@ protected: const bool distributed_processing; LoggerPtr log; - std::mutex configuration_update_mutex; }; class StorageObjectStorage::Configuration @@ -175,7 +172,7 @@ public: virtual void check(ContextPtr context) const; virtual void validateNamespace(const String & /* name */) const {} - virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly = true) = 0; /// NOLINT + virtual ObjectStoragePtr createObjectStorage(ContextPtr context, bool is_readonly) = 0; virtual ConfigurationPtr clone() = 0; virtual bool isStaticConfiguration() const { return true; } diff --git 
a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index b224afb7a58..cb3f732ce83 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -44,19 +44,16 @@ StorageObjectStorageSource::StorageObjectStorageSource( ConfigurationPtr configuration_, const ReadFromFormatInfo & info, std::optional format_settings_, - const StorageObjectStorage::QuerySettings & query_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, - bool need_only_count_, - SchemaCache & schema_cache_) + bool need_only_count_) : SourceWithKeyCondition(info.source_header, false) , WithContext(context_) , name(std::move(name_)) , object_storage(object_storage_) , configuration(configuration_) , format_settings(format_settings_) - , query_settings(query_settings_) , max_block_size(max_block_size_) , need_only_count(need_only_count_) , read_from_format_info(info) @@ -67,7 +64,7 @@ StorageObjectStorageSource::StorageObjectStorageSource( 1/* max_threads */)) , columns_desc(info.columns_description) , file_iterator(file_iterator_) - , schema_cache(schema_cache_) + , schema_cache(StorageObjectStorage::getSchemaCache(context_, configuration->getTypeName())) , create_reader_scheduler(threadPoolCallbackRunnerUnsafe(*create_reader_pool, "Reader")) { } @@ -229,6 +226,8 @@ std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const O StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReader(size_t processor) { ObjectInfoPtr object_info; + auto query_settings = configuration->getQuerySettings(getContext()); + do { object_info = file_iterator->next(processor); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 356478422bc..a8df00bc0ac 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -32,12 +32,10 @@ public: ConfigurationPtr configuration, const ReadFromFormatInfo & info, std::optional format_settings_, - const StorageObjectStorage::QuerySettings & query_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, - bool need_only_count_, - SchemaCache & schema_cache_); + bool need_only_count_); ~StorageObjectStorageSource() override; @@ -62,7 +60,6 @@ protected: ObjectStoragePtr object_storage; const ConfigurationPtr configuration; const std::optional format_settings; - const StorageObjectStorage::QuerySettings query_settings; const UInt64 max_block_size; const bool need_only_count; const ReadFromFormatInfo read_from_format_info; diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index c23b180215e..74c8aeaad7d 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include @@ -59,7 +58,7 @@ static std::shared_ptr createStorageObjectStorage( return std::make_shared( configuration, - configuration->createObjectStorage(context), + configuration->createObjectStorage(context, /* is_readonly */false), args.getContext(), args.table_id, args.columns, diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp index 38934a7895a..b9c67c7d801 100644 --- 
a/src/Storages/S3Queue/StorageS3Queue.cpp +++ b/src/Storages/S3Queue/StorageS3Queue.cpp @@ -138,7 +138,7 @@ StorageS3Queue::StorageS3Queue( checkAndAdjustSettings(*s3queue_settings, context_->getSettingsRef()); - object_storage = configuration->createObjectStorage(context_); + object_storage = configuration->createObjectStorage(context_, /* is_readonly */true); FormatFactory::instance().checkFormatName(configuration->format); configuration->check(context_); @@ -361,12 +361,10 @@ std::shared_ptr StorageS3Queue::createSource( configuration, info, format_settings, - configuration->getQuerySettings(local_context), local_context, max_block_size, file_iterator, - false, - StorageObjectStorage::getSchemaCache(local_context, configuration->getTypeName())); + false); auto file_deleter = [=, this](const std::string & path) mutable { From 1ccae23170f7668b56a44cb3063e86530f32ce10 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 29 Apr 2024 17:05:31 +0000 Subject: [PATCH 088/392] Fix alter modify column for dynamic columns, make check part work for dynamic columns, fix style errors and tests --- src/Columns/ColumnDynamic.cpp | 5 --- src/Columns/ColumnDynamic.h | 7 +--- src/Core/SettingsChangesHistory.h | 2 + src/DataTypes/DataTypeVariant.cpp | 2 - src/DataTypes/IDataType.h | 6 +++ src/DataTypes/Serializations/ISerialization.h | 10 +++-- .../Serializations/SerializationArray.cpp | 2 +- .../Serializations/SerializationDynamic.cpp | 32 ++++++-------- .../SerializationDynamicElement.cpp | 12 +++--- .../Serializations/SerializationMap.cpp | 2 +- .../Serializations/SerializationTuple.cpp | 4 +- .../Serializations/SerializationVariant.cpp | 22 ++++++++-- .../Serializations/SerializationVariant.h | 8 ++++ .../SerializationVariantElement.cpp | 4 +- src/Functions/FunctionsConversion.cpp | 29 ++++--------- src/Functions/dynamicElement.cpp | 42 +++++++++++-------- src/Functions/variantElement.cpp | 4 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 ++- src/Storages/MergeTree/IMergeTreeDataPart.h | 4 +- .../MergeTreeDataPartWriterCompact.cpp | 28 +++++++++---- .../MergeTreeDataPartWriterCompact.h | 4 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 40 +++++++++++------- .../MergeTree/MergeTreeDataPartWriterWide.h | 4 +- .../MergeTree/MergeTreeReaderWide.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 21 +++++++++- src/Storages/MergeTree/checkDataPart.cpp | 2 +- ....disabled => 03040_dynamic_type_alters.sh} | 0 27 files changed, 180 insertions(+), 124 deletions(-) rename tests/queries/0_stateless/{03040_dynamic_type_alters.sh.disabled => 03040_dynamic_type_alters.sh} (100%) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 3074504973a..f3dff01af25 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -10,7 +10,6 @@ #include #include -#include namespace DB { @@ -687,7 +686,6 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & so } size_t size = source_statistics.data.empty() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : source_statistics.data.at(variant_name); - LOG_DEBUG(getLogger("ColumnDynamic"), "Source variant: {}. Variant: {}. 
Size: {}", source_variant_info.variant_name, variant_name, size); it->second += size; } } @@ -700,10 +698,7 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & so std::vector> variants_with_sizes; variants_with_sizes.reserve(all_variants.size()); for (const auto & variant : all_variants) - { - LOG_DEBUG(getLogger("ColumnDynamic"), "Variant: {}. Size: {}", variant->getName(), total_sizes[variant->getName()]); variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant); - } std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); /// Take first max_dynamic_types variants from sorted list. diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 7487a5aa0db..b5167f4b9d9 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -9,11 +9,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - /** * Column for storing Dynamic type values. * Dynamic column allows to insert and store values of any data types inside. @@ -340,7 +335,7 @@ private: /// Combine current variant with the other variant and return global discriminators mapping /// from other variant to the combined one. It's used for inserting from /// different variants. - /// Returns nullptr if maximum number of Variants is reached and tne new Variant cannot be created. + /// Returns nullptr if maximum number of Variants is reached and the new Variant cannot be created. std::vector * combineVariants(const VariantInfo & other_variant_info); void updateVariantInfoAndExpandVariantColumn(const DataTypePtr & new_variant_type); diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index d3b5de06e70..42cda26d73c 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.5", {{"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, + {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}}}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, diff --git a/src/DataTypes/DataTypeVariant.cpp b/src/DataTypes/DataTypeVariant.cpp index b918b79a2ed..6478bd598f1 100644 --- a/src/DataTypes/DataTypeVariant.cpp +++ b/src/DataTypes/DataTypeVariant.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -18,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int EMPTY_DATA_PASSED; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index dde61ca3a48..46c30240ef8 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -11,6 +11,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + class ReadBuffer; class WriteBuffer; diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index ddbed34f614..b233230f9cc 100644 --- 
a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -138,9 +138,9 @@ public: return *this; } - SubstreamData & withDeserializePrefix(DeserializeBinaryBulkStatePtr deserialize_prefix_state_) + SubstreamData & withDeserializeState(DeserializeBinaryBulkStatePtr deserialize_state_) { - deserialize_prefix_state = std::move(deserialize_prefix_state_); + deserialize_state = std::move(deserialize_state_); return *this; } @@ -148,7 +148,11 @@ public: DataTypePtr type; ColumnPtr column; SerializationInfoPtr serialization_info; - DeserializeBinaryBulkStatePtr deserialize_prefix_state; + + /// For types with dynamic subcolumns deserialize state contains information + /// about current dynamic structure. And this information can be useful + /// when we call enumerateStreams to enumerate dynamic streams. + DeserializeBinaryBulkStatePtr deserialize_state; }; struct Substream diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index 6a8555a3714..ac7b8f4d084 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -255,7 +255,7 @@ void SerializationArray::enumerateStreams( .withType(type_array ? type_array->getNestedType() : nullptr) .withColumn(column_array ? column_array->getDataPtr() : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(data.deserialize_prefix_state); + .withDeserializeState(data.deserialize_state); nested->enumerateStreams(settings, callback, next_data); settings.path.pop_back(); diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 858445ed257..5e6106f560f 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -30,15 +31,9 @@ struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryB ISerialization::SerializeBinaryBulkStatePtr variant_state; /// Variants statistics. Map (Variant name) -> (Variant size). - ColumnDynamic::Statistics statistics = { .source =ColumnDynamic::Statistics::Source::READ }; + ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ }; SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} - - void updateStatistics(const ColumnVariant & column_variant) - { - for (size_t i = 0; i != variant_names.size(); ++i) - statistics.data[variant_names[i]] += column_variant.getVariantPtrByGlobalDiscriminator(i)->size(); - } }; struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBinaryBulkState @@ -58,13 +53,13 @@ void SerializationDynamic::enumerateStreams( settings.path.pop_back(); const auto * column_dynamic = data.column ? &assert_cast(*data.column) : nullptr; - const auto * deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; - /// If column is nullptr and we didn't deserizlize prefix yet, nothing to enumerate as we don't have any variants. - if (!column_dynamic && !deserialize_prefix_state) + /// If column is nullptr and we don't have deserialize state yet, nothing to enumerate as we don't have any variants. 
+ if (!column_dynamic && !deserialize_state) return; - const auto & variant_type = column_dynamic ? column_dynamic->getVariantInfo().variant_type : checkAndGetState(deserialize_prefix_state->structure_state)->variant_type; + const auto & variant_type = column_dynamic ? column_dynamic->getVariantInfo().variant_type : checkAndGetState(deserialize_state->structure_state)->variant_type; auto variant_serialization = variant_type->getDefaultSerialization(); settings.path.push_back(Substream::DynamicData); @@ -72,7 +67,7 @@ void SerializationDynamic::enumerateStreams( .withType(variant_type) .withColumn(column_dynamic ? column_dynamic->getVariantColumnPtr() : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(deserialize_prefix_state ? deserialize_prefix_state->variant_state : nullptr); + .withDeserializeState(deserialize_state ? deserialize_state->variant_state : nullptr); settings.path.back().data = variant_data; variant_serialization->enumerateStreams(settings, callback, variant_data); settings.path.pop_back(); @@ -124,11 +119,11 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( { size_t size = 0; /// Use statistics from column if it was created during merge. - if (statistics.data.empty() || statistics.source != ColumnDynamic::Statistics::Source::MERGE) - size = variant_column.getVariantByGlobalDiscriminator(i).size(); + if (!statistics.data.empty() && statistics.source == ColumnDynamic::Statistics::Source::MERGE) + size = statistics.data.at(variant_info.variant_names[i]); /// Otherwise we can use only variant sizes from current column. else - size = statistics.data.at(variant_info.variant_names[i]); + size = variant_column.getVariantByGlobalDiscriminator(i).size(); writeVarUInt(size, *stream); } } @@ -243,12 +238,9 @@ void SerializationDynamic::serializeBinaryBulkWithMultipleStreams( if (!variant_info.variant_type->equals(*dynamic_state->variant_type)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mismatch of internal columns of Dynamic. Expected: {}, Got: {}", dynamic_state->variant_type->getName(), variant_info.variant_type->getName()); - /// Update statistics. - if (offset == 0) - dynamic_state->updateStatistics(*variant_column); - settings.path.push_back(Substream::DynamicData); - dynamic_state->variant_serialization->serializeBinaryBulkWithMultipleStreams(*variant_column, offset, limit, settings, dynamic_state->variant_state); + assert_cast(*dynamic_state->variant_serialization) + .serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(*variant_column, offset, limit, settings, dynamic_state->variant_state, dynamic_state->statistics.data); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index 9be9802d926..059a7d57e4e 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -33,21 +33,21 @@ void SerializationDynamicElement::enumerateStreams( /// If we didn't deserialize prefix yet, we don't know if we actually have this variant in Dynamic column, /// so we cannot enumerate variant streams. - if (!data.deserialize_prefix_state) + if (!data.deserialize_state) return; - auto * deserialize_prefix_state = checkAndGetState(data.deserialize_prefix_state); + auto * deserialize_state = checkAndGetState(data.deserialize_state); /// If we don't have this variant, no need to enumerate streams for it as we won't read from any stream. 
- if (!deserialize_prefix_state->variant_serialization) + if (!deserialize_state->variant_serialization) return; settings.path.push_back(Substream::DynamicData); - auto variant_data = SubstreamData(deserialize_prefix_state->variant_serialization) + auto variant_data = SubstreamData(deserialize_state->variant_serialization) .withType(data.type) .withColumn(data.column) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(deserialize_prefix_state->variant_element_state); - deserialize_prefix_state->variant_serialization->enumerateStreams(settings, callback, variant_data); + .withDeserializeState(deserialize_state->variant_element_state); + deserialize_state->variant_serialization->enumerateStreams(settings, callback, variant_data); settings.path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index cda82f31820..10635fb9142 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -399,7 +399,7 @@ void SerializationMap::enumerateStreams( .withType(data.type ? assert_cast(*data.type).getNestedType() : nullptr) .withColumn(data.column ? assert_cast(*data.column).getNestedColumnPtr() : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(data.deserialize_prefix_state); + .withDeserializeState(data.deserialize_state); nested->enumerateStreams(settings, callback, next_data); } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 6e4b4c4c533..ef0a75fac40 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -567,7 +567,7 @@ void SerializationTuple::enumerateStreams( const auto * type_tuple = data.type ? &assert_cast(*data.type) : nullptr; const auto * column_tuple = data.column ? &assert_cast(*data.column) : nullptr; const auto * info_tuple = data.serialization_info ? &assert_cast(*data.serialization_info) : nullptr; - const auto * tuple_deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + const auto * tuple_deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; for (size_t i = 0; i < elems.size(); ++i) { @@ -575,7 +575,7 @@ void SerializationTuple::enumerateStreams( .withType(type_tuple ? type_tuple->getElement(i) : nullptr) .withColumn(column_tuple ? column_tuple->getColumnPtr(i) : nullptr) .withSerializationInfo(info_tuple ? info_tuple->getElementInfo(i) : nullptr) - .withDeserializePrefix(tuple_deserialize_prefix_state ? tuple_deserialize_prefix_state->states[i] : nullptr); + .withDeserializeState(tuple_deserialize_state ? tuple_deserialize_state->states[i] : nullptr); elems[i]->enumerateStreams(settings, callback, next_data); } diff --git a/src/DataTypes/Serializations/SerializationVariant.cpp b/src/DataTypes/Serializations/SerializationVariant.cpp index 8e0ef112444..9456ffa3ad3 100644 --- a/src/DataTypes/Serializations/SerializationVariant.cpp +++ b/src/DataTypes/Serializations/SerializationVariant.cpp @@ -45,7 +45,7 @@ void SerializationVariant::enumerateStreams( { const auto * type_variant = data.type ? &assert_cast(*data.type) : nullptr; const auto * column_variant = data.column ? &assert_cast(*data.column) : nullptr; - const auto * variant_deserialize_prefix_state = data.deserialize_prefix_state ? 
checkAndGetState(data.deserialize_prefix_state) : nullptr; + const auto * variant_deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; auto discriminators_serialization = std::make_shared(std::make_shared>(), "discr", SubstreamType::NamedVariantDiscriminators); auto local_discriminators = column_variant ? column_variant->getLocalDiscriminatorsPtr() : nullptr; @@ -71,7 +71,7 @@ void SerializationVariant::enumerateStreams( .withType(type_variant ? type_variant->getVariant(i) : nullptr) .withColumn(column_variant ? column_variant->getVariantPtrByGlobalDiscriminator(i) : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(variant_deserialize_prefix_state ? variant_deserialize_prefix_state->states[i] : nullptr); + .withDeserializeState(variant_deserialize_state ? variant_deserialize_state->states[i] : nullptr); addVariantElementToPath(settings.path, i); settings.path.back().data = variant_data; @@ -144,12 +144,13 @@ void SerializationVariant::deserializeBinaryBulkStatePrefix( } -void SerializationVariant::serializeBinaryBulkWithMultipleStreams( +void SerializationVariant::serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( const IColumn & column, size_t offset, size_t limit, SerializeBinaryBulkSettings & settings, - SerializeBinaryBulkStatePtr & state) const + SerializeBinaryBulkStatePtr & state, + std::unordered_map & variants_statistics) const { const ColumnVariant & col = assert_cast(column); if (const size_t size = col.size(); limit == 0 || offset + limit > size) @@ -188,6 +189,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( { addVariantElementToPath(settings.path, i); variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]); + variants_statistics[variant_names[i]] += col.getVariantByGlobalDiscriminator(i).size(); settings.path.pop_back(); } settings.path.pop_back(); @@ -208,6 +210,7 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( addVariantElementToPath(settings.path, non_empty_global_discr); /// We can use the same offset/limit as for whole Variant column variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]); + variants_statistics[variant_names[non_empty_global_discr]] += limit; settings.path.pop_back(); settings.path.pop_back(); return; @@ -247,12 +250,23 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams( variant_offsets_and_limits[i].second, settings, variant_state->states[i]); + variants_statistics[variant_names[i]] += variant_offsets_and_limits[i].second; settings.path.pop_back(); } } settings.path.pop_back(); } +void SerializationVariant::serializeBinaryBulkWithMultipleStreams( + const DB::IColumn & column, + size_t offset, + size_t limit, + DB::ISerialization::SerializeBinaryBulkSettings & settings, + DB::ISerialization::SerializeBinaryBulkStatePtr & state) const +{ + std::unordered_map tmp_statistics; + serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics(column, offset, limit, settings, state, tmp_statistics); +} void SerializationVariant::deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, diff --git a/src/DataTypes/Serializations/SerializationVariant.h b/src/DataTypes/Serializations/SerializationVariant.h index 0de786f5561..b6aa1534538 100644 --- 
a/src/DataTypes/Serializations/SerializationVariant.h +++ b/src/DataTypes/Serializations/SerializationVariant.h @@ -69,6 +69,14 @@ public: SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const override; + void serializeBinaryBulkWithMultipleStreamsAndUpdateVariantStatistics( + const IColumn & column, + size_t offset, + size_t limit, + SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkStatePtr & state, + std::unordered_map & variants_statistics) const; + void deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index 0e1ad81ce5b..dc7fc3b9b35 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -38,13 +38,13 @@ void SerializationVariantElement::enumerateStreams( callback(settings.path); settings.path.pop_back(); - const auto * deserialize_prefix_state = data.deserialize_prefix_state ? checkAndGetState(data.deserialize_prefix_state) : nullptr; + const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; addVariantToPath(settings.path); auto nested_data = SubstreamData(nested_serialization) .withType(data.type ? removeNullableOrLowCardinalityNullable(data.type) : nullptr) .withColumn(data.column ? removeNullableOrLowCardinalityNullable(data.column) : nullptr) .withSerializationInfo(data.serialization_info) - .withDeserializePrefix(deserialize_prefix_state ? deserialize_prefix_state->variant_element_state : nullptr); + .withDeserializeState(deserialize_state ? deserialize_state->variant_element_state : nullptr); settings.path.back().data = data; nested_serialization->enumerateStreams(settings, callback, data); removeVariantFromPath(settings.path); diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 9a8ed03a81d..b01643a9532 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -66,8 +66,6 @@ #include #include -#include - namespace DB { @@ -4050,9 +4048,9 @@ private: casted_variant_columns.reserve(variant_types.size()); for (size_t i = 0; i != variant_types.size(); ++i) { - auto variant_col = column_variant.getVariantPtrByLocalDiscriminator(i); + auto variant_col = column_variant.getVariantPtrByGlobalDiscriminator(i); ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }}; - const auto & variant_wrapper = variant_wrappers[column_variant.globalDiscriminatorByLocal(i)]; + const auto & variant_wrapper = variant_wrappers[i]; casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); } @@ -4062,11 +4060,11 @@ private: res->reserve(input_rows_count); for (size_t i = 0; i != input_rows_count; ++i) { - auto local_discr = local_discriminators[i]; - if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) + auto global_discr = column_variant.globalDiscriminatorByLocal(local_discriminators[i]); + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) res->insertDefault(); else - res->insertFrom(*casted_variant_columns[local_discr], column_variant.offsetAt(i)); + res->insertFrom(*casted_variant_columns[global_discr], column_variant.offsetAt(i)); } return res; @@ -4236,14 +4234,14 @@ private: return createColumnToVariantWrapper(from_type, assert_cast(*to_type)); } - WrapperType createDynamicToColumnWrapper(const DataTypePtr & 
to_type) const + WrapperType createDynamicToColumnWrapper(const DataTypePtr &) const { - return [this, to_type] + return [this] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * col_nullable, size_t input_rows_count) -> ColumnPtr { const auto & column_dynamic = assert_cast(*arguments.front().column.get()); const auto & variant_info = column_dynamic.getVariantInfo(); - auto variant_wrapper = createVariantToColumnWrapper(assert_cast(*variant_info.variant_type), to_type); + auto variant_wrapper = createVariantToColumnWrapper(assert_cast(*variant_info.variant_type), result_type); ColumnsWithTypeAndName args = {ColumnWithTypeAndName(column_dynamic.getVariantColumnPtr(), variant_info.variant_type, "")}; return variant_wrapper(args, result_type, col_nullable, input_rows_count); }; @@ -4279,8 +4277,6 @@ private: size_t max_result_num_variants, const ColumnDynamic::Statistics & statistics = {}) const { - LOG_DEBUG(getLogger("FunctionsConversion"), "getReducedVariant for variant {} with size {}", variant_type->getName(), variant_column.size()); - const auto & variant_types = assert_cast(*variant_type).getVariants(); /// First check if we don't exceed the limit in current Variant column. if (variant_types.size() < max_result_num_variants || (variant_types.size() == max_result_num_variants && variant_name_to_discriminator.contains("String"))) @@ -4296,12 +4292,11 @@ private: { /// String variant won't be removed. String variant_name = variant_types[i]->getName(); - LOG_DEBUG(getLogger("FunctionsConversion"), "Variant {}/{} size: {}, statistics: {}", variant_name, i, variant_column.getVariantByGlobalDiscriminator(i).size(), statistics.data.contains(variant_name) ? toString(statistics.data.at(variant_name)) : "none"); if (variant_name == "String") { old_string_discriminator = i; - /// For simplicity, add this variant to the list that will be converted string, + /// For simplicity, add this variant to the list that will be converted to string, /// so we will process it with other variants when constructing the new String variant. variants_to_convert_to_string.push_back(i); } @@ -4361,11 +4356,9 @@ private: { auto string_type = std::make_shared(); auto string_wrapper = prepareUnpackDictionaries(variant_types[discr], string_type); - LOG_DEBUG(getLogger("FunctionsConversion"), "Convert variant {} with size {} to String", variant_types[discr]->getName(), variant_column.getVariantPtrByGlobalDiscriminator(discr)->size()); auto column_to_convert = ColumnWithTypeAndName(variant_column.getVariantPtrByGlobalDiscriminator(discr), variant_types[discr], ""); ColumnsWithTypeAndName args = {column_to_convert}; auto variant_string_column = string_wrapper(args, string_type, nullptr, column_to_convert.column->size()); - LOG_DEBUG(getLogger("FunctionsConversion"), "Got String column with size {}", variant_string_column->size()); string_variant_size += variant_string_column->size(); variants_converted_to_string[discr] = variant_string_column; } @@ -4381,11 +4374,9 @@ private: new_offsets_data.reserve(variant_column.size()); const auto & old_local_discriminators = variant_column.getLocalDiscriminators(); const auto & old_offsets = variant_column.getOffsets(); - LOG_DEBUG(getLogger("FunctionsConversion"), "Discriminators size: {}. 
Offsets size: {}", old_local_discriminators.size(), old_offsets.size()); for (size_t i = 0; i != old_local_discriminators.size(); ++i) { auto old_discr = variant_column.globalDiscriminatorByLocal(old_local_discriminators[i]); - LOG_DEBUG(getLogger("FunctionsConversion"), "Row {}, discriminator {}", i, UInt64(old_discr)); if (old_discr == ColumnVariant::NULL_DISCRIMINATOR) { @@ -4398,12 +4389,10 @@ private: new_discriminators_data.push_back(new_discr); if (new_discr != string_variant_discriminator) { - LOG_DEBUG(getLogger("FunctionsConversion"), "Keep variant {}", UInt64(old_discr)); new_offsets_data.push_back(old_offsets[i]); } else { - LOG_DEBUG(getLogger("FunctionsConversion"), "Get string value of variant {} with String column with size {} at offset {}", UInt64(old_discr), variants_converted_to_string[old_discr]->size(), old_offsets[i]); new_offsets_data.push_back(string_variant->size()); string_variant->insertFrom(*variants_converted_to_string[old_discr], old_offsets[i]); } diff --git a/src/Functions/dynamicElement.cpp b/src/Functions/dynamicElement.cpp index 964c058776e..6752a61b6c3 100644 --- a/src/Functions/dynamicElement.cpp +++ b/src/Functions/dynamicElement.cpp @@ -149,24 +149,30 @@ private: REGISTER_FUNCTION(DynamicElement) { -// factory.registerFunction(FunctionDocumentation{ -// .description = R"( -//Extracts a column with specified type from a `Dynamic` column. -//)", -// .syntax{"dynamicElement(dynamic, type_name)"}, -// .arguments{{ -// {"dynamic", "Dynamic column"}, -// {"type_name", "The name of the variant type to extract"}}}, -// .examples{{{ -// "Example", -// R"( -//)", -// R"( -//)"}}}, -// .categories{"Dynamic"}, -// }); - - factory.registerFunction(); + factory.registerFunction(FunctionDocumentation{ + .description = R"( +Extracts a column with specified type from a `Dynamic` column. +)", + .syntax{"dynamicElement(dynamic, type_name)"}, + .arguments{ + {"dynamic", "Dynamic column"}, + {"type_name", "The name of the variant type to extract"}}, + .examples{{{ + "Example", + R"( +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d), dynamicElement(d, 'String'), dynamicElement(d, 'Int64'), dynamicElement(d, 'Array(Int64)'), dynamicElement(d, 'Date'), dynamicElement(d, 'Array(String)') FROM test;)", + R"( +┌─d─────────────┬─dynamicType(d)─┬─dynamicElement(d, 'String')─┬─dynamicElement(d, 'Int64')─┬─dynamicElement(d, 'Array(Int64)')─┬─dynamicElement(d, 'Date')─┬─dynamicElement(d, 'Array(String)')─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ [] │ ᴺᵁᴸᴸ │ [] │ +│ Hello, World! │ String │ Hello, World! │ ᴺᵁᴸᴸ │ [] │ ᴺᵁᴸᴸ │ [] │ +│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ ᴺᵁᴸᴸ │ [] │ +└───────────────┴────────────────┴─────────────────────────────┴────────────────────────────┴───────────────────────────────────┴───────────────────────────┴────────────────────────────────────┘ +)"}}}, + .categories{"Dynamic"}, + }); } } diff --git a/src/Functions/variantElement.cpp b/src/Functions/variantElement.cpp index b57ccb6fee1..e63afc68b34 100644 --- a/src/Functions/variantElement.cpp +++ b/src/Functions/variantElement.cpp @@ -171,10 +171,10 @@ REGISTER_FUNCTION(VariantElement) Extracts a column with specified type from a `Variant` column. 
)", .syntax{"variantElement(variant, type_name, [, default_value])"}, - .arguments{{ + .arguments{ {"variant", "Variant column"}, {"type_name", "The name of the variant type to extract"}, - {"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional"}}}, + {"default_value", "The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional"}}, .examples{{{ "Example", R"( diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 9107c67afdd..9ef5b58ff91 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2392,12 +2392,14 @@ void IMergeTreeDataPart::setBrokenReason(const String & message, int code) const exception_code = code; } -ColumnPtr IMergeTreeDataPart::readColumnSample(const NameAndTypePair & column) const +ColumnPtr IMergeTreeDataPart::getColumnSample(const NameAndTypePair & column) const { const size_t total_mark = getMarksCount(); - if (!total_mark) + /// If column doesn't have dynamic subcolumns or part has no data, just create column using it's type. + if (!column.type->hasDynamicSubcolumns() || !total_mark) return column.type->createColumn(); + /// Otherwise, read sample column with 0 rows from the part, so it will load dynamic structure. NamesAndTypesList cols; cols.emplace_back(column); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 78619f216c0..ddfc66cc622 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -166,7 +166,9 @@ public: NameAndTypePair getColumn(const String & name) const; std::optional tryGetColumn(const String & column_name) const; - ColumnPtr readColumnSample(const NameAndTypePair & column) const; + /// Get sample column from part. For ordinary columns it just creates column using it's type. + /// For columns with dynamic structure it reads sample column with 0 rows from the part. 
+ ColumnPtr getColumnSample(const NameAndTypePair & column) const; const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index d0a685d95fc..e34822ce6df 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -44,18 +44,29 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( marks_source_hashing = std::make_unique(*marks_compressor); } -} - -void MergeTreeDataPartWriterCompact::initStreamsIfNeeded(const Block & block) -{ - if (!compressed_streams.empty()) - return; auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); - addStreams(column, block.getByName(column.name).column, compression); + addStreams(column, nullptr, compression); + } +} + +void MergeTreeDataPartWriterCompact::initDynamicStreamsIfNeeded(const Block & block) +{ + if (is_dynamic_streams_initialized) + return; + + is_dynamic_streams_initialized = true; + auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); + for (const auto & column : columns_list) + { + if (column.type->hasDynamicSubcolumns()) + { + auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + addStreams(column, block.getByName(column.name).column, compression); + } } } @@ -155,7 +166,8 @@ void writeColumnSingleGranule( void MergeTreeDataPartWriterCompact::write(const Block & block, const IColumn::Permutation * permutation) { - initStreamsIfNeeded(block); + /// On first block of data initialize streams for dynamic subcolumns. + initDynamicStreamsIfNeeded(block); /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 1c748803c52..f35479387f6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -44,7 +44,7 @@ private: void addStreams(const NameAndTypePair & name_and_type, const ColumnPtr & column, const ASTPtr & effective_codec_desc); - void initStreamsIfNeeded(const Block & block); + void initDynamicStreamsIfNeeded(const Block & block); Block header; @@ -98,6 +98,8 @@ private: /// then finally to 'marks_file'. 
std::unique_ptr marks_compressor; std::unique_ptr marks_source_hashing; + + bool is_dynamic_streams_initialized = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index c23a9a81cbc..fb7ee9f7fe8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -89,19 +89,29 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( indices_to_recalc_, stats_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { -} - -void MergeTreeDataPartWriterWide::initStreamsIfNeeded(const DB::Block & block) -{ - if (!column_streams.empty()) - return; - - block_sample = block.cloneEmpty(); auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); - addStreams(column, block_sample.getByName(column.name).column, compression); + addStreams(column, nullptr, compression); + } +} + +void MergeTreeDataPartWriterWide::initDynamicStreamsIfNeeded(const DB::Block & block) +{ + if (is_dynamic_streams_initialized) + return; + + is_dynamic_streams_initialized = true; + block_sample = block.cloneEmpty(); + auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); + for (const auto & column : columns_list) + { + if (column.type->hasDynamicSubcolumns()) + { + auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + addStreams(column, block_sample.getByName(column.name).column, compression); + } } } @@ -123,6 +133,10 @@ void MergeTreeDataPartWriterWide::addStreams( else stream_name = full_stream_name; + /// Shared offsets for Nested type. + if (column_streams.contains(stream_name)) + return; + auto it = stream_name_to_full_name.find(stream_name); if (it != stream_name_to_full_name.end() && it->second != full_stream_name) throw Exception(ErrorCodes::INCORRECT_FILE_NAME, @@ -130,10 +144,6 @@ void MergeTreeDataPartWriterWide::addStreams( " It is a collision between a filename for one column and a hash of filename for another column or a bug", stream_name, it->second, full_stream_name); - /// Shared offsets for Nested type. - if (column_streams.contains(stream_name)) - return; - const auto & subtype = substream_path.back().data.type; CompressionCodecPtr compression_codec; @@ -231,7 +241,8 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Permutation * permutation) { - initStreamsIfNeeded(block); + /// On first block of data initialize streams for dynamic subcolumns. 
+ initDynamicStreamsIfNeeded(block); /// Fill index granularity for this block /// if it's unknown (in case of insert data or horizontal merge, @@ -604,7 +615,6 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai " index granularity size {}, last rows {}", column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); } - } void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index ebdd907914f..8343144f2e1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -87,7 +87,7 @@ private: const ColumnPtr & column, const ASTPtr & effective_codec_desc); - void initStreamsIfNeeded(const Block & block); + void initDynamicStreamsIfNeeded(const Block & block); /// Method for self check (used in debug-build only). Checks that written /// data and corresponding marks are consistent. Otherwise throws logical @@ -135,6 +135,8 @@ private: size_t rows_written_in_last_mark = 0; Block block_sample; + + bool is_dynamic_streams_initialized = false; }; } diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index d18d5eec975..64ca6132cc4 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -349,7 +349,7 @@ void MergeTreeReaderWide::prefetchForColumn( } }; - auto data = ISerialization::SubstreamData(serialization).withType(name_and_type.type).withDeserializePrefix(deserialize_binary_bulk_state_map[name_and_type.name]); + auto data = ISerialization::SubstreamData(serialization).withType(name_and_type.type).withDeserializeState(deserialize_binary_bulk_state_map[name_and_type.name]); ISerialization::EnumerateStreamsSettings settings; serialization->enumerateStreams(settings, callback, data); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5e388d6a8ac..2bbc5bdb3ae 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -60,6 +60,21 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis return true; } +static bool haveMutationsOfDynamicColumns(const MergeTreeData::DataPartPtr & data_part, const MutationCommands & commands) +{ + for (const auto & command : commands) + { + if (!command.column_name.empty()) + { + auto column = data_part->tryGetColumn(command.column_name); + if (column && column->type->hasDynamicSubcolumns()) + return true; + } + } + + return false; +} + static UInt64 getExistingRowsCount(const Block & block) { auto column = block.getByName(RowExistsColumn::name).column; @@ -95,7 +110,7 @@ static void splitAndModifyMutationCommands( auto part_columns = part->getColumnsDescription(); const auto & table_columns = metadata_snapshot->getColumns(); - if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) + if (haveMutationsOfDynamicColumns(part, commands) || !isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) { NameSet mutated_columns; NameSet dropped_columns; @@ -2250,7 +2265,9 @@ bool MutateTask::prepare() /// All columns from part are changed and may be some more that were missing before in part /// TODO We can materialize compact part without copying data - if (!isWidePart(ctx->source_part) || 
!isFullPartStorage(ctx->source_part->getDataPartStorage()) + /// Also currently mutations of types with dynamic subcolumns in Wide part are possible only by + /// rewriting the whole part. + if (MutationHelpers::haveMutationsOfDynamicColumns(ctx->source_part, ctx->commands_for_part) || !isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) || (ctx->interpreter && ctx->interpreter->isAffectingAllColumns())) { /// In case of replicated merge tree with zero copy replication diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index b4d32e71d0d..fc06bcac823 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -219,7 +219,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( auto file_name = *stream_name + ".bin"; checksums_data.files[file_name] = checksum_compressed_file(data_part_storage, file_name); - }); + }, column.type, data_part->getColumnSample(column)); } } else diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.sh.disabled b/tests/queries/0_stateless/03040_dynamic_type_alters.sh similarity index 100% rename from tests/queries/0_stateless/03040_dynamic_type_alters.sh.disabled rename to tests/queries/0_stateless/03040_dynamic_type_alters.sh From df92f422376173ba93228760d5c210dc21b4c128 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Apr 2024 18:45:19 +0000 Subject: [PATCH 089/392] Fix tests, improve dynamic/variantElement functions, add more comments --- src/Columns/ColumnArray.cpp | 2 +- src/Columns/ColumnConst.cpp | 9 ------- src/Columns/ColumnConst.h | 2 -- src/Columns/ColumnDynamic.cpp | 9 +++---- src/Columns/ColumnDynamic.h | 19 ++++++++----- src/Columns/ColumnMap.cpp | 2 +- src/Columns/ColumnNullable.cpp | 2 +- src/Columns/ColumnSparse.cpp | 2 +- src/Columns/ColumnTuple.cpp | 2 +- src/Columns/ColumnVariant.cpp | 2 +- src/Columns/IColumn.h | 3 +++ src/DataTypes/DataTypeDynamic.h | 3 +++ src/DataTypes/Serializations/ISerialization.h | 3 ++- .../SerializationDynamicElement.cpp | 3 +++ .../SerializationDynamicElement.h | 2 +- .../SerializationVariantElement.cpp | 4 +-- src/Functions/dynamicElement.cpp | 26 ++++++------------ src/Functions/dynamicType.cpp | 14 +++++++--- src/Functions/variantElement.cpp | 27 +++++++------------ src/Interpreters/TreeRewriter.cpp | 9 ++----- src/Interpreters/convertFieldToType.cpp | 3 --- src/Parsers/ParserDataType.cpp | 5 +++- src/Processors/Formats/IOutputFormat.h | 3 +-- src/Processors/Merges/Algorithms/MergedData.h | 3 +++ .../Transforms/ColumnGathererTransform.cpp | 3 +++ src/Storages/ColumnsDescription.cpp | 3 +++ .../MergeTree/MergeTreeReaderWide.cpp | 1 - src/Storages/MergeTree/MergeTreeSettings.h | 1 - .../0_stateless/02941_variant_type_4.sh | 2 +- .../03038_nested_dynamic_merges.reference | 10 +++---- .../03038_nested_dynamic_merges.sh | 8 +++--- .../03039_dynamic_all_merge_algorithms_1.sh | 12 ++++----- 32 files changed, 98 insertions(+), 101 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 29773492dc9..b8e2a541f5f 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -1289,7 +1289,7 @@ size_t ColumnArray::getNumberOfDimensions() const return 1 + nested_array->getNumberOfDimensions(); /// Every modern C++ compiler optimizes tail recursion. 
} -void ColumnArray::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnArray::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { Columns nested_source_columns; nested_source_columns.reserve(source_columns.size()); diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index cf3f448516c..f2cea83db0e 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -159,15 +159,6 @@ void ColumnConst::compareColumn( std::fill(compare_results.begin(), compare_results.end(), res); } -void ColumnConst::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) -{ - Columns nested_source_columns; - nested_source_columns.reserve(source_columns.size()); - for (const auto & source_column : source_columns) - nested_source_columns.push_back(assert_cast(*source_column).getDataColumnPtr()); - data->takeDynamicStructureFromSourceColumns(nested_source_columns); -} - ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value) { auto data = column->cloneEmpty(); diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 042468cbbcc..c2c0fa3027c 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -308,8 +308,6 @@ public: bool isCollationSupported() const override { return data->isCollationSupported(); } bool hasDynamicStructure() const override { return data->hasDynamicStructure(); } - - void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override; }; ColumnConst::Ptr createColumnConst(const ColumnPtr & column, Field value); diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index f3dff01af25..a1dd60f4748 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -65,14 +65,14 @@ bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant) if (variant_info.variant_names.size() >= max_dynamic_types) { /// ColumnDynamic can have max_dynamic_types number of variants only when it has String as a variant. - /// Otherwise we won't be able to add cast new variants to Strings. + /// Otherwise we won't be able to cast new variants to Strings. if (!variant_info.variant_name_to_discriminator.contains("String")) throw Exception(ErrorCodes::LOGICAL_ERROR, "Maximum number of variants reached, but no String variant exists"); return false; } - /// If we have max_dynamic_types - 1 number of variants and don't have String variant, we can add only String variant. + /// If we have (max_dynamic_types - 1) number of variants and don't have String variant, we can add only String variant. if (variant_info.variant_names.size() == max_dynamic_types - 1 && new_variant->getName() != "String" && !variant_info.variant_name_to_discriminator.contains("String")) return false; @@ -218,7 +218,7 @@ void ColumnDynamic::insert(const DB::Field & x) return; /// If we cannot insert field into current variant column, extend it with new variant for this field from its type. - if (likely(addNewVariant(applyVisitor(FieldToDataType(), x)))) + if (addNewVariant(applyVisitor(FieldToDataType(), x))) { /// Now we should be able to insert this field into extended variant column. variant_column->insert(x); @@ -566,7 +566,6 @@ const char * ColumnDynamic::deserializeAndInsertFromArena(const char * pos) } /// We reached maximum number of variants and couldn't add new variant. - /// This case should be really rare in real use cases. /// We should always be able to add String variant and cast inserted value to String. 
addStringVariant(); /// Create temporary column of this variant type and deserialize value into it. @@ -645,7 +644,7 @@ ColumnPtr ColumnDynamic::compress() const }); } -void ColumnDynamic::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { if (!empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "takeDynamicStructureFromSourceColumns should be called only on empty Dynamic column"); diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index b5167f4b9d9..4e9c7edd5f9 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -22,15 +22,18 @@ namespace DB class ColumnDynamic final : public COWHelper, ColumnDynamic> { public: + /// struct Statistics { enum class Source { - READ, - MERGE, + READ, /// Statistics were loaded into column during reading from MergeTree. + MERGE, /// Statistics were calculated during merge of several MergeTree parts. }; + /// Source of the statistics. Source source; + /// Statistics data: (variant name) -> (total variant size in data part). std::unordered_map data; }; @@ -42,9 +45,9 @@ private: DataTypePtr variant_type; /// Name of the whole variant to not call getName() every time. String variant_name; - /// Store names of variants to not call getName() every time on variants. + /// Names of variants to not call getName() every time on variants. Names variant_names; - /// Store mapping (variant name) -> (global discriminator). + /// Mapping (variant name) -> (global discriminator). /// It's used during variant extension. std::unordered_map variant_name_to_discriminator; }; @@ -335,7 +338,7 @@ private: /// Combine current variant with the other variant and return global discriminators mapping /// from other variant to the combined one. It's used for inserting from /// different variants. - /// Returns nullptr if maximum number of Variants is reached and the new Variant cannot be created. + /// Returns nullptr if maximum number of variants is reached and the new variant cannot be created. std::vector * combineVariants(const VariantInfo & other_variant_info); void updateVariantInfoAndExpandVariantColumn(const DataTypePtr & new_variant_type); @@ -343,7 +346,7 @@ private: WrappedPtr variant_column; /// Store the type of current variant with some additional information. VariantInfo variant_info; - /// Maximum number of different types that can be stored in Dynamic. + /// The maximum number of different types that can be stored in this Dynamic column. /// If exceeded, all new variants will be converted to String. size_t max_dynamic_types; @@ -351,7 +354,11 @@ private: /// Used in takeDynamicStructureFromSourceColumns and set during deserialization. Statistics statistics; + /// Cache (Variant name) -> (global discriminators mapping from this variant to current variant in Dynamic column). + /// Used to avoid mappings recalculation in combineVariants for the same Variant types. std::unordered_map> variant_mappings_cache; + /// Cache of Variant types that couldn't be combined with current variant in Dynamic column. + /// Used to avoid checking if combination is possible for the same Variant types. 
std::unordered_set variants_with_failed_combination; }; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 48e8bced23a..eecea1a273f 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -312,7 +312,7 @@ ColumnPtr ColumnMap::compress() const }); } -void ColumnMap::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnMap::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { Columns nested_source_columns; nested_source_columns.reserve(source_columns.size()); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 011f3702bdf..bb0e15d39ab 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -835,7 +835,7 @@ ColumnPtr ColumnNullable::getNestedColumnWithDefaultOnNull() const return res; } -void ColumnNullable::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnNullable::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { Columns nested_source_columns; nested_source_columns.reserve(source_columns.size()); diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 80e20bb7631..d54801b6e07 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -801,7 +801,7 @@ ColumnSparse::Iterator ColumnSparse::getIterator(size_t n) const return Iterator(offsets_data, _size, current_offset, n); } -void ColumnSparse::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnSparse::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { Columns values_source_columns; values_source_columns.reserve(source_columns.size()); diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 4e8e4063157..19f74048d84 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -582,7 +582,7 @@ bool ColumnTuple::hasDynamicStructure() const return false; } -void ColumnTuple::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { std::vector nested_source_columns; nested_source_columns.resize(columns.size()); diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index 819491f7fd9..ec47f5dfa74 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -1539,7 +1539,7 @@ bool ColumnVariant::hasDynamicStructure() const return false; } -void ColumnVariant::takeDynamicStructureFromSourceColumns(const DB::Columns & source_columns) +void ColumnVariant::takeDynamicStructureFromSourceColumns(const Columns & source_columns) { std::vector variants_source_columns; variants_source_columns.resize(variants.size()); diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 33f398474ed..76f5af5bcd7 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -534,7 +534,10 @@ public: return res; } + /// Checks if column has dynamic subcolumns. virtual bool hasDynamicStructure() const { return false; } + /// For columns with dynamic subcolumns this method takes dynamic structure from source columns + /// and creates proper resulting dynamic structure in advance for merge of these source columns. virtual void takeDynamicStructureFromSourceColumns(const std::vector & /*source_columns*/) {} /** Some columns can contain another columns inside. 
diff --git a/src/DataTypes/DataTypeDynamic.h b/src/DataTypes/DataTypeDynamic.h index 452e05061a0..9fc727fd9c8 100644 --- a/src/DataTypes/DataTypeDynamic.h +++ b/src/DataTypes/DataTypeDynamic.h @@ -8,6 +8,8 @@ namespace DB { +/// Dynamic type allows to store values of any type inside it and to read +/// subcolumns with any type without knowing all of them in advance. class DataTypeDynamic final : public IDataType { public: @@ -28,6 +30,7 @@ public: Field getDefault() const override; + /// 2 Dynamic types with different max_dynamic_types parameters are considered as different. bool equals(const IDataType & rhs) const override { if (const auto * rhs_dynamic_type = typeid_cast(&rhs)) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index b233230f9cc..914ff9cf4a2 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -151,7 +151,8 @@ public: /// For types with dynamic subcolumns deserialize state contains information /// about current dynamic structure. And this information can be useful - /// when we call enumerateStreams to enumerate dynamic streams. + /// when we call enumerateStreams after deserializeBinaryBulkStatePrefix + /// to enumerate dynamic streams. DeserializeBinaryBulkStatePtr deserialize_state; }; diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index 059a7d57e4e..b0a4e63d0a5 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -97,6 +97,9 @@ void SerializationDynamicElement::deserializeBinaryBulkWithMultipleStreams( DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const { + if (!state) + return; + auto * dynamic_element_state = checkAndGetState(state); if (dynamic_element_state->variant_serialization) diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.h b/src/DataTypes/Serializations/SerializationDynamicElement.h index 9e4980e0a27..2ddc3324139 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.h +++ b/src/DataTypes/Serializations/SerializationDynamicElement.h @@ -10,7 +10,7 @@ namespace DB class SerializationDynamicElement final : public SerializationWrapper { private: - /// To be able to deserialize Dyna,ic element as a subcolumn + /// To be able to deserialize Dynamic element as a subcolumn /// we need its type name and global discriminator. String dynamic_element_name; diff --git a/src/DataTypes/Serializations/SerializationVariantElement.cpp b/src/DataTypes/Serializations/SerializationVariantElement.cpp index dc7fc3b9b35..1f9a81ac671 100644 --- a/src/DataTypes/Serializations/SerializationVariantElement.cpp +++ b/src/DataTypes/Serializations/SerializationVariantElement.cpp @@ -45,8 +45,8 @@ void SerializationVariantElement::enumerateStreams( .withColumn(data.column ? removeNullableOrLowCardinalityNullable(data.column) : nullptr) .withSerializationInfo(data.serialization_info) .withDeserializeState(deserialize_state ? 
deserialize_state->variant_element_state : nullptr); - settings.path.back().data = data; - nested_serialization->enumerateStreams(settings, callback, data); + settings.path.back().data = nested_data; + nested_serialization->enumerateStreams(settings, callback, nested_data); removeVariantFromPath(settings.path); } diff --git a/src/Functions/dynamicElement.cpp b/src/Functions/dynamicElement.cpp index 6752a61b6c3..202533dc5c8 100644 --- a/src/Functions/dynamicElement.cpp +++ b/src/Functions/dynamicElement.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,7 @@ public: getName(), arguments[0].type->getName()); - auto return_type = makeNullableOrLowCardinalityNullableSafe(getRequestedElementType(arguments[1].column)); + auto return_type = makeNullableOrLowCardinalityNullableSafe(getRequestedType(arguments[1].column)); for (; count_arrays; --count_arrays) return_type = std::make_shared(return_type); @@ -97,29 +98,18 @@ public: } const ColumnDynamic * input_col_as_dynamic = checkAndGetColumn(input_col); - if (!input_col_as_dynamic) + const DataTypeDynamic * input_type_as_dynamic = checkAndGetDataType(input_type); + if (!input_col_as_dynamic || !input_type_as_dynamic) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Dynamic or array of Dynamics. Actual {}", getName(), input_arg.type->getName()); - auto element_type = getRequestedElementType(arguments[1].column); - const auto & variant_info = input_col_as_dynamic->getVariantInfo(); - auto it = variant_info.variant_name_to_discriminator.find(element_type->getName()); - if (it == variant_info.variant_name_to_discriminator.end()) - { - auto result_type = makeNullableOrLowCardinalityNullableSafe(element_type); - auto result_column = result_type->createColumn(); - result_column->insertManyDefaults(input_rows_count); - return wrapInArraysAndConstIfNeeded(std::move(result_column), array_offsets, input_arg_is_const, input_rows_count); - } - - const auto & variant_column = input_col_as_dynamic->getVariantColumn(); - auto subcolumn_creator = SerializationVariantElement::VariantSubcolumnCreator(variant_column.getLocalDiscriminatorsPtr(), element_type->getName(), it->second, variant_column.localDiscriminatorByGlobal(it->second)); - auto result_column = subcolumn_creator.create(variant_column.getVariantPtrByGlobalDiscriminator(it->second)); - return wrapInArraysAndConstIfNeeded(std::move(result_column), array_offsets, input_arg_is_const, input_rows_count); + auto type = getRequestedType(arguments[1].column); + auto subcolumn = input_type_as_dynamic->getSubcolumn(type->getName(), input_col_as_dynamic->getPtr()); + return wrapInArraysAndConstIfNeeded(std::move(subcolumn), array_offsets, input_arg_is_const, input_rows_count); } private: - DataTypePtr getRequestedElementType(const ColumnPtr & type_name_column) const + DataTypePtr getRequestedType(const ColumnPtr & type_name_column) const { const auto * name_col = checkAndGetColumnConst(type_name_column.get()); if (!name_col) diff --git a/src/Functions/dynamicType.cpp b/src/Functions/dynamicType.cpp index 8fb2974ceff..e8ca73597d6 100644 --- a/src/Functions/dynamicType.cpp +++ b/src/Functions/dynamicType.cpp @@ -21,7 +21,7 @@ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; namespace { -/// Return enum with type name for each row in Dynamic column. +/// Return String with type name for each row in Dynamic column. 
class FunctionDynamicType : public IFunction { public: @@ -89,13 +89,21 @@ REGISTER_FUNCTION(DynamicType) Returns the variant type name for each row of `Dynamic` column. If row contains NULL, it returns 'None' for it. )", .syntax = {"dynamicType(variant)"}, - .arguments = {{"variant", "Variant column"}}, + .arguments = {{"dynamic", "Dynamic column"}}, .examples = {{{ "Example", R"( +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('Hello, World!'), ([1, 2, 3]); +SELECT d, dynamicType(d) FROM test; )", R"( - +┌─d─────────────┬─dynamicType(d)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ +│ Hello, World! │ String │ +│ [1,2,3] │ Array(Int64) │ +└───────────────┴────────────────┘ )"}}}, .categories{"Variant"}, }); diff --git a/src/Functions/variantElement.cpp b/src/Functions/variantElement.cpp index e63afc68b34..80d34083d9d 100644 --- a/src/Functions/variantElement.cpp +++ b/src/Functions/variantElement.cpp @@ -112,18 +112,15 @@ public: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Variant or array of Variants. Actual {}", getName(), input_arg.type->getName()); - std::optional variant_global_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size()); + auto variant_discr = getVariantGlobalDiscriminator(arguments[1].column, *input_type_as_variant, arguments.size()); - if (!variant_global_discr.has_value()) + if (!variant_discr) return arguments[2].column; - auto variant_local_discr = input_col_as_variant->localDiscriminatorByGlobal(*variant_global_discr); - const auto & variant_type = input_type_as_variant->getVariant(*variant_global_discr); - const auto & variant_column = input_col_as_variant->getVariantPtrByGlobalDiscriminator(*variant_global_discr); - auto subcolumn_creator = SerializationVariantElement::VariantSubcolumnCreator(input_col_as_variant->getLocalDiscriminatorsPtr(), variant_type->getName(), *variant_global_discr, variant_local_discr); - auto res = subcolumn_creator.create(variant_column); - return wrapInArraysAndConstIfNeeded(std::move(res), array_offsets, input_arg_is_const, input_rows_count); + auto variant_column = input_type_as_variant->getSubcolumn(input_type_as_variant->getVariant(*variant_discr)->getName(), input_col_as_variant->getPtr()); + return wrapInArraysAndConstIfNeeded(std::move(variant_column), array_offsets, input_arg_is_const, input_rows_count); } + private: std::optional getVariantGlobalDiscriminator(const ColumnPtr & index_column, const DataTypeVariant & variant_type, size_t argument_size) const { @@ -133,20 +130,16 @@ private: "Second argument to {} with Variant argument must be a constant String", getName()); - String variant_element_name = name_col->getValue(); - auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name); - if (variant_element_type) + auto variant_element_name = name_col->getValue(); + if (auto variant_element_type = DataTypeFactory::instance().tryGet(variant_element_name)) { - const auto & variants = variant_type.getVariants(); - for (size_t i = 0; i != variants.size(); ++i) - { - if (variants[i]->getName() == variant_element_type->getName()) - return i; - } + if (auto discr = variant_type.tryGetVariantDiscriminator(variant_element_type->getName())) + return discr; } if (argument_size == 2) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} doesn't contain variant with type {}", variant_type.getName(), variant_element_name); + return std::nullopt; } diff --git 
a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index a6cb378243a..a3c5a7ed3ed 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -2,7 +2,7 @@ #include #include -//#include +#include #include #include @@ -1188,27 +1188,22 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } + /// Check for dynamic subcolums in unknown required columns. if (!unknown_required_source_columns.empty()) { - for (const NameAndTypePair & pair : source_columns_ordinary) { -// std::cerr << "Check ordinary column " << pair.name << "\n"; if (!pair.type->hasDynamicSubcolumns()) continue; -// std::cerr << "Check dyamic subcolumns\n"; - for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();) { auto [column_name, dynamic_subcolumn_name] = Nested::splitName(*it); -// std::cerr << "Check dyamic subcolumn " << dynamic_subcolumn_name << "\n"; if (column_name == pair.name) { if (auto dynamic_subcolumn_type = pair.type->tryGetSubcolumnType(dynamic_subcolumn_name)) { -// std::cerr << "Found\n"; source_columns.emplace_back(*it, dynamic_subcolumn_type); it = unknown_required_source_columns.erase(it); continue; diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 30b7de409f1..9363e3d83eb 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -27,7 +27,6 @@ #include #include #include -#include namespace DB @@ -167,8 +166,6 @@ Field convertDecimalType(const Field & from, const To & type) Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const IDataType * from_type_hint) { - checkStackSize(); - if (from_type_hint && from_type_hint->equals(type)) { return src; diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index 747a9a6f7ba..573430ae9ab 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -7,12 +7,14 @@ #include #include + namespace DB { namespace { +/// Parser of Dynamic type arguments: Dynamic(max_types=N) class DynamicArgumentsParser : public IParserBase { private: @@ -47,7 +49,8 @@ private: /// - Nested table elements; /// - Enum element in form of 'a' = 1; /// - literal; -/// - another data type (or identifier) +/// - Dynamic type arguments; +/// - another data type (or identifier); class ParserDataTypeArgument : public IParserBase { public: diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 9996bedb20e..cae2ab7691e 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -105,8 +105,6 @@ public: } } - virtual void finalizeBuffers() {} - protected: friend class ParallelFormattingOutputFormat; @@ -124,6 +122,7 @@ protected: virtual void consumeTotals(Chunk) {} virtual void consumeExtremes(Chunk) {} virtual void finalizeImpl() {} + virtual void finalizeBuffers() {} virtual void writePrefix() {} virtual void writeSuffix() {} virtual void resetFormatterImpl() {} diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index 95f915e4478..c5bb074bb0c 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -99,6 +99,9 @@ public: { columns[i] = columns[i]->cloneResized(num_rows); } + /// For columns with Dynamic structure we cannot just take column from input chunk because resulting column may have + /// different Dynamic 
structure (and have some merge statistics after calling takeDynamicStructureFromSourceColumns). + /// We should insert into data resulting column using insertRangeFrom. else if (columns[i]->hasDynamicStructure()) { columns[i] = columns[i]->cloneEmpty(); diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index 6736cd59e83..b6bcec26c0c 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -60,6 +60,9 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() if (source_to_fully_copy) /// Was set on a previous iteration { Chunk res; + /// For columns with Dynamic structure we cannot just take column source_to_fully_copy because resulting column may have + /// different Dynamic structure (and have some merge statistics after calling takeDynamicStructureFromSourceColumns). + /// We should insert into data resulting column using insertRangeFrom. if (result_column->hasDynamicStructure()) { auto col = result_column->cloneEmpty(); diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 6f844e31970..3a3ee0d1d14 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -550,6 +550,7 @@ bool ColumnsDescription::hasSubcolumn(const String & column_name) const if (subcolumns.get<0>().count(column_name)) return true; + /// Check for dynamic subcolumns auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); auto it = columns.get<1>().find(ordinary_column_name); if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) @@ -655,6 +656,7 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns return *jt; } + /// Check for dynmaic subcolumns. auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); it = columns.get<1>().find(ordinary_column_name); if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) @@ -752,6 +754,7 @@ bool ColumnsDescription::hasColumnOrSubcolumn(GetColumnsOptions::Kind kind, cons if ((it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & kind)) || hasSubcolumn(column_name)) return true; + /// Check for dynamic subcolumns. auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); it = columns.get<1>().find(ordinary_column_name); if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 64ca6132cc4..de6b742934f 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -1,5 +1,4 @@ #include -#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 375c1e37bae..a00508fd1c1 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -43,7 +43,6 @@ struct Settings; M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. 
If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ - /** M(UInt64, max_types_for_dynamic_serialization, 32, "The maximum number of different types in Dynamic column stored separately in MergeTree tables in wide format. If exceeded, new types will be converted to String", 0) */ \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index f6eaf2fcc9a..ddff3852865 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_variant_type=1 --allow_suspicious_variant_types=1 --max_insert_threads 0 --group_by_two_level_threshold 454338 --group_by_two_level_threshold_bytes 50000000 --distributed_aggregation_memory_efficient 1 --fsync_metadata 0 --output_format_parallel_formatting 0 --input_format_parallel_parsing 1 --min_chunk_bytes_for_parallel_parsing 10898151 --max_read_buffer_size 730200 --prefer_localhost_replica 1 --max_block_size 77643 --max_threads 18 --optimize_append_index 0 --optimize_if_chain_to_multiif 0 --optimize_if_transform_strings_to_enum 0 --optimize_read_in_order 0 --optimize_or_like_chain 0 --optimize_substitute_columns 0 --enable_multiple_prewhere_read_steps 0 --read_in_order_two_level_merge_threshold 20 --optimize_aggregation_in_order 1 --aggregation_in_order_max_block_bytes 39857781 --use_uncompressed_cache 1 --min_bytes_to_use_direct_io 1 --min_bytes_to_use_mmap_io 10737418240 --local_filesystem_read_method io_uring --remote_filesystem_read_method threadpool --local_filesystem_read_prefetch 1 --filesystem_cache_segments_batch_size 10 --read_from_filesystem_cache_if_exists_otherwise_bypass_cache 1 --throw_on_error_from_cache_on_write_operations 1 --remote_filesystem_read_prefetch 0 --allow_prefetched_read_pool_for_remote_filesystem 0 --filesystem_prefetch_max_memory_usage 128Mi --filesystem_prefetches_limit 0 --filesystem_prefetch_min_bytes_for_single_read_task 8Mi --filesystem_prefetch_step_marks 0 --filesystem_prefetch_step_bytes 100Mi --compile_aggregate_expressions 0 --compile_sort_description 0 --merge_tree_coarse_index_granularity 30 --optimize_distinct_in_order 1 --max_bytes_before_external_sort 10737418240 --max_bytes_before_external_group_by 1 --max_bytes_before_remerge_sort 2279999838 --min_compress_block_size 56847 --max_compress_block_size 2399536 --merge_tree_compact_parts_min_granules_to_multibuffer_read 39 --optimize_sorting_by_input_stream_properties 1 --http_response_buffer_size 2739586 --http_wait_end_of_query False --enable_memory_bound_merging_of_aggregation_results 1 --min_count_to_compile_expression 3 --min_count_to_compile_aggregate_expression 0 --min_count_to_compile_sort_description 3 --session_timezone America/Mazatlan --prefer_warmed_unmerged_parts_seconds 7 --use_page_cache_for_disks_without_file_cache False --page_cache_inject_eviction True --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.19 --ratio_of_defaults_for_sparse_serialization 0.0 
--prefer_fetch_merged_part_size_threshold 1 --vertical_merge_algorithm_min_rows_to_activate 389696 --vertical_merge_algorithm_min_columns_to_activate 100 --allow_vertical_merges_from_compact_to_wide_parts 0 --min_merge_bytes_to_use_direct_io 10737418240 --index_granularity_bytes 16233524 --merge_max_block_size 6455 --index_granularity 16034 --min_bytes_for_wide_part 0 --compress_marks 0 --compress_primary_key 0 --marks_compress_block_size 15959 --primary_key_compress_block_size 70269 --replace_long_file_name_to_hash 1 --max_file_name_length 123 --min_bytes_for_full_part_storage 0 --compact_parts_max_bytes_to_buffer 511937149 --compact_parts_max_granules_to_buffer 142 --compact_parts_merge_max_bytes_to_prefetch_part 28443027 --cache_populated_by_fetch 0 --concurrent_part_removal_threshold 0 --old_parts_lifetime 480" function test6_insert() { diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference index f8118ce8b95..65034647775 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.reference +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.reference @@ -2,8 +2,8 @@ MergeTree compact + horizontal merge test 16667 Tuple(a Dynamic(max_types=3)):Date 33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 100000 UInt64:None 33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) 50000 Tuple(a Dynamic(max_types=3)):UInt64 @@ -25,8 +25,8 @@ MergeTree wide + horizontal merge test 16667 Tuple(a Dynamic(max_types=3)):Date 33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) -50000 Tuple(a Dynamic(max_types=3)):UInt64 50000 Tuple(a Dynamic(max_types=3)):String +50000 Tuple(a Dynamic(max_types=3)):UInt64 100000 UInt64:None 33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) 50000 Tuple(a Dynamic(max_types=3)):UInt64 @@ -40,8 +40,8 @@ test 100000 UInt64:None 133333 Tuple(a Dynamic(max_types=3)):None 50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None 100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None 116667 Tuple(a Dynamic(max_types=3)):String 133333 Tuple(a Dynamic(max_types=3)):None MergeTree compact + vertical merge @@ -59,8 +59,8 @@ test 33333 Tuple(a Dynamic(max_types=3)):Array(UInt8) 50000 Tuple(a Dynamic(max_types=3)):UInt64 66667 Tuple(a Dynamic(max_types=3)):String -100000 UInt64:None 100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None 133333 Tuple(a Dynamic(max_types=3)):None 50000 Tuple(a Dynamic(max_types=3)):UInt64 100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) @@ -86,7 +86,7 @@ test 100000 UInt64:None 133333 Tuple(a Dynamic(max_types=3)):None 50000 Tuple(a Dynamic(max_types=3)):UInt64 -100000 UInt64:None 100000 Tuple(a Dynamic(max_types=3)):Tuple(UInt64) +100000 UInt64:None 116667 Tuple(a Dynamic(max_types=3)):String 133333 Tuple(a Dynamic(max_types=3)):None diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh index afb167ec20d..b82ddb3813e 100755 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges.sh +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges.sh @@ -18,16 +18,16 @@ function test() $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, 
toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" $CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" $CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" } $CH_CLIENT -q "drop table if exists test;" diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh index 3384a135307..9298fe28fec 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh @@ -18,9 +18,9 @@ function test() $CH_CLIENT -q "insert into test select number, number from numbers(100000)" $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(50000, 100000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "drop table test" echo "SummingMergeTree" @@ -29,10 +29,10 @@ function test() $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "select count(), sum from test group by sum" $CH_CLIENT -nm -q "system start merges test; optimize table test final" - 
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "select count(), sum from test group by sum" $CH_CLIENT -q "drop table test" @@ -42,10 +42,10 @@ function test() $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), number from numbers(100000) group by number" $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" $CH_CLIENT -q "drop table test" } From c9b019d392c4fa3e2f25a2921383711fc2c93ce5 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Apr 2024 18:46:38 +0000 Subject: [PATCH 090/392] Mark ColumnDynamic constructor explicit --- src/Columns/ColumnDynamic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 4e9c7edd5f9..c6626433877 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -52,7 +52,7 @@ private: std::unordered_map variant_name_to_discriminator; }; - ColumnDynamic(size_t max_dynamic_types_); + explicit ColumnDynamic(size_t max_dynamic_types_); ColumnDynamic(MutableColumnPtr variant_column_, const VariantInfo & variant_info_, size_t max_dynamic_types_, const Statistics & statistics_ = {}); public: From 3b9f593524ba27105864464f41d8b3e858d163f9 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Apr 2024 19:00:32 +0000 Subject: [PATCH 091/392] Fix type in code, add more docs --- docs/en/sql-reference/data-types/dynamic.md | 256 +++++++++++++++++++- src/Storages/ColumnsDescription.cpp | 2 +- 2 files changed, 256 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index e20bdad1e79..e3cade25b55 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -106,6 +106,7 @@ SELECT toTypeName(d.String), toTypeName(d.Int64), toTypeName(d.`Array(Int64)`), ```sql SELECT d, dynamicType(d), dynamicElement(d, 'String'), dynamicElement(d, 'Int64'), dynamicElement(d, 'Array(Int64)'), dynamicElement(d, 'Date'), dynamicElement(d, 'Array(String)') FROM test;``` +``` ```text ┌─d─────────────┬─dynamicType(d)─┬─dynamicElement(d, 'String')─┬─dynamicElement(d, 'Int64')─┬─dynamicElement(d, 'Array(Int64)')─┬─dynamicElement(d, 'Date')─┬─dynamicElement(d, 'Array(String)')─┐ @@ -139,7 +140,7 @@ SELECT dynamicType(d) from test; There are 4 possible conversions that can be performed with `Dynamic` column. 
-### Converting an ordinary column to a Variant column +### Converting an ordinary column to a Dynamic column ```sql SELECT 'Hello, World!'::Dynamic as d, dynamicType(d); @@ -151,7 +152,260 @@ SELECT 'Hello, World!'::Dynamic as d, dynamicType(d); └───────────────┴────────────────┘ ``` +### Converting a String column to a Dynamic column through parsing +To parse `Dynamic` type values from a `String` column you can enable the setting `cast_string_to_dynamic_use_inference`: +```sql +SET cast_string_to_dynamic_use_inference = 1; +SELECT CAST(materialize(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01')), 'Map(String, Dynamic)') as map_of_dynamic, mapApply((k, v) -> (k, dynamicType(v)), map_of_dynamic) as map_of_dynamic_types; +``` +```text +┌─map_of_dynamic──────────────────────────────┬─map_of_dynamic_types─────────────────────────┐ +│ {'key1':42,'key2':true,'key3':'2020-01-01'} │ {'key1':'Int64','key2':'Bool','key3':'Date'} │ +└─────────────────────────────────────────────┴──────────────────────────────────────────────┘ +``` + +### Converting a Dynamic column to an ordinary column + +It is possible to convert a `Dynamic` column to an ordinary column. In this case all nested types will be converted to the destination type: + +```sql +CREATE TABLE test (d Dynamic) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('42.42'), (true), ('e10'); +SELECT d::Nullable(Float64) FROM test; +``` + +```text +┌─CAST(d, 'Nullable(Float64)')─┐ +│ ᴺᵁᴸᴸ │ +│ 42 │ +│ 42.42 │ +│ 1 │ +│ 0 │ +└──────────────────────────────┘ +``` + +### Converting a Variant column to a Dynamic column + +```sql +CREATE TABLE test (v Variant(UInt64, String, Array(UInt64))) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), ('String'), ([1, 2, 3]); +SELECT v::Dynamic as d, dynamicType(d) from test; +``` + +```text +┌─d───────┬─dynamicType(d)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ UInt64 │ +│ String │ String │ +│ [1,2,3] │ Array(UInt64) │ +└─────────┴────────────────┘ +``` + +### Converting a Dynamic(max_types=N) column to another Dynamic(max_types=K) + +If `K >= N`, the data doesn't change during the conversion: + +```sql +CREATE TABLE test (d Dynamic(max_types=3)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true); +SELECT d::Dynamic(max_types=5) as d2, dynamicType(d2) FROM test; +``` + +```text +┌─d2────┬─dynamicType(d2)─┐ +│ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ +│ 43 │ Int64 │ +│ 42.42 │ String │ +│ true │ Bool │ +└───────┴─────────────────┘ +``` + +If `K < N`, then the values with the rarest types are converted to `String`: +```sql +CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]); +SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2) FROM test; +``` + +```text +┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ 42 │ Int64 │ +│ 43 │ Int64 │ 43 │ Int64 │ +│ 42.42 │ String │ 42.42 │ String │ +│ true │ Bool │ true │ String │ +│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │ +└─────────┴────────────────┴─────────┴─────────────────┘ +``` + +If `K=1`, all types are converted to `String`: + +```sql +CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory; +INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]); +SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM test; +``` + +```text +┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐ +│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ +│ 42 │ Int64 │ 42 │ String │ +│ 43 │ Int64
│ 43 │ String │ +│ 42.42 │ String │ 42.42 │ String │ +│ true │ Bool │ true │ String │ +│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │ +└─────────┴────────────────┴─────────┴─────────────────┘ +``` + +## Reading Dynamic type from the data + +All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc) support reading the `Dynamic` type. During data parsing ClickHouse tries to infer the type of each value and uses it during insertion into the `Dynamic` column. + +Example: + +```sql +SELECT + d, + dynamicType(d), + dynamicElement(d, 'String') AS str, + dynamicElement(d, 'Int64') AS num, + dynamicElement(d, 'Float64') AS float, + dynamicElement(d, 'Date') AS date, + dynamicElement(d, 'Array(Int64)') AS arr +FROM format(JSONEachRow, 'd Dynamic', $$ +{"d" : "Hello, World!"}, +{"d" : 42}, +{"d" : 42.42}, +{"d" : "2020-01-01"}, +{"d" : [1, 2, 3]} +$$) +``` + +```text +┌─d─────────────┬─dynamicType(d)─┬─str───────────┬──num─┬─float─┬───────date─┬─arr─────┐ +│ Hello, World! │ String │ Hello, World! │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ +│ 42.42 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │ +│ 2020-01-01 │ Date │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-01 │ [] │ +│ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ +└───────────────┴────────────────┴───────────────┴──────┴───────┴────────────┴─────────┘ +``` + +## Comparing values of Dynamic type + +Values of the `Dynamic` type are compared similarly to values of the `Variant` type: +The result of operator `<` for values `d1` with underlying type `T1` and `d2` with underlying type `T2` of type `Dynamic` is defined as follows: +- If `T1 = T2 = T`, the result will be `d1.T < d2.T` (underlying values will be compared). +- If `T1 != T2`, the result will be `T1 < T2` (type names will be compared).
+ +Examples: +```sql +CREATE TABLE test (d1 Dynamic, d2 Dynamic) ENGINE=Memory; +INSERT INTO test VALUES (42, 42), (42, 43), (42, 'abc'), (42, [1, 2, 3]), (42, []), (42, NULL); +``` + +```sql +SELECT d2, dynamicType(d2) as d2_type from test order by d2; +``` + +```text +┌─d2──────┬─d2_type──────┐ +│ [] │ Array(Int64) │ +│ [1,2,3] │ Array(Int64) │ +│ 42 │ Int64 │ +│ 43 │ Int64 │ +│ abc │ String │ +│ ᴺᵁᴸᴸ │ None │ +└─────────┴──────────────┘ +``` + +```sql +SELECT d1, dynamicType(d1) as d1_type, d2, dynamicType(d2) as d2_type, d1 = d2, d1 < d2, d1 > d2 from test; +``` + +```text +┌─d1─┬─d1_type─┬─d2──────┬─d2_type──────┬─equals(d1, d2)─┬─less(d1, d2)─┬─greater(d1, d2)─┐ +│ 42 │ Int64 │ 42 │ Int64 │ 1 │ 0 │ 0 │ +│ 42 │ Int64 │ 43 │ Int64 │ 0 │ 1 │ 0 │ +│ 42 │ Int64 │ abc │ String │ 0 │ 1 │ 0 │ +│ 42 │ Int64 │ [1,2,3] │ Array(Int64) │ 0 │ 0 │ 1 │ +│ 42 │ Int64 │ [] │ Array(Int64) │ 0 │ 0 │ 1 │ +│ 42 │ Int64 │ ᴺᵁᴸᴸ │ None │ 0 │ 1 │ 0 │ +└────┴─────────┴─────────┴──────────────┴────────────────┴──────────────┴─────────────────┘ +``` + +If you need to find the row with a specific `Dynamic` value, you can do one of the following: + +- Cast the value to the `Dynamic` type: + +```sql +SELECT * FROM test WHERE d2 == [1,2,3]::Array(UInt32)::Dynamic; +``` + +```text +┌─d1─┬─d2──────┐ +│ 42 │ [1,2,3] │ +└────┴─────────┘ +``` + +- Compare the `Dynamic` subcolumn with the required type: + +```sql +SELECT * FROM test WHERE d2.`Array(Int64)` == [1,2,3] -- or using dynamicElement(d2, 'Array(Int64)') +``` + +```text +┌─d1─┬─d2──────┐ +│ 42 │ [1,2,3] │ +└────┴─────────┘ +``` + +Sometimes it can be useful to make an additional check on the dynamic type, because subcolumns with complex types like `Array/Map/Tuple` cannot be inside `Nullable` and will have default values instead of `NULL` on rows with different types: + +```sql +SELECT d2, d2.`Array(Int64)`, dynamicType(d2) FROM test WHERE d2.`Array(Int64)` == []; +``` + +```text +┌─d2───┬─d2.Array(Int64)──┬─dynamicType(d2)─┐ +│ 42 │ [] │ Int64 │ +│ 43 │ [] │ Int64 │ +│ abc │ [] │ String │ +│ [] │ [] │ Array(Int64) │ +│ ᴺᵁᴸᴸ │ [] │ None │ +└──────┴──────────────────┴─────────────────┘ +``` + +```sql +SELECT d2, d2.`Array(Int64)`, dynamicType(d2) FROM test WHERE dynamicType(d2) == 'Array(Int64)' AND d2.`Array(Int64)` == []; +``` + +```text +┌─d2─┬─d2.Array(Int64)──┬─dynamicType(d2)─┐ +│ [] │ [] │ Array(Int64) │ +└────┴──────────────────┴─────────────────┘ +``` + +**Note:** values of the `Dynamic` type with different numeric types are considered different values and are not compared with each other; their type names are compared instead. + +Example: + +```sql +CREATE TABLE test (d Dynamic) ENGINE=Memory; +INSERT INTO test VALUES (1::UInt32), (1::Int64), (100::UInt32), (100::Int64); +SELECT d, dynamicType(d) FROM test ORDER BY d; +``` + +```text +┌─d───┬─dynamicType(d)─┐ +│ 1 │ Int64 │ +│ 100 │ Int64 │ +│ 1 │ UInt32 │ +│ 100 │ UInt32 │ +└─────┴────────────────┘ +``` diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 3a3ee0d1d14..4cf66649ad1 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -656,7 +656,7 @@ std::optional ColumnsDescription::tryGetColumn(const GetColumns return *jt; } - /// Check for dynmaic subcolumns. + /// Check for dynamic subcolumns.
auto [ordinary_column_name, dynamic_subcolumn_name] = Nested::splitName(column_name); it = columns.get<1>().find(ordinary_column_name); if (it != columns.get<1>().end() && it->type->hasDynamicSubcolumns()) From ff6fa4bf6e414caa7cd483a3155d38187ceaf3f5 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 3 May 2024 17:03:16 +0000 Subject: [PATCH 092/392] fix unit tests for asyncloader --- src/Common/tests/gtest_async_loader.cpp | 36 ++++++++++++++++--------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 174997ddf14..304fa996934 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -262,7 +262,8 @@ TEST(AsyncLoader, CancelPendingJob) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } @@ -288,7 +289,8 @@ TEST(AsyncLoader, CancelPendingTask) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } try @@ -298,7 +300,8 @@ TEST(AsyncLoader, CancelPendingTask) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } @@ -325,7 +328,8 @@ TEST(AsyncLoader, CancelPendingDependency) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } try @@ -335,7 +339,8 @@ TEST(AsyncLoader, CancelPendingDependency) } catch (Exception & e) { - ASSERT_TRUE(e.code() == ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } @@ -451,8 +456,9 @@ TEST(AsyncLoader, JobFailure) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_FAILED); - ASSERT_TRUE(e.message().find(error_message) != String::npos); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains(error_message)); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_FAILED")); } } @@ -489,8 +495,9 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); - ASSERT_TRUE(e.message().find(error_message) != String::npos); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); + ASSERT_TRUE(e.message().contains(error_message)); } try { @@ -499,8 +506,9 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); - ASSERT_TRUE(e.message().find(error_message) != String::npos); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); + ASSERT_TRUE(e.message().contains(error_message)); } } @@ -531,7 +539,8 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } try { @@ -540,7 +549,8 @@ TEST(AsyncLoader, 
ScheduleJobWithCanceledDependencies) } catch (Exception & e) { - ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_CANCELED); + ASSERT_EQ(e.code(), ErrorCodes::ASYNC_LOAD_WAIT_FAILED); + ASSERT_TRUE(e.message().contains("ASYNC_LOAD_CANCELED")); } } From c90e04ed4be9c6f8cf274eabf9f0d10c27102c83 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 6 May 2024 11:40:45 +0000 Subject: [PATCH 093/392] fix tests build --- src/Common/tests/gtest_async_loader.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 304fa996934..9fda58b9008 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -35,6 +35,7 @@ namespace DB::ErrorCodes extern const int ASYNC_LOAD_CYCLE; extern const int ASYNC_LOAD_FAILED; extern const int ASYNC_LOAD_CANCELED; + extern const int ASYNC_LOAD_WAIT_FAILED; } struct Initializer { From 936f94d286f50133cf12ba449245502769a22e40 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 7 May 2024 14:40:45 +0200 Subject: [PATCH 094/392] Add print --- utils/keeper-bench/Runner.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 8b111f5adb9..a893dac3851 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -635,11 +635,14 @@ struct ZooKeeperRequestFromLogReader break; } case Coordination::OpNum::Check: + case Coordination::OpNum::CheckNotExists: { auto check_request = std::make_shared(); check_request->path = current_block->getPath(idx_in_block); if (auto version = current_block->getVersion(idx_in_block)) check_request->version = *version; + if (op_num == Coordination::OpNum::CheckNotExists) + check_request->not_exists = true; request_from_log.request = check_request; break; } @@ -868,10 +871,20 @@ void Runner::runBenchmarkFromLog() } ZooKeeperRequestFromLogReader request_reader(input_request_log, global_context); + + delay_watch.restart(); while (auto request_from_log = request_reader.getNextRequest()) { request_from_log->connection = get_zookeeper_connection(request_from_log->session_id); push_request(std::move(*request_from_log)); + + if (delay > 0 && delay_watch.elapsedSeconds() > delay) + { + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + std::cerr << std::endl; + delay_watch.restart(); + } } } From 412805c99e0e789d7bc13dcb73fdf8199758ad2a Mon Sep 17 00:00:00 2001 From: Danila Puzov Date: Thu, 9 May 2024 19:38:19 +0300 Subject: [PATCH 095/392] Add serial, generateSnowflakeID, generateUUIDv7 functions --- src/Functions/generateSnowflakeID.cpp | 92 ++++++++++++++ src/Functions/generateUUIDv7.cpp | 113 +++++++++++++++++ src/Functions/serial.cpp | 171 ++++++++++++++++++++++++++ 3 files changed, 376 insertions(+) create mode 100644 src/Functions/generateSnowflakeID.cpp create mode 100644 src/Functions/generateUUIDv7.cpp create mode 100644 src/Functions/serial.cpp diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp new file mode 100644 index 00000000000..e54b720ec98 --- /dev/null +++ b/src/Functions/generateSnowflakeID.cpp @@ -0,0 +1,92 @@ +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +class FunctionSnowflakeID : public IFunction +{ +private: + mutable std::atomic machine_sequence_number{0}; + mutable std::atomic last_timestamp{0}; + +public: + static constexpr auto name = 
"generateSnowflakeID"; + + static FunctionPtr create(ContextPtr /*context*/) + { + return std::make_shared(); + } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + + bool isDeterministicInScopeOfQuery() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const override { return true; } + + bool isStateful() const override { return true; } + bool isDeterministic() const override { return false; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() > 1) { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", + getName(), arguments.size()); + } + + return std::make_shared(); + } + + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_to = col_res->getData(); + size_t size = input_rows_count; + vec_to.resize(size); + + auto serverUUID = ServerUUID::get(); + + // hash serverUUID into 32 bytes + Int64 h = UUIDHelpers::getHighBytes(serverUUID); + Int64 l = UUIDHelpers::getLowBytes(serverUUID); + Int64 machine_id = (h * 11) ^ (l * 17); + + for (Int64 & x : vec_to) { + const auto tm_point = std::chrono::system_clock::now(); + Int64 current_timestamp = std::chrono::duration_cast( + tm_point.time_since_epoch()).count(); + + Int64 local_machine_sequence_number = 0; + + if (current_timestamp != last_timestamp.load()) { + machine_sequence_number.store(0); + last_timestamp.store(current_timestamp); + } else { + local_machine_sequence_number = machine_sequence_number.fetch_add(1) + 1; + } + + x = (current_timestamp << 22) | (machine_id & 0x3ff000ull) | (local_machine_sequence_number & 0xfffull); + } + + return col_res; + } + +}; + +REGISTER_FUNCTION(GenerateSnowflakeID) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp new file mode 100644 index 00000000000..61d742d2fda --- /dev/null +++ b/src/Functions/generateUUIDv7.cpp @@ -0,0 +1,113 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +#define DECLARE_SEVERAL_IMPLEMENTATIONS(...) 
\ +DECLARE_DEFAULT_CODE (__VA_ARGS__) \ +DECLARE_AVX2_SPECIFIC_CODE(__VA_ARGS__) + +DECLARE_SEVERAL_IMPLEMENTATIONS( + +class FunctionGenerateUUIDv7 : public IFunction +{ +public: + static constexpr auto name = "generateUUIDv7"; + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 0; } + + bool isDeterministicInScopeOfQuery() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + bool isVariadic() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() > 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", + getName(), arguments.size()); + + return std::make_shared(); + } + + bool isDeterministic() const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_to = col_res->getData(); + + size_t size = input_rows_count; + vec_to.resize(size); + + /// RandImpl is target-dependent and is not the same in different TargetSpecific namespaces. + RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UUID)); + + for (UUID & uuid : vec_to) + { + /// https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#section-5.2 + + const auto tm_point = std::chrono::system_clock::now(); + UInt64 unix_ts_ms = std::chrono::duration_cast( + tm_point.time_since_epoch()).count(); + + UUIDHelpers::getHighBytes(uuid) = (UUIDHelpers::getHighBytes(uuid) & 0x0000000000000fffull) | 0x0000000000007000ull | (unix_ts_ms << 16); + UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & 0x3fffffffffffffffull) | 0x8000000000000000ull; + } + + return col_res; + } +}; + +) // DECLARE_SEVERAL_IMPLEMENTATIONS +#undef DECLARE_SEVERAL_IMPLEMENTATIONS + +class FunctionGenerateUUIDv7 : public TargetSpecific::Default::FunctionGenerateUUIDv7 +{ +public: + explicit FunctionGenerateUUIDv7(ContextPtr context) : selector(context) + { + selector.registerImplementation(); + + #if USE_MULTITARGET_CODE + selector.registerImplementation(); + #endif + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + return selector.selectAndExecute(arguments, result_type, input_rows_count); + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + +REGISTER_FUNCTION(GenerateUUIDv7) +{ + factory.registerFunction(); +} + +} + + diff --git a/src/Functions/serial.cpp b/src/Functions/serial.cpp new file mode 100644 index 00000000000..4f336013ca8 --- /dev/null +++ b/src/Functions/serial.cpp @@ -0,0 +1,171 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "Common/Logger.h" +#include + +namespace DB { + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +class FunctionSerial : public IFunction +{ +private: + mutable zkutil::ZooKeeperPtr zk{nullptr}; + ContextPtr context; + +public: + static constexpr auto name = 
"serial"; + + explicit FunctionSerial(ContextPtr ctx) : context(ctx) + { + if (ctx->hasZooKeeper()) { + zk = ctx->getZooKeeper(); + } + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(std::move(context)); + } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + + bool isStateful() const override { return true; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool isSuitableForConstantFolding() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + bool canBeExecutedOnDefaultArguments() const override { return false; } + bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const override { return true; } + bool hasInformationAboutMonotonicity() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1.", + getName(), arguments.size()); + if (!isStringOrFixedString(arguments[0])) { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Type of argument for function {} doesn't match: passed {}, should be string", + getName(), arguments[0]->getName()); + } + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_to = col_res->getData(); + size_t size = input_rows_count; + LOG_INFO(getLogger("Serial Function"), "Size = {}", size); + vec_to.resize(size); + + const auto & serial_path = "/serials/" + arguments[0].column->getDataAt(0).toString(); + + // if serial name used first time + zk->createAncestors(serial_path); + zk->createIfNotExists(serial_path, ""); + + Int64 counter; + + if (zk != nullptr) { + // Get Lock in ZooKeeper + // https://zookeeper.apache.org/doc/r3.2.2/recipes.html + + // 1. + if (zk->expired()) { + zk = context->getZooKeeper(); + } + + std::string lock_path = serial_path + "/lock-"; + std::string path_created = zk->create(lock_path, "", zkutil::CreateMode::EphemeralSequential); + Int64 created_sequence_number = std::stoll(path_created.substr(lock_path.size(), path_created.size() - lock_path.size())); + + while (true) { + // 2. + zkutil::Strings children = zk->getChildren(serial_path); + + // 3. + Int64 lowest_child_sequence_number = -1; + for (auto& child : children) { + if (child == "counter") { + continue; + } + std::string child_suffix = child.substr(5, 10); + Int64 seq_number = std::stoll(child_suffix); + + if (lowest_child_sequence_number == -1 || seq_number < lowest_child_sequence_number) { + lowest_child_sequence_number = seq_number; + } + } + + if (lowest_child_sequence_number == created_sequence_number) { + break; + // we have a lock in ZooKeeper, now can get the counter value + } + + // 4. and 5. 
+ Int64 prev_seq_number = created_sequence_number - 1; + std::string to_wait_key = std::to_string(prev_seq_number); + while (to_wait_key.size() != 10) { + to_wait_key = "0" + to_wait_key; + } + + zk->waitForDisappear(lock_path + to_wait_key); + } + + // Now we have a lock + // Update counter in ZooKeeper + std::string counter_path = serial_path + "/counter"; + if (zk->exists(counter_path)) { + std::string counter_string = zk->get(counter_path, nullptr); + counter = std::stoll(counter_string); + + LOG_INFO(getLogger("Serial Function"), "Got counter from Zookeeper = {}", counter); + } else { + counter = 1; + } + zk->createOrUpdate(counter_path, std::to_string(counter + input_rows_count), zkutil::CreateMode::Persistent); + + // Unlock = delete node created on step 1. + zk->deleteEphemeralNodeIfContentMatches(path_created, ""); + } else { + // ZooKeeper is not available + // What to do? + + counter = 1; + } + + // Make a result + for (auto& val : vec_to) { + val = counter; + ++counter; + } + + + return col_res; + } + +}; + +REGISTER_FUNCTION(Serial) +{ + factory.registerFunction(); +} + +} From a31ee9891f610a14513c622bc81dcb25eaf25eb5 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 10 May 2024 10:36:59 +0200 Subject: [PATCH 096/392] Move setting to 24.5 version in SettingsChangesHistory --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index e8cf1e98d27..3c1249d29e5 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,13 +87,13 @@ static std::map sett { {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, + {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, - {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, {"input_format_json_ignore_unnecessary_fields", false, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields"}, {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, From fbf8dcb7feb480175f76f7fa9252cf80f3ca3cc4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 10 May 2024 11:55:24 +0200 Subject: [PATCH 097/392] Apply suggestions from code review Co-authored-by: Antonio Andelic --- src/Columns/ColumnDynamic.cpp | 7 +++---- src/Columns/ColumnVariant.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index a1dd60f4748..629df476591 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -48,8 +48,8 @@ ColumnDynamic::MutablePtr ColumnDynamic::create(MutableColumnPtr variant_column, variant_info.variant_name_to_discriminator.reserve(variants.size()); for (ColumnVariant::Discriminator discr = 0; discr != variants.size(); ++discr) { - variant_info.variant_names.push_back(variants[discr]->getName()); - variant_info.variant_name_to_discriminator[variant_info.variant_names.back()] = discr; + const auto & variant_name = variant_info.variant_names.emplace_back(variants[discr]->getName()); + variant_info.variant_name_to_discriminator[variant_name] = discr; } return create(std::move(variant_column), variant_info, max_dynamic_types_, statistics_); @@ -133,8 +133,7 @@ void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DB::DataTypePt for (ColumnVariant::Discriminator discr = 0; discr != new_variants.size(); ++discr) { - String name = new_variants[discr]->getName(); - new_variant_names.push_back(name); + const auto & name = new_variant_names.emplace_back(new_variants[discr]->getName()); new_variant_name_to_discriminator[name] = discr; auto current_it = variant_info.variant_name_to_discriminator.find(name); diff --git a/src/Columns/ColumnVariant.h b/src/Columns/ColumnVariant.h index 8f703ea17d9..e5a4498f340 100644 --- a/src/Columns/ColumnVariant.h +++ b/src/Columns/ColumnVariant.h @@ -189,7 +189,7 @@ public: void insertRangeFrom(const IColumn & src_, size_t start, size_t length, const std::vector & global_discriminators_mapping); void insertManyFrom(const IColumn & src_, size_t position, size_t length, const std::vector & global_discriminators_mapping); - /// Methods for insertrion into a specific variant. + /// Methods for insertion into a specific variant. 
void insertIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t n); void insertRangeIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t start, size_t length); void insertManyIntoVariantFrom(Discriminator global_discr, const IColumn & src_, size_t position, size_t length); From e7c7eb159a44beb52cd3c7f2634fd8f13214ad71 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 10 May 2024 11:32:27 +0000 Subject: [PATCH 098/392] Apply suggestions from the code review --- src/Columns/ColumnDynamic.cpp | 41 ++++--------------- src/Columns/tests/gtest_column_dynamic.cpp | 26 ++++++------ src/DataTypes/DataTypeDynamic.h | 5 +-- .../Serializations/SerializationDynamic.cpp | 7 +++- src/Functions/FunctionsConversion.cpp | 9 ++-- src/Interpreters/InterpreterInsertQuery.cpp | 6 ++- .../Algorithms/CollapsingSortedAlgorithm.cpp | 8 +++- 7 files changed, 44 insertions(+), 58 deletions(-) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 629df476591..76f536a3409 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -80,41 +80,14 @@ bool ColumnDynamic::addNewVariant(const DB::DataTypePtr & new_variant) DataTypes all_variants = current_variants; all_variants.push_back(new_variant); auto new_variant_type = std::make_shared(all_variants); - const auto & new_variants = assert_cast(*new_variant_type).getVariants(); - - std::vector current_to_new_discriminators; - current_to_new_discriminators.resize(variant_info.variant_names.size()); - Names new_variant_names; - new_variant_names.reserve(new_variants.size()); - std::unordered_map new_variant_name_to_discriminator; - new_variant_name_to_discriminator.reserve(new_variants.size()); - std::vector> new_variant_columns_and_discriminators_to_add; - new_variant_columns_and_discriminators_to_add.reserve(new_variants.size() - current_variants.size()); - - for (ColumnVariant::Discriminator discr = 0; discr != new_variants.size(); ++discr) - { - String name = new_variants[discr]->getName(); - new_variant_names.push_back(name); - new_variant_name_to_discriminator[name] = discr; - auto it = variant_info.variant_name_to_discriminator.find(name); - if (it == variant_info.variant_name_to_discriminator.end()) - new_variant_columns_and_discriminators_to_add.emplace_back(new_variants[discr]->createColumn(), discr); - else - current_to_new_discriminators[it->second] = discr; - } - - variant_info.variant_type = new_variant_type; - variant_info.variant_name = new_variant_type->getName(); - variant_info.variant_names = new_variant_names; - variant_info.variant_name_to_discriminator = new_variant_name_to_discriminator; - assert_cast(*variant_column).extend(current_to_new_discriminators, std::move(new_variant_columns_and_discriminators_to_add)); - variant_mappings_cache.clear(); + updateVariantInfoAndExpandVariantColumn(new_variant_type); return true; } void ColumnDynamic::addStringVariant() { - addNewVariant(std::make_shared()); + if (!addNewVariant(std::make_shared())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add String variant to Dynamic column, it's a bug"); } void ColumnDynamic::updateVariantInfoAndExpandVariantColumn(const DB::DataTypePtr & new_variant_type) @@ -704,13 +677,13 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source result_variants.reserve(max_dynamic_types); /// Add String variant in advance. 
result_variants.push_back(std::make_shared()); - size_t i = 0; - while (result_variants.size() != max_dynamic_types && i < variants_with_sizes.size()) + for (const auto & [_, variant] : variants_with_sizes) { - const auto & variant = variants_with_sizes[i].second; + if (result_variants.size() == max_dynamic_types) + break; + if (variant->getName() != "String") result_variants.push_back(variant); - ++i; } result_variant_type = std::make_shared(result_variants); diff --git a/src/Columns/tests/gtest_column_dynamic.cpp b/src/Columns/tests/gtest_column_dynamic.cpp index 4c209f7d8a9..a2862b09de1 100644 --- a/src/Columns/tests/gtest_column_dynamic.cpp +++ b/src/Columns/tests/gtest_column_dynamic.cpp @@ -195,7 +195,7 @@ TEST(ColumnDynamic, InsertFromOverflow1) column_to->insertFrom(*column_from, 1); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); field = (*column_to)[column_to->size() - 1]; ASSERT_EQ(field, "42.42"); @@ -220,7 +220,7 @@ TEST(ColumnDynamic, InsertFromOverflow2) ASSERT_EQ(field, 42); column_to->insertFrom(*column_from, 1); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); field = (*column_to)[column_to->size() - 1]; ASSERT_EQ(field, "42.42"); @@ -299,7 +299,7 @@ TEST(ColumnDynamic, InsertManyFromOverflow1) column_to->insertManyFrom(*column_from, 1, 2); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); field = (*column_to)[column_to->size() - 2]; ASSERT_EQ(field, "42.42"); @@ -332,7 +332,7 @@ TEST(ColumnDynamic, InsertManyFromOverflow2) column_to->insertManyFrom(*column_from, 1, 2); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); field = (*column_to)[column_to->size() - 2]; ASSERT_EQ(field, "42.42"); @@ -406,7 +406,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow1) ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); auto field = (*column_to)[column_to->size() - 4]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 3]; @@ -429,7 +429,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow2) ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); 
ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); auto field = (*column_to)[column_to->size() - 3]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 2]; @@ -451,7 +451,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow3) ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); auto field = (*column_to)[column_to->size() - 3]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 2]; @@ -470,9 +470,9 @@ TEST(ColumnDynamic, InsertRangeFromOverflow4) auto column_to = getDynamicWithManyVariants(254); column_to->insertRangeFrom(*column_from, 0, 3); ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); auto field = (*column_to)[column_to->size() - 3]; ASSERT_EQ(field, Field("42")); field = (*column_to)[column_to->size() - 2]; @@ -495,7 +495,7 @@ TEST(ColumnDynamic, InsertRangeFromOverflow5) ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); auto field = (*column_to)[column_to->size() - 4]; ASSERT_EQ(field, Field(42)); field = (*column_to)[column_to->size() - 3]; @@ -522,8 +522,8 @@ TEST(ColumnDynamic, InsertRangeFromOverflow6) ASSERT_EQ(column_to->getVariantInfo().variant_names.size(), 255); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Array(Int8)")); auto field = (*column_to)[column_to->size() - 5]; ASSERT_EQ(field, Field("44")); @@ -620,7 +620,7 @@ TEST(ColumnDynamic, SerializeDeserializeFromArenaOverflow) ASSERT_EQ((*column_from)[column_from->size() - 2], "str"); ASSERT_EQ((*column_from)[column_from->size() - 1], Null()); 
ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Int8")); - ASSERT_TRUE(!column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); + ASSERT_FALSE(column_to->getVariantInfo().variant_name_to_discriminator.contains("Float64")); ASSERT_TRUE(column_to->getVariantInfo().variant_name_to_discriminator.contains("String")); } diff --git a/src/DataTypes/DataTypeDynamic.h b/src/DataTypes/DataTypeDynamic.h index 9fc727fd9c8..bd3d822fbb6 100644 --- a/src/DataTypes/DataTypeDynamic.h +++ b/src/DataTypes/DataTypeDynamic.h @@ -2,9 +2,6 @@ #include -#define DEFAULT_MAX_DYNAMIC_TYPES 32 - - namespace DB { @@ -46,6 +43,8 @@ public: size_t getMaxDynamicTypes() const { return max_dynamic_types; } private: + static constexpr size_t DEFAULT_MAX_DYNAMIC_TYPES = 32; + SerializationPtr doGetDefaultSerialization() const override; String doGetName() const override; diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 5e6106f560f..d0ecc3b80a2 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -118,7 +118,12 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( for (size_t i = 0; i != variant_info.variant_names.size(); ++i) { size_t size = 0; - /// Use statistics from column if it was created during merge. + /// Check if we can use statistics stored in the column. There are 2 possible sources + /// of this statistics: + /// - statistics calculated during merge of some data parts (Statistics::Source::MERGE) + /// - statistics read from the data part during deserialization of Dynamic column (Statistics::Source::READ). + /// We can rely only on statistics calculated during the merge, because column with statistics that was read + /// during deserialization from some data part could be filtered/limited/transformed/etc and so the statistics can be outdated. if (!statistics.data.empty() && statistics.source == ColumnDynamic::Statistics::Source::MERGE) size = statistics.data.at(variant_info.variant_names[i]); /// Otherwise we can use only variant sizes from current column. diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index b01643a9532..910168d8010 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -575,7 +575,7 @@ ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) template struct ConvertImplGenericToString { - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const FormatSettings & format_settings) { static_assert(std::is_same_v || std::is_same_v, "Can be used only to serialize to ColumnString or ColumnFixedString"); @@ -596,7 +596,6 @@ struct ConvertImplGenericToString auto & write_buffer = write_helper.getWriteBuffer(); - FormatSettings format_settings; auto serialization = type.getDefaultSerialization(); for (size_t row = 0; row < size; ++row) { @@ -2299,7 +2298,7 @@ private: if constexpr (std::is_same_v) { if (from_type->getCustomSerialization()) - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? 
getFormatSettings(context) : FormatSettings()); } bool done = false; @@ -2332,7 +2331,7 @@ private: /// Generic conversion of any type to String. if (std::is_same_v) { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? getFormatSettings(context) : FormatSettings()); } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", @@ -5060,7 +5059,7 @@ private: { ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? getFormatSettings(context) : FormatSettings()); }; return true; } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 6c8e662477d..128854e87ba 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -552,7 +552,11 @@ BlockIO InterpreterInsertQuery::execute() { /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) && !isVariant(query_columns[col_idx].type) && !isDynamic(query_columns[col_idx].type) && output_columns.has(query_columns[col_idx].name)) + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); } } diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index f5e4c88fcd0..07ee8f4ddef 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -31,7 +31,13 @@ CollapsingSortedAlgorithm::CollapsingSortedAlgorithm( LoggerPtr log_, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) - : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs, std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)) + : IMergingAlgorithmWithSharedChunks( + header_, + num_inputs, + std::move(description_), + out_row_sources_buf_, + max_row_refs, + std::make_unique(use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)) , sign_column_number(header_.getPositionByName(sign_column)) , only_positive_sign(only_positive_sign_) , log(log_) From 4f1a97644ef6a6f462c01a0fb4046d07448d1d8c Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 10 May 2024 11:34:16 +0000 Subject: [PATCH 099/392] Use nested column properly in SerializationSparse::enumerateStreams --- src/DataTypes/Serializations/SerializationSparse.cpp | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationSparse.cpp b/src/DataTypes/Serializations/SerializationSparse.cpp index f9228069b90..73488d308bb 100644 --- a/src/DataTypes/Serializations/SerializationSparse.cpp +++ b/src/DataTypes/Serializations/SerializationSparse.cpp @@ -170,7 +170,7 @@ void SerializationSparse::enumerateStreams( auto next_data = SubstreamData(nested) .withType(data.type) - .withColumn(column_sparse ? column_sparse->getValuesPtr() : nullptr) + .withColumn(column_sparse ? column_sparse->getValuesPtr() : data.column) .withSerializationInfo(data.serialization_info); nested->enumerateStreams(settings, callback, next_data); From fa5898a3cd5a9b4276eb75e39c4475dfdf722e3b Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 10 May 2024 13:46:56 +0200 Subject: [PATCH 100/392] Refactor data part writer --- src/Storages/MergeTree/IMergeTreeDataPart.h | 21 ++-- .../MergeTree/IMergeTreeDataPartWriter.cpp | 119 +++++++++++++++++- .../MergeTree/IMergeTreeDataPartWriter.h | 57 ++++++++- .../MergeTree/IMergedBlockOutputStream.cpp | 17 ++- .../MergeTree/IMergedBlockOutputStream.h | 15 ++- src/Storages/MergeTree/MergeTask.cpp | 3 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 48 ++++--- .../MergeTree/MergeTreeDataPartCompact.h | 17 +-- .../MergeTree/MergeTreeDataPartWide.cpp | 18 ++- .../MergeTree/MergeTreeDataPartWide.h | 17 +-- .../MergeTreeDataPartWriterCompact.cpp | 27 ++-- .../MergeTreeDataPartWriterCompact.h | 9 +- .../MergeTreeDataPartWriterOnDisk.cpp | 32 +++-- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 9 +- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 69 +++++----- .../MergeTree/MergeTreeDataPartWriterWide.h | 9 +- .../MergeTree/MergeTreeDataWriter.cpp | 4 +- src/Storages/MergeTree/MergeTreePartition.cpp | 13 +- src/Storages/MergeTree/MergeTreePartition.h | 4 +- .../MergeTree/MergedBlockOutputStream.cpp | 29 +++-- .../MergeTree/MergedBlockOutputStream.h | 2 +- .../MergedColumnOnlyOutputStream.cpp | 11 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- 24 files changed, 409 insertions(+), 145 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index ba2ff2ed6fe..4ec5b3f5f8a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -74,7 +74,7 @@ public: using VirtualFields = std::unordered_map; using MergeTreeReaderPtr = std::unique_ptr; - using MergeTreeWriterPtr = std::unique_ptr; +// using MergeTreeWriterPtr = std::unique_ptr; using ColumnSizeByName = std::unordered_map; using NameToNumber = std::unordered_map; @@ -106,15 +106,16 @@ public: const ValueSizeMap & avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0; - virtual MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) = 0; +//// virtual MergeTreeWriterPtr getWriter( +//// const NamesAndTypesList & columns_list, +//// const StorageMetadataPtr & metadata_snapshot, +//// const std::vector & indices_to_recalc, +//// const Statistics & stats_to_recalc_, +//// const CompressionCodecPtr & default_codec_, +//// const MergeTreeWriterSettings & 
writer_settings, +//// const MergeTreeIndexGranularity & computed_index_granularity) = 0; +// TODO: remove? virtual bool isStoredOnDisk() const = 0; virtual bool isStoredOnRemoteDisk() const = 0; @@ -168,6 +169,8 @@ public: const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; } + const SerializationByName & getSerializations() const { return serializations; } + SerializationPtr getSerialization(const String & column_name) const; SerializationPtr tryGetSerialization(const String & column_name) const; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 2488c63e309..c67e148d011 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,8 +1,15 @@ #include +#include "Storages/MergeTree/MergeTreeSettings.h" namespace DB { +namespace ErrorCodes +{ + extern const int NO_SUCH_COLUMN_IN_TABLE; +} + + Block getBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation) { Block result; @@ -38,13 +45,23 @@ Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * per } IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( - const MergeTreeMutableDataPartPtr & data_part_, +// const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : data_part(data_part_) - , storage(data_part_->storage) + : data_part_name(data_part_name_) + , serializations(serializations_) + , data_part_storage(data_part_storage_) + , index_granularity_info(index_granularity_info_) + + , storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) , columns_list(columns_list_) , settings(settings_) @@ -60,6 +77,102 @@ Columns IMergeTreeDataPartWriter::releaseIndexColumns() std::make_move_iterator(index_columns.end())); } +SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const +{ + auto it = serializations.find(column_name); + if (it == serializations.end()) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "There is no column or subcolumn {} in part {}", column_name, data_part_name); + + return it->second; +} + +ASTPtr IMergeTreeDataPartWriter::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const +{ + auto get_codec_or_default = [&](const auto & column_desc) + { + return column_desc.codec ? column_desc.codec : default_codec->getFullCodecDesc(); + }; + + const auto & columns = metadata_snapshot->getColumns(); + if (const auto * column_desc = columns.tryGet(column_name)) + return get_codec_or_default(*column_desc); + +///// TODO: is this needed? 
+// if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) +// return get_codec_or_default(*virtual_desc); +// + return default_codec->getFullCodecDesc(); +} + + IMergeTreeDataPartWriter::~IMergeTreeDataPartWriter() = default; + +MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + + + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( + MergeTreeDataPartType part_type, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity) +{ + if (part_type == MergeTreeDataPartType::Compact) + return createMergeTreeDataPartCompactWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + else if (part_type == MergeTreeDataPartType::Wide) + return createMergeTreeDataPartWideWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown part type: {}", part_type.toString()); +} + } diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 3f359904ddd..ec04fd5f8a8 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -7,6 +7,8 
@@ #include #include #include +#include "Storages/MergeTree/MergeTreeDataPartType.h" +#include "Storages/MergeTree/MergeTreeSettings.h" namespace DB @@ -22,7 +24,15 @@ class IMergeTreeDataPartWriter : private boost::noncopyable { public: IMergeTreeDataPartWriter( - const MergeTreeMutableDataPartPtr & data_part_, +// const MergeTreeMutableDataPartPtr & data_part_, + + const String & data_part_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeWriterSettings & settings_, @@ -39,10 +49,30 @@ public: Columns releaseIndexColumns(); const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; } + SerializationPtr getSerialization(const String & column_name) const; + + ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; + + IDataPartStorage & getDataPartStorage() { return *data_part_storage; } + protected: - const MergeTreeMutableDataPartPtr data_part; - const MergeTreeData & storage; +// const MergeTreeMutableDataPartPtr data_part; // TODO: remove + + /// Serializations for every columns and subcolumns by their names. + String data_part_name; + SerializationByName serializations; + MutableDataPartStoragePtr data_part_storage; + MergeTreeIndexGranularityInfo index_granularity_info; + + +// const MergeTreeData & storage; // TODO: remove + + const MergeTreeSettingsPtr storage_settings; + const size_t low_cardinality_max_dictionary_size = 0; // TODO: pass it in ctor + const bool low_cardinality_use_single_dictionary_for_part = true; // TODO: pass it in ctor + + const StorageMetadataPtr metadata_snapshot; const NamesAndTypesList columns_list; const MergeTreeWriterSettings settings; @@ -52,4 +82,25 @@ protected: MutableColumns index_columns; }; +using MergeTreeDataPartWriterPtr = std::unique_ptr; + +MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( + MergeTreeDataPartType part_type, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity); + + } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index c8d6aa0ba65..f99adf7c4db 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -2,25 +2,30 @@ #include #include #include +#include "Storages/MergeTree/IDataPartStorage.h" +#include "Storages/StorageSet.h" namespace DB { IMergedBlockOutputStream::IMergedBlockOutputStream( - const MergeTreeMutableDataPartPtr & data_part, +// const MergeTreeMutableDataPartPtr & data_part, + const MergeTreeSettingsPtr & storage_settings_, + MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & 
columns_list, bool reset_columns_) - : storage(data_part->storage) + //: storage(data_part->storage) + : storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) - , data_part_storage(data_part->getDataPartStoragePtr()) + , data_part_storage(data_part_storage_)//data_part->getDataPartStoragePtr()) , reset_columns(reset_columns_) { if (reset_columns) { SerializationInfo::Settings info_settings = { - .ratio_of_defaults_for_sparse = storage.getSettings()->ratio_of_defaults_for_sparse_serialization, + .ratio_of_defaults_for_sparse = storage_settings->ratio_of_defaults_for_sparse_serialization,//storage.getSettings()->ratio_of_defaults_for_sparse_serialization, .choose_kind = false, }; @@ -42,7 +47,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( return {}; for (const auto & column : empty_columns) - LOG_TRACE(storage.log, "Skipping expired/empty column {} for part {}", column, data_part->name); + LOG_TRACE(data_part->storage.log, "Skipping expired/empty column {} for part {}", column, data_part->name); /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. std::map stream_counts; @@ -91,7 +96,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( } else /// If we have no file in checksums it doesn't exist on disk { - LOG_TRACE(storage.log, "Files {} doesn't exist in checksums so it doesn't exist on disk, will not try to remove it", *itr); + LOG_TRACE(data_part->storage.log, "Files {} doesn't exist in checksums so it doesn't exist on disk, will not try to remove it", *itr); itr = remove_files.erase(itr); } } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index ca4e3899b29..b6f279e6d58 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,10 +1,12 @@ #pragma once #include "Storages/MergeTree/IDataPartStorage.h" +#include "Storages/MergeTree/MergeTreeSettings.h" #include #include #include #include +#include "Common/Logger.h" namespace DB { @@ -13,7 +15,9 @@ class IMergedBlockOutputStream { public: IMergedBlockOutputStream( - const MergeTreeMutableDataPartPtr & data_part, +// const MergeTreeMutableDataPartPtr & data_part, + const MergeTreeSettingsPtr & storage_settings_, + MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list, bool reset_columns_); @@ -39,11 +43,16 @@ protected: SerializationInfoByName & serialization_infos, MergeTreeData::DataPart::Checksums & checksums); - const MergeTreeData & storage; +// const MergeTreeData & storage; // TODO: remove +//// + MergeTreeSettingsPtr storage_settings; + LoggerPtr log; +//// + StorageMetadataPtr metadata_snapshot; MutableDataPartStoragePtr data_part_storage; - IMergeTreeDataPart::MergeTreeWriterPtr writer; + MergeTreeDataPartWriterPtr writer; bool reset_columns = false; SerializationInfoByName new_serialization_infos; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 34e17e40a74..1b5ad0d81a7 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include namespace DB @@ -378,7 +379,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()), 
MergeTreeStatisticsFactory::instance().getMany(global_ctx->metadata_snapshot->getColumns()), ctx->compression_codec, - global_ctx->txn, + global_ctx->txn ? global_ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, ctx->blocks_are_granules_size, global_ctx->context->getWriteSettings()); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 440c62213a3..8a96e4c9f04 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8423,7 +8423,7 @@ std::pair MergeTreeData::createE MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), Statistics{}, - compression_codec, txn); + compression_codec, txn ? txn->tid : Tx::PrehistoricTID); bool sync_on_insert = settings->fsync_after_insert; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 418b2d8f81b..eebbe3110c0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -47,27 +47,37 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( avg_value_size_hints, profile_callback, CLOCK_MONOTONIC_COARSE); } -IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) +MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity) { - NamesAndTypesList ordered_columns_list; - std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), - [this](const auto & column) { return getColumnPosition(column.name) != std::nullopt; }); - - /// Order of writing is important in compact format - ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) - { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); - +////// TODO: fix the order of columns +//// +//// NamesAndTypesList ordered_columns_list; +//// std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), +//// [this](const auto & column) { return getColumnPosition(column.name) != std::nullopt; }); +//// +//// /// Order of writing is important in compact format +//// ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) +//// { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); +//// return std::make_unique( - shared_from_this(), ordered_columns_list, metadata_snapshot, - indices_to_recalc, stats_to_recalc_, getMarksFileExtension(), - default_codec_, 
writer_settings, computed_index_granularity); + data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 3a4e7b95f33..5a57d778b7d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -40,15 +40,16 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; - MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) override; +// MergeTreeWriterPtr getWriter( +// const NamesAndTypesList & columns_list, +// const StorageMetadataPtr & metadata_snapshot, +// const std::vector & indices_to_recalc, +// const Statistics & stats_to_recalc_, +// const CompressionCodecPtr & default_codec_, +// const MergeTreeWriterSettings & writer_settings, +// const MergeTreeIndexGranularity & computed_index_granularity) override; +// TODO: remove? bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index fc3108e522a..c99cff258e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -53,20 +53,26 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( profile_callback); } -IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( +MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, + const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { - return std::make_unique( - shared_from_this(), columns_list, - metadata_snapshot, indices_to_recalc, stats_to_recalc_, - getMarksFileExtension(), - default_codec_, writer_settings, computed_index_granularity); + return std::make_unique(data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 84eeec4211b..45d0fbbebec 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -35,15 +35,16 @@ 
public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; - MergeTreeWriterPtr getWriter( - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) override; +// MergeTreeWriterPtr getWriter( +// const NamesAndTypesList & columns_list, +// const StorageMetadataPtr & metadata_snapshot, +// const std::vector & indices_to_recalc, +// const Statistics & stats_to_recalc_, +// const CompressionCodecPtr & default_codec_, +// const MergeTreeWriterSettings & writer_settings, +// const MergeTreeIndexGranularity & computed_index_granularity) override; +// TODO: remove? bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 1605e5cdb9a..6e8ea1a915b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -10,7 +10,14 @@ namespace ErrorCodes } MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( - const MergeTreeMutableDataPartPtr & data_part_, +// const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, @@ -19,23 +26,26 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, + : MergeTreeDataPartWriterOnDisk( + data_part_name_, logger_name_, serializations_, + data_part_storage_, index_granularity_info_, storage_settings_, + columns_list_, metadata_snapshot_, indices_to_recalc_, stats_to_recalc, marks_file_extension_, default_codec_, settings_, index_granularity_) - , plain_file(data_part_->getDataPartStorage().writeFile( + , plain_file(getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION, settings.max_compress_block_size, settings_.query_write_settings)) , plain_hashing(*plain_file) { - marks_file = data_part_->getDataPartStorage().writeFile( + marks_file = getDataPartStorage().writeFile( MergeTreeDataPartCompact::DATA_FILE_NAME + marks_file_extension_, 4096, settings_.query_write_settings); marks_file_hashing = std::make_unique(*marks_file); - if (data_part_->index_granularity_info.mark_type.compressed) + if (index_granularity_info.mark_type.compressed) { marks_compressor = std::make_unique( *marks_file_hashing, @@ -45,10 +55,9 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( marks_source_hashing = std::make_unique(*marks_compressor); } - auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - auto compression = 
storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + auto compression = getCodecDescOrDefault(column.name, default_codec); addStreams(column, compression); } } @@ -81,7 +90,7 @@ void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, compressed_streams.emplace(stream_name, stream); }; - data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); + getSerialization(column.name)->enumerateStreams(callback, column.type); } namespace @@ -230,7 +239,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeBinaryLittleEndian(static_cast(0), marks_out); writeColumnSingleGranule( - block.getByName(name_and_type->name), data_part->getSerialization(name_and_type->name), + block.getByName(name_and_type->name), getSerialization(name_and_type->name), stream_getter, granule.start_row, granule.rows_to_write); /// Each type always have at least one substream diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index ddb6178dce6..3bec4c7e988 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -11,7 +11,14 @@ class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterCompact( - const MergeTreeMutableDataPartPtr & data_part, +// const MergeTreeMutableDataPartPtr & data_part, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 491d2399b82..13892c17577 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -140,7 +140,13 @@ void MergeTreeDataPartWriterOnDisk::Stream::addToChecksums(Merg MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( - const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeIndices & indices_to_recalc_, @@ -149,7 +155,9 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : IMergeTreeDataPartWriter(data_part_, columns_list_, metadata_snapshot_, settings_, index_granularity_) + : IMergeTreeDataPartWriter( + data_part_name_, serializations_, data_part_storage_, index_granularity_info_, + storage_settings_, columns_list_, metadata_snapshot_, settings_, index_granularity_) , skip_indices(indices_to_recalc_) , stats(stats_to_recalc_) , marks_file_extension(marks_file_extension_) @@ -157,14 +165,14 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( , compute_granularity(index_granularity.empty()) , 
compress_primary_key(settings.compress_primary_key) , execution_stats(skip_indices.size(), stats.size()) - , log(getLogger(storage.getLogName() + " (DataPartWriter)")) + , log(getLogger(logger_name_ + " (DataPartWriter)")) { if (settings.blocks_are_granules_size && !index_granularity.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't take information about index granularity from blocks, when non empty index_granularity array specified"); - if (!data_part->getDataPartStorage().exists()) - data_part->getDataPartStorage().createDirectories(); + if (!getDataPartStorage().exists()) + getDataPartStorage().createDirectories(); if (settings.rewrite_primary_key) initPrimaryIndex(); @@ -223,7 +231,7 @@ static size_t computeIndexGranularityImpl( size_t MergeTreeDataPartWriterOnDisk::computeIndexGranularity(const Block & block) const { - const auto storage_settings = storage.getSettings(); +// const auto storage_settings = storage.getSettings(); return computeIndexGranularityImpl( block, storage_settings->index_granularity_bytes, @@ -237,7 +245,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() if (metadata_snapshot->hasPrimaryKey()) { String index_name = "primary" + getIndexExtension(compress_primary_key); - index_file_stream = data_part->getDataPartStorage().writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings); + index_file_stream = getDataPartStorage().writeFile(index_name, DBMS_DEFAULT_BUFFER_SIZE, settings.query_write_settings); index_file_hashing_stream = std::make_unique(*index_file_stream); if (compress_primary_key) @@ -256,7 +264,7 @@ void MergeTreeDataPartWriterOnDisk::initStatistics() String stats_name = stat_ptr->getFileName(); stats_streams.emplace_back(std::make_unique>( stats_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stats_name, STAT_FILE_SUFFIX, default_codec, settings.max_compress_block_size, settings.query_write_settings)); @@ -275,7 +283,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() skip_indices_streams.emplace_back( std::make_unique>( stream_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stream_name, skip_index->getSerializedFileExtension(), stream_name, marks_file_extension, default_codec, settings.max_compress_block_size, @@ -285,7 +293,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() GinIndexStorePtr store = nullptr; if (typeid_cast(&*skip_index) != nullptr) { - store = std::make_shared(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment); + store = std::make_shared(stream_name, data_part_storage, data_part_storage, /*storage.getSettings()*/storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); @@ -498,7 +506,7 @@ void MergeTreeDataPartWriterOnDisk::finishStatisticsSerialization(bool sync) } for (size_t i = 0; i < stats.size(); ++i) - LOG_DEBUG(log, "Spent {} ms calculating statistics {} for the part {}", execution_stats.statistics_build_us[i] / 1000, stats[i]->columnName(), data_part->name); + LOG_DEBUG(log, "Spent {} ms calculating statistics {} for the part {}", execution_stats.statistics_build_us[i] / 1000, stats[i]->columnName(), data_part_name); } void MergeTreeDataPartWriterOnDisk::fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums) @@ -524,7 +532,7 @@ void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization(bool sync) 
store.second->finalize(); for (size_t i = 0; i < skip_indices.size(); ++i) - LOG_DEBUG(log, "Spent {} ms calculating index {} for the part {}", execution_stats.skip_indices_build_us[i] / 1000, skip_indices[i]->index.name, data_part->name); + LOG_DEBUG(log, "Spent {} ms calculating index {} for the part {}", execution_stats.skip_indices_build_us[i] / 1000, skip_indices[i]->index.name, data_part_name); gin_index_stores.clear(); skip_indices_streams.clear(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 9f2cc3970fa..39f33217b57 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -104,7 +104,14 @@ public: using StatisticStreamPtr = std::unique_ptr>; MergeTreeDataPartWriterOnDisk( - const MergeTreeMutableDataPartPtr & data_part_, +// const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 6a3b08d4d65..1f68a9d31a1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -76,7 +76,14 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, } MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( - const MergeTreeMutableDataPartPtr & data_part_, +// const MergeTreeMutableDataPartPtr & data_part_, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, @@ -85,14 +92,16 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : MergeTreeDataPartWriterOnDisk(data_part_, columns_list_, metadata_snapshot_, - indices_to_recalc_, stats_to_recalc_, marks_file_extension_, - default_codec_, settings_, index_granularity_) + : MergeTreeDataPartWriterOnDisk( + data_part_name_, logger_name_, serializations_, + data_part_storage_, index_granularity_info_, storage_settings_, + columns_list_, metadata_snapshot_, + indices_to_recalc_, stats_to_recalc_, marks_file_extension_, + default_codec_, settings_, index_granularity_) { - auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { - auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + auto compression = getCodecDescOrDefault(column.name, default_codec); addStreams(column, compression); } } @@ -105,7 +114,7 @@ void MergeTreeDataPartWriterWide::addStreams( { assert(!substream_path.empty()); - auto storage_settings = storage.getSettings(); +// auto storage_settings = storage.getSettings(); auto 
full_stream_name = ISerialization::getFileNameForStream(column, substream_path); String stream_name; @@ -149,7 +158,7 @@ void MergeTreeDataPartWriterWide::addStreams( column_streams[stream_name] = std::make_unique>( stream_name, - data_part->getDataPartStoragePtr(), + data_part_storage, stream_name, DATA_FILE_EXTENSION, stream_name, marks_file_extension, compression_codec, @@ -163,7 +172,7 @@ void MergeTreeDataPartWriterWide::addStreams( }; ISerialization::SubstreamPath path; - data_part->getSerialization(column.name)->enumerateStreams(callback, column.type); + getSerialization(column.name)->enumerateStreams(callback, column.type); } const String & MergeTreeDataPartWriterWide::getStreamName( @@ -264,7 +273,7 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm { auto & column = block_to_write.getByName(it->name); - if (data_part->getSerialization(it->name)->getKind() != ISerialization::Kind::SPARSE) + if (getSerialization(it->name)->getKind() != ISerialization::Kind::SPARSE) column.column = recursiveRemoveSparse(column.column); if (permutation) @@ -334,7 +343,7 @@ StreamsWithMarks MergeTreeDataPartWriterWide::getCurrentMarksForColumn( min_compress_block_size = value->safeGet(); if (!min_compress_block_size) min_compress_block_size = settings.min_compress_block_size; - data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; auto stream_name = getStreamName(column, substream_path); @@ -368,7 +377,7 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( ISerialization::SerializeBinaryBulkSettings & serialize_settings, const Granule & granule) { - const auto & serialization = data_part->getSerialization(name_and_type.name); + const auto & serialization = getSerialization(name_and_type.name); serialization->serializeBinaryBulkWithMultipleStreams(column, granule.start_row, granule.rows_to_write, serialize_settings, serialization_state); /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one. 
@@ -398,7 +407,7 @@ void MergeTreeDataPartWriterWide::writeColumn( const auto & [name, type] = name_and_type; auto [it, inserted] = serialization_states.emplace(name, nullptr); - auto serialization = data_part->getSerialization(name_and_type.name); + auto serialization = getSerialization(name_and_type.name); if (inserted) { @@ -407,11 +416,11 @@ void MergeTreeDataPartWriterWide::writeColumn( serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } - const auto & global_settings = storage.getContext()->getSettingsRef(); +// const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); - serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; for (const auto & granule : granules) { @@ -460,7 +469,7 @@ void MergeTreeDataPartWriterWide::writeColumn( void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePair & name_type) { const auto & [name, type] = name_type; - const auto & serialization = data_part->getSerialization(name_type.name); + const auto & serialization = getSerialization(name_type.name); if (!type->isValueRepresentedByNumber() || type->haveSubtypes() || serialization->getKind() != ISerialization::Kind::DEFAULT) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot validate column of non fixed type {}", type->getName()); @@ -470,21 +479,21 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai String bin_path = escaped_name + DATA_FILE_EXTENSION; /// Some columns may be removed because of ttl. Skip them. 
- if (!data_part->getDataPartStorage().exists(mrk_path)) + if (!getDataPartStorage().exists(mrk_path)) return; - auto mrk_file_in = data_part->getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt); + auto mrk_file_in = getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt); std::unique_ptr mrk_in; - if (data_part->index_granularity_info.mark_type.compressed) + if (index_granularity_info.mark_type.compressed) mrk_in = std::make_unique(std::move(mrk_file_in)); else mrk_in = std::move(mrk_file_in); - DB::CompressedReadBufferFromFile bin_in(data_part->getDataPartStorage().readFile(bin_path, {}, std::nullopt, std::nullopt)); + DB::CompressedReadBufferFromFile bin_in(getDataPartStorage().readFile(bin_path, {}, std::nullopt, std::nullopt)); bool must_be_last = false; UInt64 offset_in_compressed_file = 0; UInt64 offset_in_decompressed_block = 0; - UInt64 index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; + UInt64 index_granularity_rows = index_granularity_info.fixed_index_granularity; size_t mark_num; @@ -500,7 +509,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (settings.can_use_adaptive_granularity) readBinaryLittleEndian(index_granularity_rows, *mrk_in); else - index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; + index_granularity_rows = index_granularity_info.fixed_index_granularity; if (must_be_last) { @@ -533,7 +542,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", - data_part->getDataPartStorage().getFullPath(), + getDataPartStorage().getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount()); @@ -596,10 +605,10 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) { - const auto & global_settings = storage.getContext()->getSettingsRef(); +// const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { @@ -622,7 +631,7 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum if (!serialization_states.empty()) { serialize_settings.getter = createStreamGetter(*it, written_offset_columns ? 
*written_offset_columns : offset_columns); - data_part->getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); + getSerialization(it->name)->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[it->name]); } if (write_final_mark) @@ -665,7 +674,7 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(bool sync) { if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes() - && data_part->getSerialization(column.name)->getKind() == ISerialization::Kind::DEFAULT) + && getSerialization(column.name)->getKind() == ISerialization::Kind::DEFAULT) { validateColumnOfFixedSize(column); } @@ -708,7 +717,7 @@ void MergeTreeDataPartWriterWide::writeFinalMark( { writeSingleMark(column, offset_columns, 0); /// Memoize information about offsets - data_part->getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) + getSerialization(column.name)->enumerateStreams([&] (const ISerialization::SubstreamPath & substream_path) { bool is_offsets = !substream_path.empty() && substream_path.back().type == ISerialization::Substream::ArraySizes; if (is_offsets) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index f5ff323563d..ef9c4ab17dc 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -21,7 +21,14 @@ class MergeTreeDataPartWriterWide : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterWide( - const MergeTreeMutableDataPartPtr & data_part, +// const MergeTreeMutableDataPartPtr & data_part, + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index daa163d741c..0f05c171230 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -600,7 +600,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( indices, MergeTreeStatisticsFactory::instance().getMany(metadata_snapshot->getColumns()), compression_codec, - context->getCurrentTransaction(), + context->getCurrentTransaction() ? context->getCurrentTransaction()->tid : Tx::PrehistoricTID, false, false, context->getWriteSettings()); @@ -738,7 +738,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( MergeTreeIndices{}, Statistics{}, /// TODO(hanfei): It should be helpful to write statistics for projection result. 
compression_codec, - NO_TRANSACTION_PTR, + Tx::PrehistoricTID, false, false, data.getContext()->getWriteSettings()); out->writeWithPermutation(block, perm_ptr); diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index ddeaf69136a..c2ef7f98388 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -12,6 +12,7 @@ #include #include #include +#include "Interpreters/Context_fwd.h" #include #include @@ -413,12 +414,14 @@ void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataM partition_key_sample.getByPosition(i).type->getDefaultSerialization()->deserializeBinary(value[i], *file, {}); } -std::unique_ptr MergeTreePartition::store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const +std::unique_ptr MergeTreePartition::store(/*const MergeTreeData & storage,*/ + StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, + IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const { - auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - const auto & context = storage.getContext(); - const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage.getContext()).sample_block; - return store(partition_key_sample, data_part_storage, checksums, context->getWriteSettings()); +// auto metadata_snapshot = storage.getInMemoryMetadataPtr(); +// const auto & context = storage.getContext(); + const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage_context).sample_block; + return store(partition_key_sample, data_part_storage, checksums, storage_context->getWriteSettings()); } std::unique_ptr MergeTreePartition::store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 78b141f26ec..04175d6f927 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -44,7 +44,9 @@ public: /// Store functions return write buffer with written but not finalized data. /// User must call finish() for returned object. 
- [[nodiscard]] std::unique_ptr store(const MergeTreeData & storage, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; + [[nodiscard]] std::unique_ptr store(//const MergeTreeData & storage, + StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, + IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const; void assign(const MergeTreePartition & other) { value = other.value; } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 9f641fd8eb5..2441d941952 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -21,35 +21,40 @@ MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeIndices & skip_indices, const Statistics & statistics, CompressionCodecPtr default_codec_, - const MergeTreeTransactionPtr & txn, + TransactionID tid, bool reset_columns_, bool blocks_are_granules_size, const WriteSettings & write_settings_, const MergeTreeIndexGranularity & computed_index_granularity) - : IMergedBlockOutputStream(data_part, metadata_snapshot_, columns_list_, reset_columns_) + : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, reset_columns_) , columns_list(columns_list_) , default_codec(default_codec_) , write_settings(write_settings_) { MergeTreeWriterSettings writer_settings( - storage.getContext()->getSettings(), + data_part->storage.getContext()->getSettings(), write_settings, - storage.getSettings(), + storage_settings, data_part->index_granularity_info.mark_type.adaptive, /* rewrite_primary_key = */ true, blocks_are_granules_size); +// TODO: looks like isStoredOnDisk() is always true for MergeTreeDataPart if (data_part->isStoredOnDisk()) data_part_storage->createDirectories(); - /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. - TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; +// /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. +// TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; /// NOTE do not pass context for writing to system.transactions_info_log, /// because part may have temporary name (with temporary block numbers). Will write it later. data_part->version.setCreationTID(tid, nullptr); data_part->storeVersionMetadata(); - writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, statistics, default_codec, writer_settings, computed_index_granularity); + writer = createMergeTreeDataPartWriter(data_part->getType(), + data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), + data_part_storage, data_part->index_granularity_info, + storage_settings, + columns_list, metadata_snapshot, skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity); } /// If data is pre-sorted. 
@@ -208,7 +213,7 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (new_part->isProjectionPart()) { - if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) + if (new_part->storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) { auto count_out = new_part->getDataPartStorage().writeFile("count.txt", 4096, write_settings); HashingWriteBuffer count_out_hashing(*count_out); @@ -234,14 +239,16 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis written_files.emplace_back(std::move(out)); } - if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + if (new_part->storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - if (auto file = new_part->partition.store(storage, new_part->getDataPartStorage(), checksums)) + if (auto file = new_part->partition.store(//storage, + new_part->storage.getInMemoryMetadataPtr(), new_part->storage.getContext(), + new_part->getDataPartStorage(), checksums)) written_files.emplace_back(std::move(file)); if (new_part->minmax_idx->initialized) { - auto files = new_part->minmax_idx->store(storage, new_part->getDataPartStorage(), checksums); + auto files = new_part->minmax_idx->store(new_part->storage, new_part->getDataPartStorage(), checksums); for (auto & file : files) written_files.emplace_back(std::move(file)); } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 540b3b3bffa..c1e3d75fefc 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -22,7 +22,7 @@ public: const MergeTreeIndices & skip_indices, const Statistics & statistics, CompressionCodecPtr default_codec_, - const MergeTreeTransactionPtr & txn, + TransactionID tid, bool reset_columns_ = false, bool blocks_are_granules_size = false, const WriteSettings & write_settings = {}, diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 728b2e38833..51853384012 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -20,11 +20,11 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( WrittenOffsetColumns * offset_columns_, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) - : IMergedBlockOutputStream(data_part, metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) + : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, header_.getNamesAndTypesList(), /*reset_columns=*/ true) , header(header_) { const auto & global_settings = data_part->storage.getContext()->getSettings(); - const auto & storage_settings = data_part->storage.getSettings(); +// const auto & storage_settings = data_part->storage.getSettings(); MergeTreeWriterSettings writer_settings( global_settings, @@ -33,11 +33,16 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( index_granularity_info ? 
index_granularity_info->mark_type.adaptive : data_part->storage.canUseAdaptiveGranularity(), /* rewrite_primary_key = */ false); - writer = data_part->getWriter( + writer = createMergeTreeDataPartWriter( + data_part->getType(), + data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), + data_part_storage, data_part->index_granularity_info, + storage_settings, header.getNamesAndTypesList(), metadata_snapshot_, indices_to_recalc, stats_to_recalc_, + data_part->getMarksFileExtension(), default_codec, writer_settings, index_granularity); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 55d845dfbb9..54077055d96 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1660,7 +1660,7 @@ private: skip_indices, stats_to_rewrite, ctx->compression_codec, - ctx->txn, + ctx->txn ? ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, /*blocks_are_granules_size=*/ false, ctx->context->getWriteSettings(), From 32b8aba8ef1bf9a0b890065a5d719a002cee8bb5 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 10 May 2024 14:12:34 +0200 Subject: [PATCH 101/392] Style --- src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index c67e148d011..b46fbc5fc9e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -6,6 +6,7 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int NO_SUCH_COLUMN_IN_TABLE; } @@ -144,7 +145,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeIndexGranularity & computed_index_granularity); - MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( MergeTreeDataPartType part_type, const String & data_part_name_, From 60c721c21b645bad32dbe361b502e9132474793a Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 10 May 2024 12:20:27 +0000 Subject: [PATCH 102/392] Fix build after conflict resolution --- src/Functions/FunctionsConversion.cpp | 3 ++- src/Storages/MergeTree/MergeTreeReaderWide.cpp | 11 +++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 90703947182..8f5d11b05ee 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -5057,7 +5058,7 @@ private: } else if (from_type->getCustomSerialization()) { - ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + ret = [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? 
getFormatSettings(context) : FormatSettings()); }; diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 9468cffd25d..b7eefab112c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -249,7 +249,7 @@ MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const auto marks_loader = std::make_shared( data_part_info_for_read, mark_cache, - data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(*stream_name), + data_part_info_for_read->getIndexGranularityInfo().getMarksFilePath(stream_name), num_marks_in_part, data_part_info_for_read->getIndexGranularityInfo(), settings.save_marks_in_cache, @@ -257,24 +257,23 @@ MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const load_marks_threadpool, /*num_columns_in_mark=*/ 1); - has_any_stream = true; auto stream_settings = settings; stream_settings.is_low_cardinality_dictionary = substream_path.size() > 1 && substream_path[substream_path.size() - 2].type == ISerialization::Substream::Type::DictionaryKeys; auto create_stream = [&]() { return std::make_unique( - data_part_info_for_read->getDataPartStorage(), *stream_name, DATA_FILE_EXTENSION, + data_part_info_for_read->getDataPartStorage(), stream_name, DATA_FILE_EXTENSION, num_marks_in_part, all_mark_ranges, stream_settings, - uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(*stream_name + DATA_FILE_EXTENSION), + uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION), std::move(marks_loader), profile_callback, clock_type); }; if (read_without_marks) - return streams.emplace(*stream_name, create_stream.operator()()); + return streams.emplace(stream_name, create_stream.operator()()).first; marks_loader->startAsyncLoad(); - return streams.emplace(*stream_name, create_stream.operator()()); + return streams.emplace(stream_name, create_stream.operator()()).first; } ReadBuffer * MergeTreeReaderWide::getStream( From fb20e80db417f63ed7a12036488accb9f418f261 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 10 May 2024 13:23:19 +0000 Subject: [PATCH 103/392] Better test, fix style --- src/Functions/FunctionsConversion.cpp | 62 ++++++++++++------- .../MergeTree/MergeTreeReaderWide.cpp | 2 +- ...9_dynamic_all_merge_algorithms_2.reference | 20 +++--- .../03039_dynamic_all_merge_algorithms_2.sh | 8 +-- 4 files changed, 56 insertions(+), 36 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 8f5d11b05ee..5bb6fa065de 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -576,7 +576,7 @@ ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) template struct ConvertImplGenericToString { - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const FormatSettings & format_settings) + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/, const ContextPtr & context) { static_assert(std::is_same_v || std::is_same_v, "Can be used only to serialize to ColumnString or ColumnFixedString"); @@ -597,6 +597,7 @@ struct ConvertImplGenericToString auto & write_buffer = write_helper.getWriteBuffer(); + FormatSettings format_settings = context ? 
getFormatSettings(context) : FormatSettings{}; auto serialization = type.getDefaultSerialization(); for (size_t row = 0; row < size; ++row) { @@ -1820,7 +1821,7 @@ struct ConvertImpl template struct ConvertImplGenericFromString { - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) + static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count, const ContextPtr & context) { const IColumn & column_from = *arguments[0].column; const IDataType & data_type_to = *result_type; @@ -1828,7 +1829,7 @@ struct ConvertImplGenericFromString auto serialization = data_type_to.getDefaultSerialization(); const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get()); + executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get(), context); return res; } @@ -1838,11 +1839,12 @@ struct ConvertImplGenericFromString const ISerialization & serialization_from, size_t input_rows_count, const PaddedPODArray * null_map, - const IDataType * result_type) + const IDataType * result_type, + const ContextPtr & context) { column_to.reserve(input_rows_count); - FormatSettings format_settings; + FormatSettings format_settings = context ? getFormatSettings(context) : FormatSettings{}; for (size_t i = 0; i < input_rows_count; ++i) { if (null_map && (*null_map)[i]) @@ -2299,7 +2301,7 @@ private: if constexpr (std::is_same_v) { if (from_type->getCustomSerialization()) - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? getFormatSettings(context) : FormatSettings()); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context); } bool done = false; @@ -2332,7 +2334,7 @@ private: /// Generic conversion of any type to String. if (std::is_same_v) { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? getFormatSettings(context) : FormatSettings()); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context); } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", @@ -3288,8 +3290,17 @@ private: if (checkAndGetDataType(from_type.get())) { if (cast_type == CastType::accurateOrNull) - return &ConvertImplGenericFromString::execute; - return &ConvertImplGenericFromString::execute; + { + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; + } + + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } return createWrapper(from_type, to_type, requested_result_is_nullable); @@ -3452,7 +3463,10 @@ private: /// Conversion from String through parsing. 
if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) { @@ -3495,7 +3509,10 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } DataTypePtr from_type_holder; @@ -3586,7 +3603,10 @@ private: /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); + }; } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); @@ -3929,9 +3949,9 @@ private: } else if (checkAndGetDataType(from_type.get())) { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count, context)->assumeMutable(); res->finalize(); return res; }; @@ -4104,8 +4124,8 @@ private: args[0].type = removeNullable(removeLowCardinality(args[0].type)); if (cast_type == CastType::accurateOrNull) - return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); - return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); }; } @@ -4265,8 +4285,8 @@ private: args[0].type = removeNullable(removeLowCardinality(args[0].type)); if (cast_type == CastType::accurateOrNull) - return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); - return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count, context); }; } @@ -5020,9 +5040,9 @@ private: wrapped_result_type = makeNullable(result_type); if (this->cast_type == CastType::accurateOrNull) return ConvertImplGenericFromString::execute( - arguments, wrapped_result_type, column_nullable, 
input_rows_count); + arguments, wrapped_result_type, column_nullable, input_rows_count, context); return ConvertImplGenericFromString::execute( - arguments, wrapped_result_type, column_nullable, input_rows_count); + arguments, wrapped_result_type, column_nullable, input_rows_count, context); }; return true; } @@ -5060,7 +5080,7 @@ private: { ret = [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context ? getFormatSettings(context) : FormatSettings()); + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count, context); }; return true; } diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index b7eefab112c..b6882fdced9 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -271,7 +271,7 @@ MergeTreeReaderWide::FileStreams::iterator MergeTreeReaderWide::addStream(const if (read_without_marks) return streams.emplace(stream_name, create_stream.operator()()).first; - + marks_loader->startAsyncLoad(); return streams.emplace(stream_name, create_stream.operator()()).first; } diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference index 03c8b4564fa..af6c7d8d567 100644 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference @@ -2,8 +2,8 @@ MergeTree compact + horizontal merge CollapsingMergeTree 100000 String 100000 UInt64 -50000 UInt64 50000 String +50000 UInt64 VersionedCollapsingMergeTree 100000 String 100000 UInt64 @@ -11,34 +11,34 @@ VersionedCollapsingMergeTree 75000 UInt64 MergeTree wide + horizontal merge CollapsingMergeTree -100000 UInt64 100000 String +100000 UInt64 50000 String 50000 UInt64 VersionedCollapsingMergeTree -100000 UInt64 100000 String +100000 UInt64 75000 String 75000 UInt64 MergeTree compact + vertical merge CollapsingMergeTree -100000 UInt64 100000 String -50000 UInt64 +100000 UInt64 50000 String +50000 UInt64 VersionedCollapsingMergeTree -100000 UInt64 100000 String -75000 UInt64 +100000 UInt64 75000 String +75000 UInt64 MergeTree wide + vertical merge CollapsingMergeTree -100000 UInt64 100000 String +100000 UInt64 50000 String 50000 UInt64 VersionedCollapsingMergeTree -100000 UInt64 100000 String -75000 UInt64 +100000 UInt64 75000 String +75000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh index 5dae9228d0a..f067a99ca19 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh @@ -18,9 +18,9 @@ function test() $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" $CH_CLIENT -q "insert into test select number, -1, 'str_' || toString(number) from numbers(50000, 100000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by 
dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "drop table test" echo "VersionedCollapsingMergeTree" @@ -29,9 +29,9 @@ function test() $CH_CLIENT -q "insert into test select number, 1, 1, number from numbers(100000)" $CH_CLIENT -q "insert into test select number, -1, number >= 75000 ? 2 : 1, 'str_' || toString(number) from numbers(50000, 100000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count()" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" $CH_CLIENT -q "drop table test" } From b20d60858f1286a5e406e2c74036e6ad244fda2b Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 10 May 2024 15:48:32 +0200 Subject: [PATCH 104/392] Pass low cardinality settings --- src/Storages/MergeTree/IMergeTreeDataPartWriter.h | 2 -- src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp | 8 ++++---- src/Storages/MergeTree/MergeTreeIOSettings.h | 5 +++++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index ec04fd5f8a8..52e21bed2f2 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -69,8 +69,6 @@ protected: // const MergeTreeData & storage; // TODO: remove const MergeTreeSettingsPtr storage_settings; - const size_t low_cardinality_max_dictionary_size = 0; // TODO: pass it in ctor - const bool low_cardinality_use_single_dictionary_for_part = true; // TODO: pass it in ctor const StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 1f68a9d31a1..713dee87fa8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -419,8 +419,8 @@ void MergeTreeDataPartWriterWide::writeColumn( // const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); - serialize_settings.low_cardinality_max_dictionary_size = low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; for (const auto & granule : granules) { @@ -607,8 +607,8 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum { // const auto & global_settings = storage.getContext()->getSettingsRef(); 
ISerialization::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.low_cardinality_max_dictionary_size = low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 12a83703148..421c62887da 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -74,6 +74,8 @@ struct MergeTreeWriterSettings , blocks_are_granules_size(blocks_are_granules_size_) , query_write_settings(query_write_settings_) , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) + , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) + , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part) { } @@ -93,6 +95,9 @@ struct MergeTreeWriterSettings WriteSettings query_write_settings; size_t max_threads_for_annoy_index_creation; + + size_t low_cardinality_max_dictionary_size; + bool low_cardinality_use_single_dictionary_for_part; }; } From cd3604f23543cbd07f650c1446d54606d06a81cf Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 10 May 2024 14:14:17 +0000 Subject: [PATCH 105/392] Remove trailing whitespaces --- src/Functions/FunctionsConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 5bb6fa065de..09d0025860a 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -3296,7 +3296,7 @@ private: return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); }; } - + return [this](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) -> ColumnPtr { return ConvertImplGenericFromString::execute(arguments, result_type, column_nullable, input_rows_count, context); From 5004c225831c1fa1cf8c213673148a1ca299d4e1 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Fri, 10 May 2024 15:25:21 +0200 Subject: [PATCH 106/392] Fix Array and Map support with Keyed hashing When working with materialized key columns and rows containing Arrays or Maps (implemented as Tuple's Arrays) with multiple values, the keyed hash functions were erroneously refusing to proceed, because they misinterpreted the output vector size. Close #61497 which was reported as a security issue, but it didn't actually have any security impact. The usefulness of keyed hashing over Maps is also questionable, but we support it for completeness. 
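For context, the offsets-based key lookup this patch introduces can be illustrated with a standalone sketch (hypothetical helper names, not the actual SipHashKeyColumns code): with materialized key columns there is one key pair per row, while the nested column of an Array/Map holds one value per element, so each flattened element index has to be mapped back to its owning row through the ColumnArray offsets before the correct key can be chosen.

    // Standalone C++ sketch (assumed names, not the ClickHouse implementation):
    // map a flattened array-element index back to the row it belongs to, so the
    // per-row hash key can be applied to every element of that row's Array/Map.
    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <vector>

    using Offsets = std::vector<uint64_t>; // cumulative element counts, one entry per row

    // Returns the row index owning flattened element i: the first offset greater than i.
    size_t rowForElement(const Offsets & offsets, uint64_t i)
    {
        auto upper = std::upper_bound(offsets.begin(), offsets.end(), i);
        assert(upper != offsets.end()); // i must be < offsets.back()
        return static_cast<size_t>(upper - offsets.begin());
    }

    int main()
    {
        // Two rows with two elements each -> offsets {2, 4}; one (k0, k1) key per row.
        Offsets offsets{2, 4};
        std::vector<std::pair<uint64_t, uint64_t>> keys{{1, 2}, {3, 4}};

        for (uint64_t i = 0; i < offsets.back(); ++i)
        {
            size_t row = rowForElement(offsets, i);
            std::cout << "element " << i << " hashed with key of row " << row
                      << " (" << keys[row].first << ", " << keys[row].second << ")\n";
        }
    }

The sketch only shows the index mapping; the real fix additionally makes the key-column size check use offsets->back() so the nested value count is no longer mistaken for the row count.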
--- src/Functions/FunctionsHashing.h | 24 ++++++++++++++++++- .../0_stateless/02534_keyed_siphash.reference | 3 +++ .../0_stateless/02534_keyed_siphash.sql | 7 ++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 79b33e2f75b..bccdba5ee69 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -49,6 +49,8 @@ #include #include +#include + namespace DB { @@ -75,17 +77,29 @@ namespace impl ColumnPtr key0; ColumnPtr key1; bool is_const; + const ColumnArray::Offsets * offsets{}; size_t size() const { assert(key0 && key1); assert(key0->size() == key1->size()); + assert(offsets == nullptr || offsets->size() == key0->size()); + if (offsets != nullptr) + return offsets->back(); return key0->size(); } SipHashKey getKey(size_t i) const { if (is_const) i = 0; + if (offsets != nullptr) + { + const auto begin = offsets->begin(); + auto upper = std::upper_bound(begin, offsets->end(), i); + if (upper == offsets->end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "offset {} not found in function SipHashKeyColumns::getKey", i); + i = upper - begin; + } const auto & key0data = assert_cast(*key0).getData(); const auto & key1data = assert_cast(*key1).getData(); return {key0data[i], key1data[i]}; @@ -1112,7 +1126,15 @@ private: typename ColumnVector::Container vec_temp(nested_size); bool nested_is_first = true; - executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); + + if constexpr (Keyed) + { + KeyColumnsType key_cols_tmp{key_cols}; + key_cols_tmp.offsets = &offsets; + executeForArgument(key_cols_tmp, nested_type, nested_column, vec_temp, nested_is_first); + } + else + executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); const size_t size = offsets.size(); diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index e3fae07333a..3f478218ff1 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -236,3 +236,6 @@ Check asan bug 0 Check bug found fuzzing 9042C6691B1A75F0EA3314B6F55728BB +Check bug 2 found fuzzing +608E1FF030C9E206185B112C2A25F1A7 +ABB65AE97711A2E053E324ED88B1D08B diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index 112ae15bf46..fb707109c83 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -338,3 +338,10 @@ SELECT sipHash128((toUInt64(9223372036854775806), 1)) = sipHash128(1) GROUP BY s SELECT 'Check bug found fuzzing'; SELECT [(255, 1048575)], sipHash128ReferenceKeyed((toUInt64(2147483646), toUInt64(9223372036854775807)), ([(NULL, 100), (NULL, NULL), (1024, 10)], toUInt64(2), toUInt64(1024)), ''), hex(sipHash128ReferenceKeyed((-9223372036854775807, 1.), '-1', NULL)), ('', toUInt64(65535), [(9223372036854775807, 9223372036854775806)], toUInt64(65536)), arrayJoin((NULL, 65537, 255), [(NULL, NULL)]) GROUP BY tupleElement((NULL, NULL, NULL, -1), toUInt64(2), 2) = NULL; -- { serverError NOT_IMPLEMENTED } SELECT hex(sipHash128ReferenceKeyed((0::UInt64, 0::UInt64), ([1, 1]))); + +SELECT 'Check bug 2 found fuzzing'; +DROP TABLE IF EXISTS sipHashKeyed_keys; +CREATE TABLE sipHashKeyed_keys (`a` Map(String, String)) ENGINE = Memory; +INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g':'h'}); +SELECT 
hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a; +DROP TABLE sipHashKeyed_keys; From a3aff6939c0b3afeeb9e4ab9c6f2992a2c61b543 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 10 May 2024 19:21:16 +0200 Subject: [PATCH 107/392] Protected methods --- src/Storages/MergeTree/IMergeTreeDataPartWriter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 52e21bed2f2..6854668a01e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -49,13 +49,13 @@ public: Columns releaseIndexColumns(); const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; } +protected: SerializationPtr getSerialization(const String & column_name) const; ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const; IDataPartStorage & getDataPartStorage() { return *data_part_storage; } -protected: // const MergeTreeMutableDataPartPtr data_part; // TODO: remove From 9d0ad7ba67b6855344512398b5f924bdad4ece9e Mon Sep 17 00:00:00 2001 From: copperybean Date: Sun, 14 Jan 2024 11:25:12 +0800 Subject: [PATCH 108/392] original parquet reader Change-Id: I83a8ec8271edefcd96cb5b3bcd12f6b545d9dec0 --- .../Impl/Parquet/ParquetColumnReader.h | 29 + .../Formats/Impl/Parquet/ParquetDataBuffer.h | 179 ++++++ .../Impl/Parquet/ParquetDataValuesReader.cpp | 553 ++++++++++++++++++ .../Impl/Parquet/ParquetDataValuesReader.h | 263 +++++++++ .../Impl/Parquet/ParquetLeafColReader.cpp | 506 ++++++++++++++++ .../Impl/Parquet/ParquetLeafColReader.h | 63 ++ .../Impl/Parquet/ParquetRecordReader.cpp | 225 +++++++ .../Impl/Parquet/ParquetRecordReader.h | 48 ++ 8 files changed, 1866 insertions(+) create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h diff --git a/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h new file mode 100644 index 00000000000..cfd9d3ba5bd --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h @@ -0,0 +1,29 @@ +#pragma once + +#include + +namespace parquet +{ + +class PageReader; +class ColumnChunkMetaData; +class DataPageV1; +class DataPageV2; + +} + +namespace DB +{ + +class ParquetColumnReader +{ +public: + virtual ColumnWithTypeAndName readBatch(UInt32 rows_num, const String & name) = 0; + + virtual ~ParquetColumnReader() = default; +}; + +using ParquetColReaderPtr = std::unique_ptr; +using ParquetColReaders = std::vector; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h new file mode 100644 index 00000000000..1f83c74f9ad --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -0,0 +1,179 @@ +#pragma once + +#include + +#include +#include +#include + 
+namespace DB +{ + +template struct ToArrowDecimal; + +template <> struct ToArrowDecimal>> +{ + using ArrowDecimal = arrow::Decimal128; +}; + +template <> struct ToArrowDecimal>> +{ + using ArrowDecimal = arrow::Decimal256; +}; + + +class ParquetDataBuffer +{ +private: + +public: + ParquetDataBuffer(const uint8_t * data_, UInt64 avaible_, UInt8 datetime64_scale_ = DataTypeDateTime64::default_scale) + : data(reinterpret_cast(data_)), avaible(avaible_), datetime64_scale(datetime64_scale_) {} + + template + void ALWAYS_INLINE readValue(TValue & dst) + { + checkAvaible(sizeof(TValue)); + dst = *reinterpret_cast(data); + consume(sizeof(TValue)); + } + + void ALWAYS_INLINE readBytes(void * dst, size_t bytes) + { + checkAvaible(bytes); + memcpy(dst, data, bytes); + consume(bytes); + } + + void ALWAYS_INLINE readDateTime64(DateTime64 & dst) + { + static const int max_scale_num = 9; + static const UInt64 pow10[max_scale_num + 1] + = {1000000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1}; + static const UInt64 spd = 60 * 60 * 24; + static const UInt64 scaled_day[max_scale_num + 1] + = {spd, + 10 * spd, + 100 * spd, + 1000 * spd, + 10000 * spd, + 100000 * spd, + 1000000 * spd, + 10000000 * spd, + 100000000 * spd, + 1000000000 * spd}; + + checkAvaible(sizeof(parquet::Int96)); + auto decoded = parquet::DecodeInt96Timestamp(*reinterpret_cast(data)); + + uint64_t scaled_nano = decoded.nanoseconds / pow10[datetime64_scale]; + dst = static_cast(decoded.days_since_epoch * scaled_day[datetime64_scale] + scaled_nano); + + consume(sizeof(parquet::Int96)); + } + + /** + * This method should only be used to read string whose elements size is small. + * Because memcpySmallAllowReadWriteOverflow15 instead of memcpy is used according to ColumnString::indexImpl + */ + void ALWAYS_INLINE readString(ColumnString & column, size_t cursor) + { + // refer to: PlainByteArrayDecoder::DecodeArrowDense in encoding.cc + // deserializeBinarySSE2 in SerializationString.cpp + checkAvaible(4); + auto value_len = ::arrow::util::SafeLoadAs(getArrowData()); + if (unlikely(value_len < 0 || value_len > INT32_MAX - 4)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid or corrupted value_len '{}'", value_len); + } + consume(4); + checkAvaible(value_len); + + auto chars_cursor = column.getChars().size(); + column.getChars().resize(chars_cursor + value_len + 1); + + memcpySmallAllowReadWriteOverflow15(&column.getChars()[chars_cursor], data, value_len); + column.getChars().back() = 0; + + column.getOffsets().data()[cursor] = column.getChars().size(); + consume(value_len); + } + + template + void ALWAYS_INLINE readOverBigDecimal(TDecimal * out, Int32 elem_bytes_num) + { + using TArrowDecimal = typename ToArrowDecimal::ArrowDecimal; + + checkAvaible(elem_bytes_num); + + // refer to: RawBytesToDecimalBytes in reader_internal.cc, Decimal128::FromBigEndian in decimal.cc + auto status = TArrowDecimal::FromBigEndian(getArrowData(), elem_bytes_num); + if (unlikely(!status.ok())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Read parquet decimal failed: {}", status.status().ToString()); + } + status.ValueUnsafe().ToBytes(reinterpret_cast(out)); + consume(elem_bytes_num); + } + +private: + const Int8 * data; + UInt64 avaible; + const UInt8 datetime64_scale; + + void ALWAYS_INLINE checkAvaible(UInt64 num) + { + if (unlikely(avaible < num)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Consuming {} bytes while {} avaible", num, avaible); + } + } + + const uint8_t * ALWAYS_INLINE getArrowData() { return 
reinterpret_cast(data); } + + void ALWAYS_INLINE consume(UInt64 num) + { + data += num; + avaible -= num; + } +}; + + +class LazyNullMap +{ +public: + LazyNullMap(UInt32 size_) : size(size_), col_nullable(nullptr) {} + + void setNull(UInt32 cursor) + { + initialize(); + null_map[cursor] = 1; + } + + void setNull(UInt32 cursor, UInt32 count) + { + initialize(); + memset(null_map + cursor, 1, count); + } + + ColumnPtr getNullableCol() { return col_nullable; } + +private: + UInt32 size; + UInt8 * null_map; + ColumnPtr col_nullable; + + void initialize() + { + if (likely(col_nullable)) + { + return; + } + auto col = ColumnVector::create(size); + null_map = col->getData().data(); + col_nullable = std::move(col); + memset(null_map, 0, size); + } +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp new file mode 100644 index 00000000000..659a7a11969 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -0,0 +1,553 @@ +#include "ParquetDataValuesReader.h" + +#include +#include + +#include + +namespace DB +{ + +void RleValuesReader::nextGroup() +{ + // refer to: + // RleDecoder::NextCounts in rle_encoding.h and VectorizedRleValuesReader::readNextGroup in Spark + UInt32 indicator_value = 0; + [[maybe_unused]] auto read_res = bit_reader->GetVlqInt(&indicator_value); + assert(read_res); + + cur_group_is_packed = indicator_value & 1; + cur_group_size = indicator_value >> 1; + + if (cur_group_is_packed) + { + cur_group_size *= 8; + cur_packed_bit_values.resize(cur_group_size); + bit_reader->GetBatch(bit_width, cur_packed_bit_values.data(), cur_group_size); + } + else + { + cur_value = 0; + read_res = bit_reader->GetAligned((bit_width + 7) / 8, &cur_value); + assert(read_res); + } + cur_group_cursor = 0; + +} + +template +void RleValuesReader::visitValues( + UInt32 num_values, IndividualVisitor && individual_visitor, RepeatedVisitor && repeated_visitor) +{ + // refer to: VisitNullBitmapInline in visitor_inline.h + while (num_values) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + individual_visitor(cur_packed_bit_values[i]); + } + } + else + { + repeated_visitor(cur_count, cur_value); + } + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::visitNullableValues( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + LazyNullMap & null_map, + IndividualVisitor && individual_visitor, + RepeatedVisitor && repeated_visitor) +{ + while (num_values) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + if (cur_packed_bit_values[i] == max_def_level) + { + individual_visitor(cursor); + } + else + { + null_map.setNull(cursor); + } + cursor++; + } + } + else + { + if (cur_value == max_def_level) + { + repeated_visitor(cursor, cur_count); + } + else + { + null_map.setNull(cursor, cur_count); + } + cursor += cur_count; + } + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::visitNullableBySteps( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + IndividualNullVisitor && individual_null_visitor, + SteppedValidVisitor && stepped_valid_visitor, + RepeatedVisitor && repeated_visitor) 
+{ + // refer to: + // RleDecoder::GetBatch in rle_encoding.h and TypedColumnReaderImpl::ReadBatchSpaced in column_reader.cc + // VectorizedRleValuesReader::readBatchInternal in Spark + while (num_values > 0) + { + nextGroupIfNecessary(); + auto cur_count = std::min(num_values, curGroupLeft()); + + if (cur_group_is_packed) + { + valid_index_steps.resize(cur_count + 1); + valid_index_steps[0] = 0; + auto step_idx = 0; + auto null_map_cursor = cursor; + + for (auto i = cur_group_cursor; i < cur_group_cursor + cur_count; i++) + { + if (cur_packed_bit_values[i] == max_def_level) + { + valid_index_steps[++step_idx] = 1; + } + else + { + individual_null_visitor(null_map_cursor); + if (unlikely(valid_index_steps[step_idx] == UINT8_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported packed values number"); + } + valid_index_steps[step_idx]++; + } + null_map_cursor++; + } + valid_index_steps.resize(step_idx + 1); + stepped_valid_visitor(cursor, valid_index_steps); + } + else + { + repeated_visitor(cur_value == max_def_level, cursor, cur_count); + } + + cursor += cur_count; + cur_group_cursor += cur_count; + num_values -= cur_count; + } +} + +template +void RleValuesReader::setValues(TValue * res_values, UInt32 num_values, ValueGetter && val_getter) +{ + visitValues( + num_values, + /* individual_visitor */ [&](Int32 val) + { + *(res_values++) = val_getter(val); + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + std::fill(res_values, res_values + count, val_getter(val)); + res_values += count; + } + ); +} + +template +void RleValuesReader::setValueBySteps( + TValue * res_values, + const std::vector & col_data_steps, + ValueGetter && val_getter) +{ + auto step_iterator = col_data_steps.begin(); + res_values += *(step_iterator++); + + visitValues( + col_data_steps.size() - 1, + /* individual_visitor */ [&](Int32 val) + { + *res_values = val_getter(val); + res_values += *(step_iterator++); + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + auto getted_val = val_getter(val); + for (UInt32 i = 0; i < count; i++) + { + *res_values = getted_val; + res_values += *(step_iterator++); + } + } + ); +} + + +namespace +{ + +template +TValue * getResizedPrimitiveData(TColumn & column, size_t size) +{ + auto old_size = column.size(); + column.getData().resize(size); + memset(column.getData().data() + old_size, 0, sizeof(TValue) * (size - old_size)); + return column.getData().data(); +} + +} // anoynomous namespace + + +template <> +void ParquetPlainValuesReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto & column = *assert_cast(col_ptr.get()); + auto cursor = column.size(); + + column.getOffsets().resize(cursor + num_values); + auto * offset_data = column.getOffsets().data(); + auto & chars = column.getChars(); + + def_level_reader->visitValues( + num_values, + /* individual_visitor */ [&](Int32 val) + { + if (val == max_def_level) + { + plain_data_buffer.readString(column, cursor); + } + else + { + chars.push_back(0); + offset_data[cursor] = chars.size(); + null_map.setNull(cursor); + } + cursor++; + }, + /* repeated_visitor */ [&](UInt32 count, Int32 val) + { + if (val == max_def_level) + { + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readString(column, cursor); + cursor++; + } + } + else + { + null_map.setNull(cursor, count); + + auto chars_size_bak = chars.size(); + chars.resize(chars_size_bak + count); + memset(&chars[chars_size_bak], 0, count); + + auto idx = cursor; + cursor += count; + // the type 
of offset_data is PaddedPODArray, which makes sure that the -1 index is avaible + for (auto val_offset = offset_data[idx - 1]; idx < cursor; idx++) + { + offset_data[idx] = ++val_offset; + } + } + } + ); +} + + +template <> +void ParquetPlainValuesReader>::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData( + *assert_cast *>(col_ptr.get()), cursor + num_values); + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readDateTime64(column_data[nest_cursor]); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + auto col_data_pos = column_data + nest_cursor; + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readDateTime64(col_data_pos[i]); + } + } + ); +} + +template +void ParquetPlainValuesReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(col_ptr.get()), cursor + num_values); + using TValue = std::decay_t; + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readValue(column_data[nest_cursor]); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + plain_data_buffer.readBytes(column_data + nest_cursor, count * sizeof(TValue)); + } + ); +} + + +template +void ParquetFixedLenPlainReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + if constexpr (std::same_as> || std::same_as>) + { + readOverBigDecimal(col_ptr, null_map, num_values); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported type"); + } +} + +template +void ParquetFixedLenPlainReader::readOverBigDecimal( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData( + *assert_cast(col_ptr.get()), cursor + num_values); + + def_level_reader->visitNullableValues( + cursor, + num_values, + max_def_level, + null_map, + /* individual_visitor */ [&](size_t nest_cursor) + { + plain_data_buffer.readOverBigDecimal(column_data + nest_cursor, elem_bytes_num); + }, + /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) + { + auto col_data_pos = column_data + nest_cursor; + for (UInt32 i = 0; i < count; i++) + { + plain_data_buffer.readOverBigDecimal(col_data_pos + i, elem_bytes_num); + } + } + ); +} + + +template +void ParquetRleLCReader::readBatch( + MutableColumnPtr & index_col, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = index_col->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(index_col.get()), cursor + num_values); + + bool has_null = false; + + // in ColumnLowCardinality, first element in dictionary is null + // so we should increase each value by 1 in parquet index + auto val_getter = [&](Int32 val) { return val + 1; }; + + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](UInt32 nest_cursor) { + column_data[nest_cursor] = 0; + has_null = true; + }, + /* stepped_valid_visitor */ [&](UInt32 nest_cursor, const std::vector & valid_index_steps) { + rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); + }, + /* 
repeated_visitor */ [&](bool is_valid, UInt32 nest_cursor, UInt32 count) { + if (is_valid) + { + rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); + } + else + { + auto data_pos = column_data + nest_cursor; + std::fill(data_pos, data_pos + count, 0); + has_null = true; + } + } + ); + if (has_null) + { + null_map.setNull(0); + } +} + +template <> +void ParquetRleDictReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto & column = *assert_cast(col_ptr.get()); + auto cursor = column.size(); + std::vector value_cache; + + const auto & dict_chars = static_cast(page_dictionary).getChars(); + const auto & dict_offsets = static_cast(page_dictionary).getOffsets(); + + column.getOffsets().resize(cursor + num_values); + auto * offset_data = column.getOffsets().data(); + auto & chars = column.getChars(); + + auto append_nulls = [&](UInt8 num) { + for (auto limit = cursor + num; cursor < limit; cursor++) + { + chars.push_back(0); + offset_data[cursor] = chars.size(); + null_map.setNull(cursor); + } + }; + + auto append_string = [&](Int32 dict_idx) { + auto dict_chars_cursor = dict_offsets[dict_idx - 1]; + auto value_len = dict_offsets[dict_idx] - dict_chars_cursor; + auto chars_cursor = chars.size(); + chars.resize(chars_cursor + value_len); + + memcpySmallAllowReadWriteOverflow15(&chars[chars_cursor], &dict_chars[dict_chars_cursor], value_len); + offset_data[cursor] = chars.size(); + cursor++; + }; + + auto val_getter = [&](Int32 val) { return val + 1; }; + + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](UInt32) {}, + /* stepped_valid_visitor */ [&](UInt32, const std::vector & valid_index_steps) { + value_cache.resize(valid_index_steps.size()); + rle_data_reader->setValues(value_cache.data() + 1, valid_index_steps.size() - 1, val_getter); + + append_nulls(valid_index_steps[0]); + for (size_t i = 1; i < valid_index_steps.size(); i++) + { + append_string(value_cache[i]); + append_nulls(valid_index_steps[i] - 1); + } + }, + /* repeated_visitor */ [&](bool is_valid, UInt32, UInt32 count) { + if (is_valid) + { + value_cache.resize(count); + rle_data_reader->setValues(value_cache.data(), count, val_getter); + for (UInt32 i = 0; i < count; i++) + { + append_string(value_cache[i]); + } + } + else + { + append_nulls(count); + } + } + ); +} + +template +void ParquetRleDictReader::readBatch( + MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) +{ + auto cursor = col_ptr->size(); + auto * column_data = getResizedPrimitiveData(*assert_cast(col_ptr.get()), cursor + num_values); + const auto & dictionary_array = static_cast(page_dictionary).getData(); + + auto val_getter = [&](Int32 val) { return dictionary_array[val]; }; + def_level_reader->visitNullableBySteps( + cursor, + num_values, + max_def_level, + /* individual_null_visitor */ [&](UInt32 nest_cursor) { + null_map.setNull(nest_cursor); + }, + /* stepped_valid_visitor */ [&](UInt32 nest_cursor, const std::vector & valid_index_steps) { + rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); + }, + /* repeated_visitor */ [&](bool is_valid, UInt32 nest_cursor, UInt32 count) { + if (is_valid) + { + rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); + } + else + { + null_map.setNull(nest_cursor, count); + } + } + ); +} + + +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class 
ParquetPlainValuesReader; +template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader; + +template class ParquetFixedLenPlainReader>; +template class ParquetFixedLenPlainReader>; + +template class ParquetRleLCReader; +template class ParquetRleLCReader; +template class ParquetRleLCReader; + +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template class ParquetRleDictReader>; +template class ParquetRleDictReader; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h new file mode 100644 index 00000000000..2c95f495339 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -0,0 +1,263 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include "ParquetDataBuffer.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + +class RleValuesReader +{ +public: + RleValuesReader(std::unique_ptr bit_reader_, Int32 bit_width_) + : bit_reader(std::move(bit_reader_)), bit_width(bit_width_) {} + + /** + * @brief Used when the bit_width is 0, so all elements have same value. + */ + RleValuesReader(UInt32 total_size, Int32 val = 0) + : bit_reader(nullptr), bit_width(0), cur_group_size(total_size), cur_value(val), cur_group_is_packed(false) + {} + + void nextGroup(); + + void nextGroupIfNecessary() { if (cur_group_cursor >= cur_group_size) nextGroup(); } + + UInt32 curGroupLeft() const { return cur_group_size - cur_group_cursor; } + + /** + * @brief Visit num_values elements. + * For RLE encoding, for same group, the value is same, so they can be visited repeatedly. + * For BitPacked encoding, the values may be different with each other, so they must be visited individual. + * + * @tparam IndividualVisitor A callback with signature: void(Int32 val) + * @tparam RepeatedVisitor A callback with signature: void(UInt32 count, Int32 val) + */ + template + void visitValues(UInt32 num_values, IndividualVisitor && individual_visitor, RepeatedVisitor && repeated_visitor); + + /** + * @brief Visit num_values elements by parsed nullability. + * If the parsed value is same as max_def_level, then it is processed as null value. + * + * @tparam IndividualVisitor A callback with signature: void(size_t cursor) + * @tparam RepeatedVisitor A callback with signature: void(size_t cursor, UInt32 count) + * + * Because the null map is processed, so only the callbacks only need to process the valid data. + */ + template + void visitNullableValues( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + LazyNullMap & null_map, + IndividualVisitor && individual_visitor, + RepeatedVisitor && repeated_visitor); + + /** + * @brief Visit num_values elements by parsed nullability. + * It may be inefficient to process the valid data individually like in visitNullableValues, + * so a valid_index_steps index array is generated first, in order to process valid data continuously. 
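For concreteness, here is a minimal sketch, not part of the patch itself, of how a caller scatters decoded values into a column according to valid_index_steps (the helper name is hypothetical and the types are simplified; in the patch this logic lives in RleValuesReader::setValueBySteps):

template <typename T>
void scatterBySteps(T * column_data, const std::vector<UInt8> & steps, const T * decoded_values)
{
    // e.g. steps = [1, 3, 2] with decoded_values = {a, b} fills: null, a, null, null, b, null
    size_t pos = steps[0];                         // number of elements before the first valid value
    for (size_t i = 1; i < steps.size(); ++i)
    {
        column_data[pos] = decoded_values[i - 1];  // write the next valid value
        pos += steps[i];                           // skip it and the nulls that follow it
    }
}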
+ * + * @tparam IndividualNullVisitor A callback with signature: void(size_t cursor), used to process null value + * @tparam SteppedValidVisitor A callback with signature: + * void(size_t cursor, const std::vector & valid_index_steps) + * for n valid elements with null value interleaved in a BitPacked group, + * i-th item in valid_index_steps describes how many elements in column there are after (i-1)-th valid element. + * + * take following BitPacked group with 2 valid elements for example: + * null valid null null valid null + * then the valid_index_steps has values [1, 3, 2]. + * Please note that the the sum of valid_index_steps is same as elements number in this group. + * + * @tparam RepeatedVisitor A callback with signature: void(bool is_valid, UInt32 cursor, UInt32 count) + */ + template + void visitNullableBySteps( + size_t cursor, + UInt32 num_values, + Int32 max_def_level, + IndividualNullVisitor && null_visitor, + SteppedValidVisitor && stepped_valid_visitor, + RepeatedVisitor && repeated_visitor); + + /** + * @brief Set the Values to column_data directly + * + * @tparam TValue The type of column data. + * @tparam ValueGetter A callback with signature: TValue(Int32 val) + */ + template + void setValues(TValue * column_data, UInt32 num_values, ValueGetter && val_getter); + + /** + * @brief Set the value by valid_index_steps generated in visitNullableBySteps. + * According to visitNullableBySteps, the elements number is valid_index_steps.size()-1, + * so valid_index_steps.size()-1 elements are read, and set to column_data with steps in valid_index_steps + */ + template + void setValueBySteps( + TValue * column_data, + const std::vector & col_data_steps, + ValueGetter && val_getter); + +private: + std::unique_ptr bit_reader; + + std::vector cur_packed_bit_values; + std::vector valid_index_steps; + + Int32 bit_width; + + UInt32 cur_group_size = 0; + UInt32 cur_group_cursor = 0; + Int32 cur_value; + bool cur_group_is_packed; +}; + +using RleValuesReaderPtr = std::unique_ptr; + + +class ParquetDataValuesReader +{ +public: + virtual void readBatch(MutableColumnPtr & column, LazyNullMap & null_map, UInt32 num_values) = 0; + + virtual ~ParquetDataValuesReader() = default; +}; + +using ParquetDataValuesReaderPtr = std::unique_ptr; + + +/** + * The definition level is RLE or BitPacked encoding, while data is read directly + */ +template +class ParquetPlainValuesReader : public ParquetDataValuesReader +{ +public: + + ParquetPlainValuesReader( + Int32 max_def_level_, + std::unique_ptr def_level_reader_, + ParquetDataBuffer data_buffer_) + : max_def_level(max_def_level_) + , def_level_reader(std::move(def_level_reader_)) + , plain_data_buffer(std::move(data_buffer_)) + {} + + void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + std::unique_ptr def_level_reader; + ParquetDataBuffer plain_data_buffer; +}; + +/** + * The data and definition level encoding are same as ParquetPlainValuesReader. + * But the element size is const and bigger than primitive data type. 
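For reference, Parquet stores FIXED_LEN_BYTE_ARRAY decimals as big-endian two's-complement bytes, so reading them amounts to reversing the byte order and sign-extending into the wider native value. A standalone sketch of that conversion, with a hypothetical helper name (in the patch the actual conversion is done by ParquetDataBuffer::readOverBigDecimal, whose body is not shown in this hunk):

#include <cstring>

void decodeBigEndianDecimal(UInt8 * dst, size_t dst_len, const UInt8 * src, size_t src_len)
{
    const UInt8 sign_fill = (src[0] & 0x80) ? 0xFF : 0x00;
    std::memset(dst, sign_fill, dst_len);          // sign-extend the unused high bytes
    for (size_t i = 0; i < src_len; ++i)
        dst[i] = src[src_len - 1 - i];             // big-endian source -> little-endian destination
}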
+ */ +template +class ParquetFixedLenPlainReader : public ParquetDataValuesReader +{ +public: + + ParquetFixedLenPlainReader( + Int32 max_def_level_, + Int32 elem_bytes_num_, + std::unique_ptr def_level_reader_, + ParquetDataBuffer data_buffer_) + : max_def_level(max_def_level_) + , elem_bytes_num(elem_bytes_num_) + , def_level_reader(std::move(def_level_reader_)) + , plain_data_buffer(std::move(data_buffer_)) + {} + + void readOverBigDecimal(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values); + + void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + Int32 elem_bytes_num; + std::unique_ptr def_level_reader; + ParquetDataBuffer plain_data_buffer; +}; + +/** + * Read data according to the format of ColumnLowCardinality format. + * + * Only index and null column are processed in this class. + * And all null value is mapped to first index in dictionary, + * so the result index valued is added by one. +*/ +template +class ParquetRleLCReader : public ParquetDataValuesReader +{ +public: + ParquetRleLCReader( + Int32 max_def_level_, + std::unique_ptr def_level_reader_, + std::unique_ptr rle_data_reader_) + : max_def_level(max_def_level_) + , def_level_reader(std::move(def_level_reader_)) + , rle_data_reader(std::move(rle_data_reader_)) + {} + + void readBatch(MutableColumnPtr & index_col, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + std::unique_ptr def_level_reader; + std::unique_ptr rle_data_reader; +}; + +/** + * The definition level is RLE or BitPacked encoded, + * and the index of dictionary is also RLE or BitPacked encoded. + * + * while the result is not parsed as a low cardinality column, + * instead, a normal column is generated. 
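In the dictionary-encoded case the page dictionary therefore acts as a plain lookup table. A minimal sketch of the materialization step for numeric columns, using illustrative names and containers (in the patch the equivalent mapping is the val_getter lambda inside ParquetRleDictReader::readBatch):

#include <vector>

template <typename T>
void materializeFromDictionary(T * dst, const std::vector<Int32> & decoded_indices, const std::vector<T> & page_dictionary)
{
    for (size_t i = 0; i < decoded_indices.size(); ++i)
        dst[i] = page_dictionary[decoded_indices[i]];
}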
+ */ +template +class ParquetRleDictReader : public ParquetDataValuesReader +{ +public: + ParquetRleDictReader( + Int32 max_def_level_, + std::unique_ptr def_level_reader_, + std::unique_ptr rle_data_reader_, + const IColumn & page_dictionary_) + : max_def_level(max_def_level_) + , def_level_reader(std::move(def_level_reader_)) + , rle_data_reader(std::move(rle_data_reader_)) + , page_dictionary(page_dictionary_) + {} + + void readBatch(MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) override; + +private: + Int32 max_def_level; + std::unique_ptr def_level_reader; + std::unique_ptr rle_data_reader; + const IColumn & page_dictionary; +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp new file mode 100644 index 00000000000..00dee9074fe --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -0,0 +1,506 @@ +#include "ParquetLeafColReader.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + +namespace +{ + +template +void visitColStrIndexType(size_t data_size, TypeVisitor && visitor) +{ + // refer to: DataTypeLowCardinality::createColumnUniqueImpl + if (data_size < (1ull << 8)) + { + visitor(static_cast(nullptr)); + } + else if (data_size < (1ull << 16)) + { + visitor(static_cast(nullptr)); + } + else if (data_size < (1ull << 32)) + { + visitor(static_cast(nullptr)); + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported data size {}", data_size); + } +} + +void reserveColumnStrRows(MutableColumnPtr & col, UInt32 rows_num) +{ + col->reserve(rows_num); + + /// Never reserve for too big size according to SerializationString::deserializeBinaryBulk + if (rows_num < 256 * 1024 * 1024) + { + try + { + static_cast(col.get())->getChars().reserve(rows_num); + } + catch (Exception & e) + { + e.addMessage("(limit = " + toString(rows_num) + ")"); + throw; + } + } +}; + + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */); + +template <> +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & /* col_des */, + const DataTypePtr & /* data_type */) +{ + auto col = ColumnString::create(); + col->getOffsets().resize(page.num_values() + 1); + col->getChars().reserve(page.num_values()); + ParquetDataBuffer buffer(page.data(), page.size()); + + // will be read as low cardinality column + // in which case, the null key is set to first position, so the first string should be empty + col->getChars().push_back(0); + col->getOffsets()[0] = 1; + for (auto i = 1; i <= page.num_values(); i++) + { + buffer.readString(*col, i); + } + return col; +} + +template <> +ColumnPtr readDictPage>( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & /* col_des */, + const DataTypePtr & data_type) +{ + auto & datetime_type = assert_cast(*data_type); + auto dict_col = ColumnDecimal::create(page.num_values(), datetime_type.getScale()); + auto * col_data = dict_col->getData().data(); + ParquetDataBuffer buffer(page.data(), page.size(), datetime_type.getScale()); + for (auto i = 0; i < 
page.num_values(); i++) + { + buffer.readDateTime64(col_data[i]); + } + return dict_col; +} + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnDecimal::create(page.num_values(), col_des.type_scale()); + auto * col_data = dict_col->getData().data(); + ParquetDataBuffer buffer(page.data(), page.size()); + for (auto i = 0; i < page.num_values(); i++) + { + buffer.readOverBigDecimal(col_data + i, col_des.type_length()); + } + return dict_col; +} + +template requires (!std::is_same_v) +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & col_des, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnDecimal::create(page.num_values(), col_des.type_scale()); + ParquetDataBuffer buffer(page.data(), page.size()); + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(typename TColumnDecimal::ValueType)); + return dict_col; +} + +template +ColumnPtr readDictPage( + const parquet::DictionaryPage & page, + const parquet::ColumnDescriptor & /* col_des */, + const DataTypePtr & /* data_type */) +{ + auto dict_col = TColumnVector::create(page.num_values()); + ParquetDataBuffer buffer(page.data(), page.size()); + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(typename TColumnVector::ValueType)); + return dict_col; +} + + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer); + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer) +{ + return std::make_unique>( + col_des.max_definition_level(), + col_des.type_length(), + std::move(def_level_reader), + std::move(buffer)); +} + +template +std::unique_ptr createPlainReader( + const parquet::ColumnDescriptor & col_des, + RleValuesReaderPtr def_level_reader, + ParquetDataBuffer buffer) +{ + return std::make_unique>( + col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); +} + + +} // anonymous namespace + + +template +ParquetLeafColReader::ParquetLeafColReader( + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr base_type_, + std::unique_ptr meta_, + std::unique_ptr reader_) + : col_descriptor(col_descriptor_) + , base_data_type(base_type_) + , col_chunk_meta(std::move(meta_)) + , parquet_page_reader(std::move(reader_)) + , log(&Poco::Logger::get("ParquetLeafColReader")) +{ +} + +template +ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt32 rows_num, const String & name) +{ + reading_rows_num = rows_num; + auto readPageIfEmpty = [&]() { + while (!cur_page_values) readPage(); + }; + + // make sure the dict page has been read, and the status is updated + readPageIfEmpty(); + resetColumn(rows_num); + + while (rows_num) + { + // if dictionary page encountered, another page should be read + readPageIfEmpty(); + + auto read_values = std::min(rows_num, cur_page_values); + data_values_reader->readBatch(column, *null_map, read_values); + + cur_page_values -= read_values; + rows_num -= read_values; + } + + return releaseColumn(name); +} + +template <> +void ParquetLeafColReader::resetColumn(UInt32 rows_num) +{ + if (reading_low_cardinality) + { + assert(dictionary); + visitColStrIndexType(dictionary->size(), [&](TColVec *) { + column = TColVec::create(); + }); + + // only first 
position is used + null_map = std::make_unique(1); + column->reserve(rows_num); + } + else + { + null_map = std::make_unique(rows_num); + column = ColumnString::create(); + reserveColumnStrRows(column, rows_num); + } +} + +template +void ParquetLeafColReader::resetColumn(UInt32 rows_num) +{ + assert(!reading_low_cardinality); + + column = base_data_type->createColumn(); + column->reserve(rows_num); + null_map = std::make_unique(rows_num); +} + +template +void ParquetLeafColReader::degradeDictionary() +{ + assert(dictionary && column->size()); + null_map = std::make_unique(reading_rows_num); + auto col_existing = std::move(column); + column = ColumnString::create(); + + ColumnString & col_dest = *static_cast(column.get()); + const ColumnString & col_dict_str = *static_cast(dictionary.get()); + + visitColStrIndexType(dictionary->size(), [&](TColVec *) { + const TColVec & col_src = *static_cast(col_existing.get()); + reserveColumnStrRows(column, reading_rows_num); + + col_dest.getOffsets().resize(col_src.size()); + for (size_t i = 0; i < col_src.size(); i++) + { + auto src_idx = col_src.getData()[i]; + if (0 == src_idx) + { + null_map->setNull(i); + } + auto dict_chars_cursor = col_dict_str.getOffsets()[src_idx - 1]; + auto str_len = col_dict_str.getOffsets()[src_idx] - dict_chars_cursor; + auto dst_chars_cursor = col_dest.getChars().size(); + col_dest.getChars().resize(dst_chars_cursor + str_len); + + memcpySmallAllowReadWriteOverflow15( + &col_dest.getChars()[dst_chars_cursor], &col_dict_str.getChars()[dict_chars_cursor], str_len); + col_dest.getOffsets()[i] = col_dest.getChars().size(); + } + }); + LOG_INFO(log, "degraded dictionary to normal column"); +} + +template +ColumnWithTypeAndName ParquetLeafColReader::releaseColumn(const String & name) +{ + DataTypePtr data_type = base_data_type; + if (reading_low_cardinality) + { + MutableColumnPtr col_unique; + if (null_map->getNullableCol()) + { + data_type = std::make_shared(data_type); + col_unique = ColumnUnique::create(dictionary->assumeMutable(), true); + } + else + { + col_unique = ColumnUnique::create(dictionary->assumeMutable(), false); + } + column = ColumnLowCardinality::create(std::move(col_unique), std::move(column), true); + data_type = std::make_shared(data_type); + } + else + { + if (null_map->getNullableCol()) + { + column = ColumnNullable::create(std::move(column), null_map->getNullableCol()->assumeMutable()); + data_type = std::make_shared(data_type); + } + } + ColumnWithTypeAndName res = {std::move(column), data_type, name}; + column = nullptr; + null_map = nullptr; + + return res; +} + +template +void ParquetLeafColReader::readPage() +{ + // refer to: ColumnReaderImplBase::ReadNewPage in column_reader.cc + auto cur_page = parquet_page_reader->NextPage(); + switch (cur_page->type()) + { + case parquet::PageType::DATA_PAGE: + readPageV1(*std::static_pointer_cast(cur_page)); + break; + case parquet::PageType::DATA_PAGE_V2: + readPageV2(*std::static_pointer_cast(cur_page)); + break; + case parquet::PageType::DICTIONARY_PAGE: + { + const parquet::DictionaryPage & dict_page = *std::static_pointer_cast(cur_page); + if (unlikely( + dict_page.encoding() != parquet::Encoding::PLAIN_DICTIONARY + && dict_page.encoding() != parquet::Encoding::PLAIN)) + { + throw new Exception( + ErrorCodes::NOT_IMPLEMENTED, "Unsupported dictionary page encoding {}", dict_page.encoding()); + } + LOG_INFO(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name()); + + dictionary = readDictPage(dict_page, 
col_descriptor, base_data_type); + if (std::is_same_v) + { + reading_low_cardinality = true; + } + break; + } + default: + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported page type: {}", cur_page->type()); + } +} + +template +void ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page) +{ + static parquet::LevelDecoder repetition_level_decoder; + + cur_page_values = page.num_values(); + + // refer to: VectorizedColumnReader::readPageV1 in Spark and LevelDecoder::SetData in column_reader.cc + if (page.definition_level_encoding() != parquet::Encoding::RLE && col_descriptor.max_definition_level() != 0) + { + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unsupported encoding: {}", page.definition_level_encoding()); + } + const auto * buffer = page.data(); + auto max_size = page.size(); + + if (col_descriptor.max_repetition_level() > 0) + { + auto rep_levels_bytes = repetition_level_decoder.SetData( + page.repetition_level_encoding(), col_descriptor.max_repetition_level(), 0, buffer, max_size); + buffer += rep_levels_bytes; + max_size -= rep_levels_bytes; + } + + assert(col_descriptor.max_definition_level() >= 0); + std::unique_ptr def_level_reader; + if (col_descriptor.max_definition_level() > 0) { + auto bit_width = arrow::BitUtil::Log2(col_descriptor.max_definition_level() + 1); + auto num_bytes = ::arrow::util::SafeLoadAs(buffer); + auto bit_reader = std::make_unique(buffer + 4, num_bytes); + num_bytes += 4; + buffer += num_bytes; + max_size -= num_bytes; + def_level_reader = std::make_unique(std::move(bit_reader), bit_width); + } + else + { + def_level_reader = std::make_unique(page.num_values()); + } + + switch (page.encoding()) + { + case parquet::Encoding::PLAIN: + { + if (reading_low_cardinality) + { + reading_low_cardinality = false; + degradeDictionary(); + } + + ParquetDataBuffer parquet_buffer = [&]() { + if constexpr (!std::is_same_v, TColumn>) + return ParquetDataBuffer(buffer, max_size); + + auto scale = assert_cast(*base_data_type).getScale(); + return ParquetDataBuffer(buffer, max_size, scale); + }(); + data_values_reader = createPlainReader( + col_descriptor, std::move(def_level_reader), std::move(parquet_buffer)); + break; + } + case parquet::Encoding::RLE_DICTIONARY: + case parquet::Encoding::PLAIN_DICTIONARY: + { + if (unlikely(!dictionary)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "dictionary should be existed"); + } + + // refer to: DictDecoderImpl::SetData in encoding.cc + auto bit_width = *buffer; + auto bit_reader = std::make_unique(++buffer, --max_size); + data_values_reader = createDictReader( + std::move(def_level_reader), std::make_unique(std::move(bit_reader), bit_width)); + break; + } + case parquet::Encoding::BYTE_STREAM_SPLIT: + case parquet::Encoding::DELTA_BINARY_PACKED: + case parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY: + case parquet::Encoding::DELTA_BYTE_ARRAY: + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unsupported encoding: {}", page.encoding()); + + default: + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Unknown encoding type: {}", page.encoding()); + } +} + +template +void ParquetLeafColReader::readPageV2(const parquet::DataPageV2 & /*page*/) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "read page V2 is not implemented yet"); +} + +template +std::unique_ptr ParquetLeafColReader::createDictReader( + std::unique_ptr def_level_reader, std::unique_ptr rle_data_reader) +{ + if (reading_low_cardinality && std::same_as) + { + std::unique_ptr res; + visitColStrIndexType(dictionary->size(), [&](TCol *) { + res = 
std::make_unique>( + col_descriptor.max_definition_level(), + std::move(def_level_reader), + std::move(rle_data_reader)); + }); + return res; + } + return std::make_unique>( + col_descriptor.max_definition_level(), + std::move(def_level_reader), + std::move(rle_data_reader), + *assert_cast(dictionary.get())); +} + + +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; +template class ParquetLeafColReader>; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h new file mode 100644 index 00000000000..f730afe40ed --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h @@ -0,0 +1,63 @@ +#pragma once + +#include +#include +#include + +#include "ParquetColumnReader.h" +#include "ParquetDataValuesReader.h" + +namespace parquet +{ + +class ColumnDescriptor; + +} + + +namespace DB +{ + +template +class ParquetLeafColReader : public ParquetColumnReader +{ +public: + ParquetLeafColReader( + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr base_type_, + std::unique_ptr meta_, + std::unique_ptr reader_); + + ColumnWithTypeAndName readBatch(UInt32 rows_num, const String & name) override; + +private: + const parquet::ColumnDescriptor & col_descriptor; + DataTypePtr base_data_type; + std::unique_ptr col_chunk_meta; + std::unique_ptr parquet_page_reader; + std::unique_ptr data_values_reader; + + MutableColumnPtr column; + std::unique_ptr null_map; + + ColumnPtr dictionary; + + UInt32 cur_page_values = 0; + UInt32 reading_rows_num = 0; + bool reading_low_cardinality = false; + + Poco::Logger * log; + + void resetColumn(UInt32 rows_num); + void degradeDictionary(); + ColumnWithTypeAndName releaseColumn(const String & name); + + void readPage(); + void readPageV1(const parquet::DataPageV1 & page); + void readPageV2(const parquet::DataPageV2 & page); + + std::unique_ptr createDictReader( + std::unique_ptr def_level_reader, std::unique_ptr rle_data_reader); +}; + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp new file mode 100644 index 00000000000..a5744b85174 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -0,0 +1,225 @@ +#include "ParquetRecordReader.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ParquetLeafColReader.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + +// #define THROW_ARROW_NOT_OK(status) \ +// do \ +// { \ +// if (::arrow::Status _s = (status); !_s.ok()) \ +// throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ +// } while (false) + + +#define THROW_PARQUET_EXCEPTION(s) \ + do \ + { \ + try { (s); } \ + catch (const ::parquet::ParquetException & e) \ + { \ + throw Exception(e.what(), ErrorCodes::PARQUET_EXCEPTION); \ + } \ + } while (false) + +namespace +{ + +Int64 getTotalRows(const parquet::FileMetaData & meta_data) +{ + Int64 res = 0; + for (int i = 0; i < meta_data.num_row_groups(); i++) + { + res += meta_data.RowGroup(i)->num_rows(); + } + 
return res; +} + +std::unique_ptr createReader( + const parquet::ColumnDescriptor & col_descriptor, + DataTypePtr ch_type, + std::unique_ptr meta, + std::unique_ptr reader) +{ + if (col_descriptor.logical_type()->is_date() && parquet::Type::INT32 == col_descriptor.physical_type()) + { + return std::make_unique>( + col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); + } + else if (col_descriptor.logical_type()->is_decimal()) + { + switch (col_descriptor.physical_type()) + { + case parquet::Type::INT32: + { + auto data_type = std::make_shared( + col_descriptor.type_precision(), col_descriptor.type_scale()); + return std::make_unique>>( + col_descriptor, data_type, std::move(meta), std::move(reader)); + } + case parquet::Type::INT64: + { + auto data_type = std::make_shared( + col_descriptor.type_precision(), col_descriptor.type_scale()); + return std::make_unique>>( + col_descriptor, data_type, std::move(meta), std::move(reader)); + } + case parquet::Type::FIXED_LEN_BYTE_ARRAY: + { + if (col_descriptor.type_length() <= static_cast(DecimalUtils::max_precision)) + { + auto data_type = std::make_shared( + col_descriptor.type_precision(), col_descriptor.type_scale()); + return std::make_unique>>( + col_descriptor, data_type, std::move(meta), std::move(reader)); + } + else + { + auto data_type = std::make_shared( + col_descriptor.type_precision(), col_descriptor.type_scale()); + return std::make_unique>>( + col_descriptor, data_type, std::move(meta), std::move(reader)); + } + } + default: + throw Exception( + ErrorCodes::PARQUET_EXCEPTION, + "Type not supported for decimal: {}", + col_descriptor.physical_type()); + } + } + else + { + switch (col_descriptor.physical_type()) + { + case parquet::Type::INT32: + return std::make_unique>( + col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); + case parquet::Type::INT64: + return std::make_unique>( + col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); + case parquet::Type::FLOAT: + return std::make_unique>( + col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); + case parquet::Type::INT96: + { + DataTypePtr read_type = ch_type; + if (!isDateTime64(ch_type)) + { + read_type = std::make_shared(ParquetRecordReader::default_datetime64_scale); + } + return std::make_unique>>( + col_descriptor, read_type, std::move(meta), std::move(reader)); + } + case parquet::Type::DOUBLE: + return std::make_unique>( + col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); + case parquet::Type::BYTE_ARRAY: + return std::make_unique>( + col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); + default: + throw Exception( + ErrorCodes::PARQUET_EXCEPTION, "Type not supported: {}", col_descriptor.physical_type()); + } + } +} + +} // anonymouse namespace + +ParquetRecordReader::ParquetRecordReader( + Block header_, + std::shared_ptr<::arrow::io::RandomAccessFile> file, + const parquet::ReaderProperties& properties) + : header(std::move(header_)) +{ + // Only little endian system is supported currently + static_assert(std::endian::native == std::endian::little); + + log = &Poco::Logger::get("ParquetRecordReader"); + THROW_PARQUET_EXCEPTION(file_reader = parquet::ParquetFileReader::Open(std::move(file), properties)); + left_rows = getTotalRows(*file_reader->metadata()); + + parquet_col_indice.reserve(header.columns()); + column_readers.reserve(header.columns()); + for (const auto & col_with_name : header) + { + auto idx = 
file_reader->metadata()->schema()->ColumnIndex(col_with_name.name); + if (idx < 0) + { + throw Exception("can not find column with name: " + col_with_name.name, ErrorCodes::BAD_ARGUMENTS); + } + parquet_col_indice.push_back(idx); + } +} + +Chunk ParquetRecordReader::readChunk(UInt32 num_rows) +{ + if (!left_rows) + { + return Chunk{}; + } + if (!cur_row_group_left_rows) + { + loadNextRowGroup(); + } + + Columns columns(header.columns()); + auto num_rows_read = std::min(static_cast(num_rows), cur_row_group_left_rows); + for (size_t i = 0; i < header.columns(); i++) + { + columns[i] = castColumn( + column_readers[i]->readBatch(num_rows_read, header.getByPosition(i).name), + header.getByPosition(i).type); + } + left_rows -= num_rows_read; + cur_row_group_left_rows -= num_rows_read; + + return Chunk{std::move(columns), num_rows_read}; +} + +void ParquetRecordReader::loadNextRowGroup() +{ + Stopwatch watch(CLOCK_MONOTONIC); + cur_row_group_reader = file_reader->RowGroup(next_row_group_idx); + + column_readers.clear(); + for (size_t i = 0; i < parquet_col_indice.size(); i++) + { + column_readers.emplace_back(createReader( + *file_reader->metadata()->schema()->Column(parquet_col_indice[i]), + header.getByPosition(i).type, + cur_row_group_reader->metadata()->ColumnChunk(parquet_col_indice[i]), + cur_row_group_reader->GetColumnPageReader(parquet_col_indice[i]))); + } + LOG_DEBUG(log, "reading row group {} consumed {} ms", next_row_group_idx, watch.elapsedNanoseconds() / 1e6); + ++next_row_group_idx; + cur_row_group_left_rows = cur_row_group_reader->metadata()->num_rows(); +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h new file mode 100644 index 00000000000..d77cab6553b --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h @@ -0,0 +1,48 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include "ParquetColumnReader.h" + +namespace DB +{ + +class ParquetRecordReader +{ +public: + ParquetRecordReader( + Block header_, + std::shared_ptr<::arrow::io::RandomAccessFile> file, + const parquet::ReaderProperties& properties); + + Chunk readChunk(UInt32 num_rows); + + // follow the scale generated by spark + static constexpr UInt8 default_datetime64_scale = 9; + +private: + std::unique_ptr file_reader; + + Block header; + + std::shared_ptr cur_row_group_reader; + ParquetColReaders column_readers; + + std::vector parquet_col_indice; + UInt64 left_rows; + UInt64 cur_row_group_left_rows = 0; + int next_row_group_idx = 0; + + Poco::Logger * log; + + void loadNextRowGroup(); +}; + +} From 8fb89cec9f28d6a12c2216ccd849fe0ead3ccd33 Mon Sep 17 00:00:00 2001 From: copperybean Date: Sun, 14 Jan 2024 12:01:23 +0800 Subject: [PATCH 109/392] fix build Change-Id: I57f025b17a04e2c5dded3f18e7f477841287a2c2 --- base/base/Decimal_fwd.h | 4 ++++ src/Columns/ColumnDecimal.h | 8 +++++++ src/Columns/ColumnVector.h | 3 +++ src/Common/ErrorCodes.cpp | 1 + .../Impl/Parquet/ParquetColumnReader.h | 3 ++- .../Formats/Impl/Parquet/ParquetDataBuffer.h | 12 ++++++---- .../Impl/Parquet/ParquetDataValuesReader.cpp | 23 ++++++++++--------- .../Impl/Parquet/ParquetDataValuesReader.h | 23 +++++++++---------- .../Impl/Parquet/ParquetLeafColReader.cpp | 17 +++++++------- .../Impl/Parquet/ParquetLeafColReader.h | 7 +++--- .../Impl/Parquet/ParquetRecordReader.cpp | 19 ++++++--------- .../Impl/Parquet/ParquetRecordReader.h | 7 +++--- 12 files changed, 71 insertions(+), 56 deletions(-) diff 
--git a/base/base/Decimal_fwd.h b/base/base/Decimal_fwd.h index beb228cea3c..a11e13a479b 100644 --- a/base/base/Decimal_fwd.h +++ b/base/base/Decimal_fwd.h @@ -44,6 +44,10 @@ concept is_over_big_int = || std::is_same_v || std::is_same_v || std::is_same_v; + +template +concept is_over_big_decimal = is_decimal && is_over_big_int; + } template <> struct is_signed { static constexpr bool value = true; }; diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index e0ea26744dc..e606aaaff0f 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -141,6 +141,14 @@ protected: UInt32 scale; }; +template +concept is_col_over_big_decimal = std::is_same_v> + && is_decimal && is_over_big_int; + +template +concept is_col_int_decimal = std::is_same_v> + && is_decimal && std::is_integral_v; + template class ColumnVector; template struct ColumnVectorOrDecimalT { using Col = ColumnVector; }; template struct ColumnVectorOrDecimalT { using Col = ColumnDecimal; }; diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 39ee1d931bd..91bceaa4534 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -441,6 +441,9 @@ ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_ return res; } +template +concept is_col_vector = std::is_same_v>; + /// Prevent implicit template instantiation of ColumnVector for common types extern template class ColumnVector; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 44c051401ef..106f443d532 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -600,6 +600,7 @@ M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \ M(720, USER_EXPIRED) \ M(721, DEPRECATED_FUNCTION) \ + M(722, PARQUET_EXCEPTION) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ diff --git a/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h index cfd9d3ba5bd..2c78949e8e1 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetColumnReader.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace parquet { @@ -18,7 +19,7 @@ namespace DB class ParquetColumnReader { public: - virtual ColumnWithTypeAndName readBatch(UInt32 rows_num, const String & name) = 0; + virtual ColumnWithTypeAndName readBatch(UInt64 rows_num, const String & name) = 0; virtual ~ParquetColumnReader() = default; }; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h index 1f83c74f9ad..be9710e1726 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -142,15 +142,19 @@ private: class LazyNullMap { public: - LazyNullMap(UInt32 size_) : size(size_), col_nullable(nullptr) {} + LazyNullMap(UInt64 size_) : size(size_), col_nullable(nullptr) {} - void setNull(UInt32 cursor) + template + requires std::is_integral_v + void setNull(T cursor) { initialize(); null_map[cursor] = 1; } - void setNull(UInt32 cursor, UInt32 count) + template + requires std::is_integral_v + void setNull(T cursor, UInt32 count) { initialize(); memset(null_map + cursor, 1, count); @@ -159,7 +163,7 @@ public: ColumnPtr getNullableCol() { return col_nullable; } private: - UInt32 size; + UInt64 size; UInt8 * null_map; ColumnPtr col_nullable; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp 
b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 659a7a11969..3afc66dcb36 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -189,7 +189,7 @@ void RleValuesReader::setValueBySteps( res_values += *(step_iterator++); visitValues( - col_data_steps.size() - 1, + static_cast(col_data_steps.size() - 1), /* individual_visitor */ [&](Int32 val) { *res_values = val_getter(val); @@ -394,14 +394,14 @@ void ParquetRleLCReader::readBatch( cursor, num_values, max_def_level, - /* individual_null_visitor */ [&](UInt32 nest_cursor) { + /* individual_null_visitor */ [&](size_t nest_cursor) { column_data[nest_cursor] = 0; has_null = true; }, - /* stepped_valid_visitor */ [&](UInt32 nest_cursor, const std::vector & valid_index_steps) { + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) { rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); }, - /* repeated_visitor */ [&](bool is_valid, UInt32 nest_cursor, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) { if (is_valid) { rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); @@ -461,10 +461,11 @@ void ParquetRleDictReader::readBatch( cursor, num_values, max_def_level, - /* individual_null_visitor */ [&](UInt32) {}, - /* stepped_valid_visitor */ [&](UInt32, const std::vector & valid_index_steps) { + /* individual_null_visitor */ [&](size_t) {}, + /* stepped_valid_visitor */ [&](size_t, const std::vector & valid_index_steps) { value_cache.resize(valid_index_steps.size()); - rle_data_reader->setValues(value_cache.data() + 1, valid_index_steps.size() - 1, val_getter); + rle_data_reader->setValues( + value_cache.data() + 1, static_cast(valid_index_steps.size() - 1), val_getter); append_nulls(valid_index_steps[0]); for (size_t i = 1; i < valid_index_steps.size(); i++) @@ -473,7 +474,7 @@ void ParquetRleDictReader::readBatch( append_nulls(valid_index_steps[i] - 1); } }, - /* repeated_visitor */ [&](bool is_valid, UInt32, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t, UInt32 count) { if (is_valid) { value_cache.resize(count); @@ -504,13 +505,13 @@ void ParquetRleDictReader::readBatch( cursor, num_values, max_def_level, - /* individual_null_visitor */ [&](UInt32 nest_cursor) { + /* individual_null_visitor */ [&](size_t nest_cursor) { null_map.setNull(nest_cursor); }, - /* stepped_valid_visitor */ [&](UInt32 nest_cursor, const std::vector & valid_index_steps) { + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) { rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); }, - /* repeated_visitor */ [&](bool is_valid, UInt32 nest_cursor, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) { if (is_valid) { rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 2c95f495339..66a1f4877e4 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -25,7 +24,7 @@ namespace ErrorCodes class RleValuesReader { public: - RleValuesReader(std::unique_ptr bit_reader_, 
Int32 bit_width_) + RleValuesReader(std::unique_ptr bit_reader_, Int32 bit_width_) : bit_reader(std::move(bit_reader_)), bit_width(bit_width_) {} /** @@ -45,7 +44,7 @@ public: * @brief Visit num_values elements. * For RLE encoding, for same group, the value is same, so they can be visited repeatedly. * For BitPacked encoding, the values may be different with each other, so they must be visited individual. - * + * * @tparam IndividualVisitor A callback with signature: void(Int32 val) * @tparam RepeatedVisitor A callback with signature: void(UInt32 count, Int32 val) */ @@ -55,10 +54,10 @@ public: /** * @brief Visit num_values elements by parsed nullability. * If the parsed value is same as max_def_level, then it is processed as null value. - * + * * @tparam IndividualVisitor A callback with signature: void(size_t cursor) * @tparam RepeatedVisitor A callback with signature: void(size_t cursor, UInt32 count) - * + * * Because the null map is processed, so only the callbacks only need to process the valid data. */ template @@ -74,18 +73,18 @@ public: * @brief Visit num_values elements by parsed nullability. * It may be inefficient to process the valid data individually like in visitNullableValues, * so a valid_index_steps index array is generated first, in order to process valid data continuously. - * + * * @tparam IndividualNullVisitor A callback with signature: void(size_t cursor), used to process null value * @tparam SteppedValidVisitor A callback with signature: * void(size_t cursor, const std::vector & valid_index_steps) * for n valid elements with null value interleaved in a BitPacked group, * i-th item in valid_index_steps describes how many elements in column there are after (i-1)-th valid element. - * + * * take following BitPacked group with 2 valid elements for example: * null valid null null valid null * then the valid_index_steps has values [1, 3, 2]. * Please note that the the sum of valid_index_steps is same as elements number in this group. - * + * * @tparam RepeatedVisitor A callback with signature: void(bool is_valid, UInt32 cursor, UInt32 count) */ template @@ -99,7 +98,7 @@ public: /** * @brief Set the Values to column_data directly - * + * * @tparam TValue The type of column data. * @tparam ValueGetter A callback with signature: TValue(Int32 val) */ @@ -118,7 +117,7 @@ public: ValueGetter && val_getter); private: - std::unique_ptr bit_reader; + std::unique_ptr bit_reader; std::vector cur_packed_bit_values; std::vector valid_index_steps; @@ -203,7 +202,7 @@ private: /** * Read data according to the format of ColumnLowCardinality format. - * + * * Only index and null column are processed in this class. * And all null value is mapped to first index in dictionary, * so the result index valued is added by one. @@ -232,7 +231,7 @@ private: /** * The definition level is RLE or BitPacked encoded, * and the index of dictionary is also RLE or BitPacked encoded. - * + * * while the result is not parsed as a low cardinality column, * instead, a normal column is generated. 
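For contrast with the ParquetRleLCReader path described a few lines above, here is a self-contained sketch, not part of the patch, of how parquet dictionary indices and nulls become ColumnLowCardinality indices (names and types are illustrative only):

#include <vector>

// parquet dictionary:           ["apple", "banana"]      -> indices 0, 1
// ColumnLowCardinality unique:  ["", "apple", "banana"]  -> index 0 is reserved for null/default
std::vector<UInt32> toLowCardinalityIndices(const std::vector<Int32> & parquet_indices, const std::vector<bool> & is_null)
{
    std::vector<UInt32> res(parquet_indices.size());
    for (size_t i = 0; i < res.size(); ++i)
        res[i] = is_null[i] ? 0 : static_cast<UInt32>(parquet_indices[i] + 1);
    return res;
}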
*/ diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp index 00dee9074fe..2e3d329bcd2 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -58,7 +59,7 @@ void visitColStrIndexType(size_t data_size, TypeVisitor && visitor) } } -void reserveColumnStrRows(MutableColumnPtr & col, UInt32 rows_num) +void reserveColumnStrRows(MutableColumnPtr & col, UInt64 rows_num) { col->reserve(rows_num); @@ -212,7 +213,7 @@ ParquetLeafColReader::ParquetLeafColReader( } template -ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt32 rows_num, const String & name) +ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt64 rows_num, const String & name) { reading_rows_num = rows_num; auto readPageIfEmpty = [&]() { @@ -228,7 +229,7 @@ ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt32 rows_num, // if dictionary page encountered, another page should be read readPageIfEmpty(); - auto read_values = std::min(rows_num, cur_page_values); + auto read_values = static_cast(std::min(rows_num, static_cast(cur_page_values))); data_values_reader->readBatch(column, *null_map, read_values); cur_page_values -= read_values; @@ -239,7 +240,7 @@ ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt32 rows_num, } template <> -void ParquetLeafColReader::resetColumn(UInt32 rows_num) +void ParquetLeafColReader::resetColumn(UInt64 rows_num) { if (reading_low_cardinality) { @@ -261,7 +262,7 @@ void ParquetLeafColReader::resetColumn(UInt32 rows_num) } template -void ParquetLeafColReader::resetColumn(UInt32 rows_num) +void ParquetLeafColReader::resetColumn(UInt64 rows_num) { assert(!reading_low_cardinality); @@ -403,9 +404,9 @@ void ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page) assert(col_descriptor.max_definition_level() >= 0); std::unique_ptr def_level_reader; if (col_descriptor.max_definition_level() > 0) { - auto bit_width = arrow::BitUtil::Log2(col_descriptor.max_definition_level() + 1); + auto bit_width = arrow::bit_util::Log2(col_descriptor.max_definition_level() + 1); auto num_bytes = ::arrow::util::SafeLoadAs(buffer); - auto bit_reader = std::make_unique(buffer + 4, num_bytes); + auto bit_reader = std::make_unique(buffer + 4, num_bytes); num_bytes += 4; buffer += num_bytes; max_size -= num_bytes; @@ -447,7 +448,7 @@ void ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page) // refer to: DictDecoderImpl::SetData in encoding.cc auto bit_width = *buffer; - auto bit_reader = std::make_unique(++buffer, --max_size); + auto bit_reader = std::make_unique(++buffer, --max_size); data_values_reader = createDictReader( std::move(def_level_reader), std::make_unique(std::move(bit_reader), bit_width)); break; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h index f730afe40ed..c5b14132f17 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include @@ -28,7 +27,7 @@ public: std::unique_ptr meta_, std::unique_ptr reader_); - ColumnWithTypeAndName readBatch(UInt32 rows_num, const String & name) override; + ColumnWithTypeAndName readBatch(UInt64 rows_num, const String & name) override; private: const parquet::ColumnDescriptor & 
col_descriptor; @@ -42,13 +41,13 @@ private: ColumnPtr dictionary; + UInt64 reading_rows_num = 0; UInt32 cur_page_values = 0; - UInt32 reading_rows_num = 0; bool reading_low_cardinality = false; Poco::Logger * log; - void resetColumn(UInt32 rows_num); + void resetColumn(UInt64 rows_num); void degradeDictionary(); ColumnWithTypeAndName releaseColumn(const String & name); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index a5744b85174..9ff4a7a16aa 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -30,21 +31,14 @@ namespace ErrorCodes extern const int PARQUET_EXCEPTION; } -// #define THROW_ARROW_NOT_OK(status) \ -// do \ -// { \ -// if (::arrow::Status _s = (status); !_s.ok()) \ -// throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ -// } while (false) - - #define THROW_PARQUET_EXCEPTION(s) \ do \ { \ try { (s); } \ catch (const ::parquet::ParquetException & e) \ { \ - throw Exception(e.what(), ErrorCodes::PARQUET_EXCEPTION); \ + auto msg = PreformattedMessage::create("Excepted when reading parquet: {}", e.what()); \ + throw Exception(std::move(msg), ErrorCodes::PARQUET_EXCEPTION); \ } \ } while (false) @@ -172,13 +166,14 @@ ParquetRecordReader::ParquetRecordReader( auto idx = file_reader->metadata()->schema()->ColumnIndex(col_with_name.name); if (idx < 0) { - throw Exception("can not find column with name: " + col_with_name.name, ErrorCodes::BAD_ARGUMENTS); + auto msg = PreformattedMessage::create("can not find column with name: {}", col_with_name.name); + throw Exception(std::move(msg), ErrorCodes::BAD_ARGUMENTS); } parquet_col_indice.push_back(idx); } } -Chunk ParquetRecordReader::readChunk(UInt32 num_rows) +Chunk ParquetRecordReader::readChunk(size_t num_rows) { if (!left_rows) { @@ -190,7 +185,7 @@ Chunk ParquetRecordReader::readChunk(UInt32 num_rows) } Columns columns(header.columns()); - auto num_rows_read = std::min(static_cast(num_rows), cur_row_group_left_rows); + auto num_rows_read = std::min(num_rows, cur_row_group_left_rows); for (size_t i = 0; i < header.columns(); i++) { columns[i] = castColumn( diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h index d77cab6553b..69cdaa5ccb7 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h @@ -1,9 +1,8 @@ #pragma once -#include #include #include -#include +#include #include #include @@ -22,8 +21,8 @@ public: std::shared_ptr<::arrow::io::RandomAccessFile> file, const parquet::ReaderProperties& properties); - Chunk readChunk(UInt32 num_rows); - + Chunk readChunk(size_t num_rows); + // follow the scale generated by spark static constexpr UInt8 default_datetime64_scale = 9; From dbdff6c038834f973d803f44ef096b6015d09e3b Mon Sep 17 00:00:00 2001 From: copperybean Date: Sun, 28 Jan 2024 09:56:36 +0800 Subject: [PATCH 110/392] support reading simple types by native parquet reader Change-Id: I38b8368b022263d9a71cb3f3e9fdad5d6ca26753 --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../Formats/Impl/Parquet/ParquetDataBuffer.h | 2 +- .../Impl/Parquet/ParquetLeafColReader.cpp | 12 +- .../Impl/Parquet/ParquetRecordReader.cpp | 73 +++++++---- 
.../Impl/Parquet/ParquetRecordReader.h | 14 ++- .../Formats/Impl/ParquetBlockInputFormat.cpp | 118 ++++++++++++------ .../Formats/Impl/ParquetBlockInputFormat.h | 4 + 9 files changed, 153 insertions(+), 73 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4a0de354a03..2465164e912 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1013,6 +1013,7 @@ class IColumn; M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \ M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \ M(Bool, input_format_parquet_filter_push_down, true, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.", 0) \ + M(Bool, input_format_parquet_use_native_reader, false, "When reading Parquet files, to use native reader instead of arrow reader.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 43ccee173f0..557b49d2a0a 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -154,6 +154,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.preserve_order = settings.input_format_parquet_preserve_order; format_settings.parquet.filter_push_down = settings.input_format_parquet_filter_push_down; + format_settings.parquet.use_native_reader = settings.input_format_parquet_use_native_reader; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference; format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index d5fedf99adb..0ac4ea5e0fb 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -258,6 +258,7 @@ struct FormatSettings bool skip_columns_with_unsupported_types_in_schema_inference = false; bool case_insensitive_column_matching = false; bool filter_push_down = true; + bool use_native_reader = false; std::unordered_set skip_row_groups = {}; bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h index be9710e1726..d4956f83092 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -34,7 +34,7 @@ public: void ALWAYS_INLINE readValue(TValue & dst) { checkAvaible(sizeof(TValue)); - dst = *reinterpret_cast(data); + dst = *(reinterpret_cast(data)); consume(sizeof(TValue)); } diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp 
index 2e3d329bcd2..e2677d7cae3 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -274,7 +274,14 @@ void ParquetLeafColReader::resetColumn(UInt64 rows_num) template void ParquetLeafColReader::degradeDictionary() { + // if last batch read all dictionary indices, then degrade is not needed this time + if (!column) + { + dictionary = nullptr; + return; + } assert(dictionary && column->size()); + null_map = std::make_unique(reading_rows_num); auto col_existing = std::move(column); column = ColumnString::create(); @@ -304,7 +311,8 @@ void ParquetLeafColReader::degradeDictionary() col_dest.getOffsets()[i] = col_dest.getChars().size(); } }); - LOG_INFO(log, "degraded dictionary to normal column"); + dictionary = nullptr; + LOG_DEBUG(log, "degraded dictionary to normal column"); } template @@ -364,7 +372,7 @@ void ParquetLeafColReader::readPage() throw new Exception( ErrorCodes::NOT_IMPLEMENTED, "Unsupported dictionary page encoding {}", dict_page.encoding()); } - LOG_INFO(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name()); + LOG_DEBUG(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name()); dictionary = readDictPage(dict_page, col_descriptor, base_data_type); if (std::is_same_v) diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 9ff4a7a16aa..42f131ff794 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -31,31 +31,29 @@ namespace ErrorCodes extern const int PARQUET_EXCEPTION; } -#define THROW_PARQUET_EXCEPTION(s) \ - do \ - { \ - try { (s); } \ - catch (const ::parquet::ParquetException & e) \ - { \ +#define THROW_PARQUET_EXCEPTION(s) \ + do \ + { \ + try { (s); } \ + catch (const ::parquet::ParquetException & e) \ + { \ auto msg = PreformattedMessage::create("Excepted when reading parquet: {}", e.what()); \ throw Exception(std::move(msg), ErrorCodes::PARQUET_EXCEPTION); \ - } \ + } \ } while (false) namespace { -Int64 getTotalRows(const parquet::FileMetaData & meta_data) +std::unique_ptr createFileReader( + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file) { - Int64 res = 0; - for (int i = 0; i < meta_data.num_row_groups(); i++) - { - res += meta_data.RowGroup(i)->num_rows(); - } + std::unique_ptr res; + THROW_PARQUET_EXCEPTION(res = parquet::ParquetFileReader::Open(std::move(arrow_file))); return res; } -std::unique_ptr createReader( +std::unique_ptr createColReader( const parquet::ColumnDescriptor & col_descriptor, DataTypePtr ch_type, std::unique_ptr meta, @@ -86,7 +84,7 @@ std::unique_ptr createReader( } case parquet::Type::FIXED_LEN_BYTE_ARRAY: { - if (col_descriptor.type_length() <= static_cast(DecimalUtils::max_precision)) + if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) { auto data_type = std::make_shared( col_descriptor.type_precision(), col_descriptor.type_scale()); @@ -148,16 +146,21 @@ std::unique_ptr createReader( ParquetRecordReader::ParquetRecordReader( Block header_, - std::shared_ptr<::arrow::io::RandomAccessFile> file, - const parquet::ReaderProperties& properties) - : header(std::move(header_)) + parquet::ArrowReaderProperties reader_properties_, + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, + const FormatSettings & format_settings, + std::vector row_groups_indices_) + : 
file_reader(createFileReader(std::move(arrow_file))) + , reader_properties(reader_properties_) + , header(std::move(header_)) + , max_block_size(format_settings.parquet.max_block_size) + , row_groups_indices(std::move(row_groups_indices_)) + , left_rows(getTotalRows(*file_reader->metadata())) { // Only little endian system is supported currently static_assert(std::endian::native == std::endian::little); log = &Poco::Logger::get("ParquetRecordReader"); - THROW_PARQUET_EXCEPTION(file_reader = parquet::ParquetFileReader::Open(std::move(file), properties)); - left_rows = getTotalRows(*file_reader->metadata()); parquet_col_indice.reserve(header.columns()); column_readers.reserve(header.columns()); @@ -167,13 +170,18 @@ ParquetRecordReader::ParquetRecordReader( if (idx < 0) { auto msg = PreformattedMessage::create("can not find column with name: {}", col_with_name.name); - throw Exception(std::move(msg), ErrorCodes::BAD_ARGUMENTS); + throw Exception(std::move(msg), ErrorCodes::PARQUET_EXCEPTION); } parquet_col_indice.push_back(idx); } + if (reader_properties.pre_buffer()) + { + THROW_PARQUET_EXCEPTION(file_reader->PreBuffer( + row_groups_indices, parquet_col_indice, reader_properties.io_context(), reader_properties.cache_options())); + } } -Chunk ParquetRecordReader::readChunk(size_t num_rows) +Chunk ParquetRecordReader::readChunk() { if (!left_rows) { @@ -185,7 +193,7 @@ Chunk ParquetRecordReader::readChunk(size_t num_rows) } Columns columns(header.columns()); - auto num_rows_read = std::min(num_rows, cur_row_group_left_rows); + auto num_rows_read = std::min(max_block_size, cur_row_group_left_rows); for (size_t i = 0; i < header.columns(); i++) { columns[i] = castColumn( @@ -201,20 +209,33 @@ Chunk ParquetRecordReader::readChunk(size_t num_rows) void ParquetRecordReader::loadNextRowGroup() { Stopwatch watch(CLOCK_MONOTONIC); - cur_row_group_reader = file_reader->RowGroup(next_row_group_idx); + cur_row_group_reader = file_reader->RowGroup(row_groups_indices[next_row_group_idx]); column_readers.clear(); for (size_t i = 0; i < parquet_col_indice.size(); i++) { - column_readers.emplace_back(createReader( + column_readers.emplace_back(createColReader( *file_reader->metadata()->schema()->Column(parquet_col_indice[i]), header.getByPosition(i).type, cur_row_group_reader->metadata()->ColumnChunk(parquet_col_indice[i]), cur_row_group_reader->GetColumnPageReader(parquet_col_indice[i]))); } - LOG_DEBUG(log, "reading row group {} consumed {} ms", next_row_group_idx, watch.elapsedNanoseconds() / 1e6); + + auto duration = watch.elapsedNanoseconds() / 1e6; + LOG_DEBUG(log, "reading row group {} consumed {} ms", row_groups_indices[next_row_group_idx], duration); + ++next_row_group_idx; cur_row_group_left_rows = cur_row_group_reader->metadata()->num_rows(); } +Int64 ParquetRecordReader::getTotalRows(const parquet::FileMetaData & meta_data) +{ + Int64 res = 0; + for (size_t i = 0; i < row_groups_indices.size(); i++) + { + res += meta_data.RowGroup(row_groups_indices[i])->num_rows(); + } + return res; +} + } diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h index 69cdaa5ccb7..4789be59ec8 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -18,23 +19,29 @@ class ParquetRecordReader public: ParquetRecordReader( Block header_, - std::shared_ptr<::arrow::io::RandomAccessFile> file, - const 
parquet::ReaderProperties& properties); + parquet::ArrowReaderProperties reader_properties_, + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, + const FormatSettings & format_settings, + std::vector row_groups_indices_); - Chunk readChunk(size_t num_rows); + Chunk readChunk(); // follow the scale generated by spark static constexpr UInt8 default_datetime64_scale = 9; private: std::unique_ptr file_reader; + parquet::ArrowReaderProperties reader_properties; Block header; std::shared_ptr cur_row_group_reader; ParquetColReaders column_readers; + UInt64 max_block_size; + std::vector parquet_col_indice; + std::vector row_groups_indices; UInt64 left_rows; UInt64 cur_row_group_left_rows = 0; int next_row_group_idx = 0; @@ -42,6 +49,7 @@ private: Poco::Logger * log; void loadNextRowGroup(); + Int64 getTotalRows(const parquet::FileMetaData & meta_data); }; } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index d41cb3447de..e35d53dc4f4 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -23,6 +23,7 @@ #include #include #include +#include namespace CurrentMetrics { @@ -392,6 +393,8 @@ void ParquetBlockInputFormat::initializeIfNeeded() { if (std::exchange(is_initialized, true)) return; + if (format_settings.parquet.use_native_reader) + LOG_INFO(&Poco::Logger::get("ParquetBlockInputFormat"), "using native parquet reader"); // Create arrow file adapter. // TODO: Make the adapter do prefetching on IO threads, based on the full set of ranges that @@ -479,23 +482,35 @@ void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_bat if (metadata->writer_version().VersionLt(parquet::ApplicationVersion::PARQUET_816_FIXED_VERSION())) properties.set_pre_buffer(false); - parquet::arrow::FileReaderBuilder builder; - THROW_ARROW_NOT_OK( - builder.Open(arrow_file, /* not to be confused with ArrowReaderProperties */ parquet::default_reader_properties(), metadata)); - builder.properties(properties); - // TODO: Pass custom memory_pool() to enable memory accounting with non-jemalloc allocators. - THROW_ARROW_NOT_OK(builder.Build(&row_group_batch.file_reader)); + if (format_settings.parquet.use_native_reader) + { + row_group_batch.native_record_reader = std::make_shared( + getPort().getHeader(), + std::move(properties), + arrow_file, + format_settings, + row_group_batch.row_groups_idxs); + } + else + { + parquet::arrow::FileReaderBuilder builder; + THROW_ARROW_NOT_OK( + builder.Open(arrow_file, /* not to be confused with ArrowReaderProperties */ parquet::default_reader_properties(), metadata)); + builder.properties(properties); + // TODO: Pass custom memory_pool() to enable memory accounting with non-jemalloc allocators. 
+ THROW_ARROW_NOT_OK(builder.Build(&row_group_batch.file_reader)); - THROW_ARROW_NOT_OK( - row_group_batch.file_reader->GetRecordBatchReader(row_group_batch.row_groups_idxs, column_indices, &row_group_batch.record_batch_reader)); + THROW_ARROW_NOT_OK( + row_group_batch.file_reader->GetRecordBatchReader(row_group_batch.row_groups_idxs, column_indices, &row_group_batch.record_batch_reader)); - row_group_batch.arrow_column_to_ch_column = std::make_unique( - getPort().getHeader(), - "Parquet", - format_settings.parquet.allow_missing_columns, - format_settings.null_as_default, - format_settings.date_time_overflow_behavior, - format_settings.parquet.case_insensitive_column_matching); + row_group_batch.arrow_column_to_ch_column = std::make_unique( + getPort().getHeader(), + "Parquet", + format_settings.parquet.allow_missing_columns, + format_settings.null_as_default, + format_settings.date_time_overflow_behavior, + format_settings.parquet.case_insensitive_column_matching); + } } void ParquetBlockInputFormat::scheduleRowGroup(size_t row_group_batch_idx) @@ -561,6 +576,7 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un lock.unlock(); auto end_of_row_group = [&] { + row_group_batch.native_record_reader.reset(); row_group_batch.arrow_column_to_ch_column.reset(); row_group_batch.record_batch_reader.reset(); row_group_batch.file_reader.reset(); @@ -573,35 +589,55 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un // reached. Wake up read() instead. condvar.notify_all(); }; - - if (!row_group_batch.record_batch_reader) - initializeRowGroupBatchReader(row_group_batch_idx); - - auto batch = row_group_batch.record_batch_reader->Next(); - if (!batch.ok()) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", batch.status().ToString()); - - if (!*batch) + auto get_pending_chunk = [&](size_t num_rows, Chunk chunk = {}) { - end_of_row_group(); - return; - } - - auto tmp_table = arrow::Table::FromRecordBatches({*batch}); - - size_t approx_chunk_original_size = static_cast(std::ceil(static_cast(row_group_batch.total_bytes_compressed) / row_group_batch.total_rows * (*tmp_table)->num_rows())); - PendingChunk res = { - .chunk = {}, - .block_missing_values = {}, - .chunk_idx = row_group_batch.next_chunk_idx, - .row_group_batch_idx = row_group_batch_idx, - .approx_original_chunk_size = approx_chunk_original_size + size_t approx_chunk_original_size = static_cast(std::ceil( + static_cast(row_group_batch.total_bytes_compressed) / row_group_batch.total_rows * num_rows)); + return PendingChunk{ + .chunk = std::move(chunk), + .block_missing_values = {}, + .chunk_idx = row_group_batch.next_chunk_idx, + .row_group_batch_idx = row_group_batch_idx, + .approx_original_chunk_size = approx_chunk_original_size + }; }; - /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. - /// Otherwise fill the missing columns with zero values of its type. - BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? 
&res.block_missing_values : nullptr; - res.chunk = row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(*tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); + if (!row_group_batch.record_batch_reader && !row_group_batch.native_record_reader) + initializeRowGroupBatchReader(row_group_batch_idx); + + PendingChunk res; + if (format_settings.parquet.use_native_reader) + { + auto chunk = row_group_batch.native_record_reader->readChunk(); + if (!chunk) + { + end_of_row_group(); + return; + } + + auto num_rows = chunk.getNumRows(); + res = get_pending_chunk(num_rows, std::move(chunk)); + } + else + { + auto batch = row_group_batch.record_batch_reader->Next(); + if (!batch.ok()) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", batch.status().ToString()); + + if (!*batch) + { + end_of_row_group(); + return; + } + + auto tmp_table = arrow::Table::FromRecordBatches({*batch}); + res = get_pending_chunk((*tmp_table)->num_rows()); + + /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. + /// Otherwise fill the missing columns with zero values of its type. + BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &res.block_missing_values : nullptr; + res.chunk = row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(*tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); + } lock.lock(); diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index b5b884b5efa..a737c695fd6 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -16,6 +16,7 @@ namespace DB { class ArrowColumnToCHColumn; +class ParquetRecordReader; // Parquet files contain a metadata block with the following information: // * list of columns, @@ -210,6 +211,9 @@ private: std::vector row_groups_idxs; // These are only used by the decoding thread, so don't require locking the mutex. + // If use_native_reader, only native_record_reader is used; + // otherwise, only native_record_reader is not used. 
+ std::shared_ptr native_record_reader; std::unique_ptr file_reader; std::shared_ptr record_batch_reader; std::unique_ptr arrow_column_to_ch_column; From 8172f6cec023df144ef20a7cfd49b43548cefd41 Mon Sep 17 00:00:00 2001 From: copperybean Date: Wed, 21 Feb 2024 00:17:30 +0800 Subject: [PATCH 111/392] log duration while reading parquet Change-Id: If79741b7456667a8dde3e355d9dc684c2dd84f4f --- .../Formats/Impl/ParquetBlockInputFormat.cpp | 11 +++++++++++ src/Processors/Formats/Impl/ParquetBlockInputFormat.h | 4 ++++ 2 files changed, 15 insertions(+) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index e35d53dc4f4..7faa7300416 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -673,6 +673,15 @@ void ParquetBlockInputFormat::scheduleMoreWorkIfNeeded(std::optional row } } +Chunk ParquetBlockInputFormat::generate() +{ + auto res = IInputFormat::generate(); + if (!res) + LOG_INFO(&Poco::Logger::get("ParquetBlockInputFormat"), "{} ms consumed by reading parquet file", consumed_nanosecs / 1e6); + + return res; +} + Chunk ParquetBlockInputFormat::read() { initializeIfNeeded(); @@ -683,6 +692,8 @@ Chunk ParquetBlockInputFormat::read() if (need_only_count) return getChunkForCount(row_group_batches[row_group_batches_completed++].total_rows); + Stopwatch watch(CLOCK_MONOTONIC); + SCOPE_EXIT({ consumed_nanosecs += watch.elapsedNanoseconds(); }); std::unique_lock lock(mutex); while (true) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index a737c695fd6..a94637da942 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -65,6 +65,8 @@ public: size_t getApproxBytesReadForChunk() const override { return previous_approx_bytes_read_for_chunk; } + Chunk generate() override; + private: Chunk read() override; @@ -286,6 +288,8 @@ private: std::exception_ptr background_exception = nullptr; std::atomic is_stopped{0}; bool is_initialized = false; + + UInt64 consumed_nanosecs = 0; }; class ParquetSchemaReader : public ISchemaReader From e0179150c1671f75f9480ebca17c4ea2595ae811 Mon Sep 17 00:00:00 2001 From: copperybean Date: Fri, 23 Feb 2024 01:09:02 +0800 Subject: [PATCH 112/392] Revert "log duration while reading parquet" This reverts commit 5df94b7f8955b541ae37e4bbdc13a1fec9ddbbd9. 
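A note on exercising the feature added earlier in this series (patch 110): the new input_format_parquet_use_native_reader setting can be toggled per query. The sketch below is hypothetical and illustrative only; the file name and column schema are invented, $CLICKHOUSE_CLIENT is assumed to be provided by the usual stateless-test harness, and only the setting name is taken from the patches themselves (file() is the standard ClickHouse table function for reading local files).

    #!/usr/bin/env bash
    # Illustrative sketch, not part of the patch: route Parquet reading through
    # the new native reader. The data file and schema below are placeholders.
    $CLICKHOUSE_CLIENT --query "
        SELECT *
        FROM file('native_reader_demo.parquet', 'Parquet', 'id Int32, name String')
        SETTINGS input_format_parquet_use_native_reader = 1"

Left at its default of false, the same query keeps going through the existing arrow-based FileReaderBuilder path in ParquetBlockInputFormat::initializeRowGroupBatchReader.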
--- .../Formats/Impl/ParquetBlockInputFormat.cpp | 11 ----------- src/Processors/Formats/Impl/ParquetBlockInputFormat.h | 4 ---- 2 files changed, 15 deletions(-) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 7faa7300416..e35d53dc4f4 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -673,15 +673,6 @@ void ParquetBlockInputFormat::scheduleMoreWorkIfNeeded(std::optional row } } -Chunk ParquetBlockInputFormat::generate() -{ - auto res = IInputFormat::generate(); - if (!res) - LOG_INFO(&Poco::Logger::get("ParquetBlockInputFormat"), "{} ms consumed by reading parquet file", consumed_nanosecs / 1e6); - - return res; -} - Chunk ParquetBlockInputFormat::read() { initializeIfNeeded(); @@ -692,8 +683,6 @@ Chunk ParquetBlockInputFormat::read() if (need_only_count) return getChunkForCount(row_group_batches[row_group_batches_completed++].total_rows); - Stopwatch watch(CLOCK_MONOTONIC); - SCOPE_EXIT({ consumed_nanosecs += watch.elapsedNanoseconds(); }); std::unique_lock lock(mutex); while (true) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index a94637da942..a737c695fd6 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -65,8 +65,6 @@ public: size_t getApproxBytesReadForChunk() const override { return previous_approx_bytes_read_for_chunk; } - Chunk generate() override; - private: Chunk read() override; @@ -288,8 +286,6 @@ private: std::exception_ptr background_exception = nullptr; std::atomic is_stopped{0}; bool is_initialized = false; - - UInt64 consumed_nanosecs = 0; }; class ParquetSchemaReader : public ISchemaReader From 18b3ebcda363eb7e9b8f52c7170d8bc208bb9b07 Mon Sep 17 00:00:00 2001 From: copperybean Date: Fri, 23 Feb 2024 01:10:22 +0800 Subject: [PATCH 113/392] add test Change-Id: I53ade40ba24a742a21f9e09dbab7fff90b032b4b --- .../02998_native_parquet_reader.parquet | Bin 0 -> 76392 bytes .../02998_native_parquet_reader.reference | 2000 +++++++++++++++++ .../02998_native_parquet_reader.sh | 210 ++ 3 files changed, 2210 insertions(+) create mode 100644 tests/queries/0_stateless/02998_native_parquet_reader.parquet create mode 100644 tests/queries/0_stateless/02998_native_parquet_reader.reference create mode 100755 tests/queries/0_stateless/02998_native_parquet_reader.sh diff --git a/tests/queries/0_stateless/02998_native_parquet_reader.parquet b/tests/queries/0_stateless/02998_native_parquet_reader.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c0d222342e31969fd5e6b4fb0fd8d0ecd4a822bc GIT binary patch literal 76392 zcmeFa2UHZ<*7jdjLRG1#7*NcZF=GH4V*>`vn7UfKN)ZzV6f^rubDC=l>mf^Z- zqw=2B(?(r=cDA9rxJhB;L3FaI0sDdNA@0=`ljj(2m|~TW>|lDI&YqoX=qYYiEMx_Dy%Hgw@r=N&ukx2)p2rw;f`sW@`W8j z$LpG~3k;s(c7>OR(sxZw*{^J0alfv(ywGsZv_tvEdeH}T&DceTe&SB0gglH+F*Rqu zv;D<`x{~r@!+len@`LrJ59u7(C58dwF2zb7PCqcUU@2~(n4l{qFEugLPZjg9P zS5^)-q?-0BmYffLT<64w7zT^`lydSI+H7)WSW)MY1 zsUVM|(@icc&q-pU&PEP5WSCT?I5(a?rE9}R7>0=PN=4b1&NQ`UOK?NQ)4ED>q~WP) zzfzK$K%ddIW0xDe!~=@0Jdu88YR_76!^E??%JK@sbJIbk6z4~u({*518ob3rN)>q$ zon`9Cmga_w=XF)(RfZR)1f>i&nZBUYva1av#KTH8c?z9va%HW#kz$grx*TQ5F&$CL za#QJxIyW}jFiJeC)R3pqxh8kE95-6Lq^l{%81hWVl=9qk`m(MQyT;%n9#`z-8T3n2 zXSM=2M!ce{C9gHSGM!LtIDh)8t_!=)FjhRN)Rt$``KGRHMQ)sUO;<-=Z+LA=R4Q?^ z=<7NUc7tKOcuJ`&&!*p)y0NyLub8Z>CvP;oHJw%}b93k$y6)^I!vyh+QeU1+zcck< 
zt8f#=o4N*atl_=stWuSmN8i%*WH%f9#B++hJfHqx>cv*$CW*Io4dpF{kEZiVbuNIu zqwCFXHB1&SD2?O=bb+Z4TZ5Y--qkghw;4W}l9Za_@XG~j~iRGpqR8s>;Ml-67u!jwS;$uZCN7JR1k!))&NX*r_%0~=klqZTK7enXiMzKc?!D6c7Cavd z7b3pYxy#24WfimH%&nzg>3rDZhEOq0=_IeC%PC`64HqWn>pIIP4CR${#f4i>zt)Xq zPa49-45f>_fv%v8W7}{M;u~F8IniLFWGZdBjr3dHc=nVbQhcg-$eZYjiZ9!aTQ0uS zb(2pUDk;yD_FOFeUN?a~V^|?RSGvoaX@wc#IL%(@+Ct}<)zY@i=)5krn8p~YsFVe zKY15zr_5lxaO=eHy8iMNLoFp=>B{YCWn?(y@kbKk7K>48b;!L!qFps@u*dl&Z2FnUAqPvGQ5z%k8I&3k%tMh8^M;Wr%!$ZlWw=`*AzP z62ee9#n4pws`Td$(j|q(?0rL=_)YPW57Et(CF}rhmuMvnlOGtGE8mrYTmoH6Sjs*$ z>=u70-tuAEL0QJ?xIJQNVYvLr&_bb9fjdH%5dzuA23e%l5%N*Gr4q#IIiqMTjFg`k zS}BY=h&x7?6@uAR!(OqNI!ZoHw^l;f!Q4KvoG@B88ypo2B+v=Eyb#K!8BC(3>LZ_| zos=+E8RFPB1%BN_J62T7P;>C)>I62ecqVVca?lfIV zh-9A{_KU^U@$wnEjk28e;tq(mg0K9{&{ip-4&%$YYH)Jp5d5SR-Gzerrne^tPgiwv=gSu zFAeTWIdu$og{~#6WnURih~?Gk@>RN%vW^|gofK;eGvs_jXQhHVj=M(J5!SP>4T++S z>MviXyC@sj@!Tn~t}s)6W9X_>RDHQ*x}LC+eQP)^R#IolH)s!K6FY%BBi0vY%kK=` z6kBy7cav@)#Io-VXT{3u9QhXAUD?d~apy#PVXpka&_k)BPU3FU4TUZ2N5grssya`; zL-$m+vXi+BVk2R`Twv&>R8yyLcj?B$HujStNvy60$oJ^p%64`tcTsF2ERa7N`Y1Kj zX|*`7tD=LjME-8*uhdp&a*ya1!fy75;hI=ST`E7O2Pk{kS=@E8rLat<#DPj(bvE~e zZY9VpEhdZg)Id3v)+t7I4tGOrEd{9Nb*g;q+TZzLI2Xz^jO?MO$*izynv4y%y&Y``P z!)ze;Skwxu<rHXDsv}`SoR9dSc+)LVB zIL4M0&7z|kBfp|YDaYARE=}wttdYx!qZKDLjLWAx3n$p}V!G(8u9aWYKFUcpoXZfq z2U#MtJytozMsiO@4`G8`Q5>hVQI~V?=x)MkwvzZv zY^!dR-_zrjGwcfPx!7IUB-@I!NR{ls&P|mTdxEEqiVY6IC zoTzkAS91k)FX22}Rm>JUs$1kww4ZW;jpA~|-ojS7nm9?(s?pqMx{r{=Ru^+cS9P2G zg`TWjWMjBI(Nowi*AS;DZt5EDE8SPP#MTsFitg$T`5Qe|xy-KRUWxsLowA)cP3fer zqeE>B?1hJ@;B1AncNBi!+oi>IRNt1`5~MI^r9#tGZjJ8Gq$E zyODb<>V!RVU2&%3p>E7erpj_LW|ne;jpg2pdci2y7iTNo)y8WnzSjHgSW*dqHVlUMsbId&D4!e!}BqEgM zM&f*>x4NC<8A-UyHWojNeNO};EksHhs3yo|n5D{NR_17Fgm74HDK1lVs*$s1MhZ{ZRw9O_9+As3fl4a7 zmn$ZX5{}BP#UMqm?&HcaqXjeTC|XE^)MIjaCRj;hO`N6VBOI5V#1Lh$s&EyUF+w`) zEV7b8Jt5mLp-Kj;a-1|)I4NtyFhx}3xr)p^fr{p$b zgfc`uz}Ygs!ZWt5SV9`Ao|Y>!k;-%SAXieFAe@ogiOUr)^$=HunJ8qj?L{kTn0i*O z%B)acunAl#$xk>ZcMw-9-s)kl8Z$}AW;=?drQzy%xjM5-$zhLhWu(c%1z9VuRz|2t zxf;wAA(wR(t)-D_l3bICQu5eiTv=(Va8Y&>qm@zWan6pJCcI?b#d6YU^^#nRiBVp$ zC%E#`bm6kxNnE4&s3*DF%nTu)?JQQ1#;8~1I?P(-HJixUNdCfAxr?|?8LOV+>M}D0 zSlCrAHc=Xzn2QgBN*6V?m#WzfhN+!@?A4bO*~KWTj$3=`-P=;TiM5Tgpox*KD2-FE z$@LgwWKVOIq*=msnHbse>KU#+Gg}}=)>iUWljQ~sF|ud5%F-O+hD?m?1oa$e&&(Bw zk*y+4RBy@+8DeD5b5*5z!Y!E?SwHmx*NB-f5F=Ynnxx*A8#BbnCUMoJ0O5{IjO=9f zBG-gjAP^&4Lz<%Am76le$X?=VN(+U1GBL7K)yrHnW|2UQterGXO_7^3#K>OZYDtTQ z`!X@I)77h-1G7XRMz*#zLwz8(V2F{u#?_IQ3J+ysWc}6aTuWw|K#Xi%X{P!}Zp9EI zo6Oad0)@viF|xDN8(eE9NFYYGzBF5XB0Dm~$ll}{NWns?OpNRt^%m#Egb2jQ+Dmg) zv+T?eBYT@`D1{1XGBL9A)H|Gp2@{BsZ6wWC(`6Ti7}>jAV<}w7kcp8EQ15YVm5Dt)(C}S9WKJku`IUQjCx%6C)d}rg5E^H3BiRPEv^aQtr$UBb&}S zOKXK!GBL8DY6jPZStk%9tC7Oge7P$_jBF<7BCQu*%f!frt4}!(W`jVCY#S*;eIs{c zh>?B9wUsssZ)IX+Bh}|zcV?47jBGn;x%y7-!4M;x#kH4Wh4(TsvMbaVTu)}RK#Xh$ zX{Gu>?!^!zo6U8Uwg?|(Vq{mTIb3gMt3ZsbR$8qV$bA@MWOF%JX`Ap#CPp?&&Eq_o z?E*2fZc?=RS?}F5F?w0A`m!jBFQao%&rK$Pgp@hU+Tr7JkUY$gWr4ayn*@K#Z)1v_Yke0z-`KJFc6A zH6$Z3vK!U+oSrcX#K?A+HmQtp5JQaY2d;;-S14vAMmARc$PH%p3B<_uls2mtMgv2P zYysCxG6|MOVq~|dpE!|G1Y%@+OIuaeC^5vye&+f}=-Z9N$Zk`=a6_1Qff!j&X}iiB zhcd*-e&zZ~`-S30Vq|xy-#9PkfIy6FKWV30!Z?f}M)o_`Upgq1G!i2lr~cr)nL`3G zvIC@Ds+Dm#LyRoN50nywQbuBAcdIl%f;lV@Bde44sHKe~8DeA^IF}WvRaHE z#T*rgk=08^)!I0kAx74MA0!$nKDC^23`2}8%NwKesTGXl7-D33%od4)jgc5xRV~hsXHE&k$PSU>)rv-6h8Wor{7~t% zP{~M)?0&T*KY=+T5F_g)9Z+qJ6B%M;t@vTmS)sC#7}=2a{OrNl2Fq~jO;PBJU^YeED$5>BOO=mj58QwWGnDvq$@%#BQdfkR2$x( zxhfDNJ61ZW);7*$h>@+xkCUzmb&SNwCaRVAS88-YNQ~@RwJJZ4xg`)I>nEL4?TzypVq~lF zlcd{1LnAS==hfY^2`!Ao$X-+H@qx@^ff(6Y(si|^F^C~Xwmv^wdLpzk5+j?e 
zHsFJqRDl@TInoWawK0StM%JF6E13mHBQdf!)rNd1lO_-&J5Rc$IvK+lVq_cf^QCmb z*+`7+ZM88U&SVJ0$OcGvRE;r$Ax5?dzd*_qT#UrX-c_6Ok<3$p7}>l{FO#x`jz(f+AE~YQC?-cBMmA7-tZI$X3^B5;`5-A*a5WMm`$To*W0*XF z7};PcRdqA2VTh4+;zOjDg1eCzS+nZQuVr2d#K?w9X=*3qI))fo4Id`u3!RO`$fm0< z{CeiKK#Xj-l%aMpZeWO!ZNo=MZ-lN!Vq`Pbw){rstw4-yr1VtvFm7Uqk!{B>m);58 zjKs)3Q`_^g%zJ?t*%i`rwYzaMLyT+(ex>w5=wT#AHcRcuZ(%+P#K^9aUZ_2dTNz?x zwft(SK8+|W8X00_yYrhQ25x~E*>`FW zelJr@PmFA=^j_5)_c6rC_T)E97WzR(Vq`z4y?7I2sV7Eui}X<)Y*ZLxWP9^lC01`R z5+hrn_Tg2A(-R}RP5PvY#(0JpSxBl}tH%kO83>xq%wA$?JY7!NSS$oAuR zN+t9|jl{@)Rr~V?nUZ>9WaFf7s+aK)LyYVIewSpWA7&&*_PaWePhd*viILqc{ZPG) zhZ$mIb^IQww0^je7+ES_;Eyn6^u);G`;7Pz#-j`|vU=VqS?fm{iIHXE2l2<4vU*}< z_e#a$M;VVZ#K;cj_etgSqm9JKTErXp6HIwMF|sDfGTz5{k|9P`jQSw4O!f10VJCq{O^R6KsX@eD(ZtQUVkveo+< ziIFW4Ka4-iRMrzCdr&GFKf!p8Ax74lKO|MrPc#xEYZX77KhIRv6C;}-m5TQ>USNok z9l;-#s_7>giIFWGKax*ks_Ti7JtCEfpKQFy5FgkD*JuOv=pJlwk5F@}%Q{8Hm%h8Wpd{B^0NewmRN*}Czw`6o;(Ju$M$QoZ;>U1v z)LI{8Bu2J={9NA5IO>U!y(u+_4>qPT#K_L$Z%I!25F;_N_VM%ibjDdvjO=ZxVSK1D zgCRyXfWISY^kGI~WE;gV;4>K)Ju$L(rN;5$#-|K1vJ3fpQX74Qkr>$~@r(FpOj|uM zvMEy2_(UrCH&pi}y=$_TALpQ8$TQ&4< z;=SNdeecDXKV;`U?a-Vv@A|Il@-y52Jn!HjM!VY8R9`5F~|&$7^rTKxQmHNrA~OeMI{gBEa~-^o|3`-u-- zVM)v=O;3vEOxWZOQcIOqt-Z7oY9$k4;>#Wl+n*7i;i-kv`!)$Exf?+ifw6 zRt%O8+tp~89~&1nenK!;qMH5MdMnrEE}*!eXGz=YkFyK>-h=x2&%qv)#oV#qo6`UI zXLnP&?ysBE6ommtmatjR*sT3xZdP=>ftLBhtjTywV1OUU_rl%r(+C6o1MGh>?thH) zNDqpd`;Rf=U@wOG+ecD3Ox4%sO&y!tiQ67o6*c+yoiybz%uF)OeDaB2p)KnM-~cCm zrc9tYe)0%^yw03D3a?Wqj>YT9nN#sPZL~LD{l-nl>#%8~@#-^t242U{n2guyKEr9w zVuZh?hVm~yfErV305uLj!{|Er^>H*ujW0F8!h1Sh+F}@vEJLZ`Q2CY`P&}0PpE8ne z#E%&<5h_3Y&L~{LpU{lP(WFAn@EK!h4e#qSh2~0`E&Rvg!qh^^I2=tYl#KTFr8O3l z##_Fp{Bf5FxPU)#mx(x?mu5^JL3gBl8j|0eG7=YYK;A^*?|D!6!Nu`~ z7iWz5^_v)e;l)wI#?u{p&BgC|`~Lbp47>2s*pYtty{U!2H)_(aix@GvXfa{TNT%>$ z?6hB3FxB4%fIj*{Qm4cP!s>R_x-cc{(dx+A9>inKleX<7YKkcySo1T z?Z2PrA3xK-pXdKPvi|sK{`E6|=jU%3{&#-<{3NLVtM#_-xapHfXM>3rom-r*O+a0n zP4}Ws6ffEm-FDJcg=Q^jG8dC}a}`~pLM&-!uDz6uxU6Z(>H0KhT+wF`Xs!Lnlv7-erSO>iy4#ACSfwChNDp#iJz%x zlSmuHQKRrTOzPxMv@V#|snLb4$z)%DG(e;c;;6CE%)}M^2@Pg=(rTfRnm%bbI-t?s zmhaIcOdf&r6ABgRNvMg13evZAv=}$4P%+(yv?FMgFj+t`rBFedwT>1O#}>9o!^RT@ zCKGfgq+k>_X+ImJk$%4}p*JbKM0&HIm&Q!}bqVjI@Y1C5GtkR;H{3uP1PfpEhlMQ? z-a_H!X@1_gJgTtOnS{2m@FGTEc#-s>KU<_}=mrZfVc3P2M);5YMVyH)u<#PbOD8p@+A+$nm@bi+a7v+-v|CS&#CC3{0lp%KU)&|xmYv-4Od~8 zRAf!4@Cr5gS1*J{v2fJGru}M?rvJKt{*YWi!TNh~=no5Me=jEdjE(=-umAs_D4aIX zrOf}`m#|>`#$S4&{8|#FZ=0qYioQ}~E;jhrb49QJr(%0E%zQ_OaasAty{rS84_s^*=7F$RU;_E*JO$@&A+E&}< zeo{*3qN_Xio$8V=yta__oJ87EbDQ)p+4vhy7G0m2IThbO{al~n$u~Oq!VBvXTw#;K z@l!^Ub)HFuUw9QR)bOJU*LgW-1 zJY9`1Y?%tb9>coI&p)9eUs&O*F|uYu`9b9~9N)%~bsGvxEaXpou|*mc@)g-+{7l7K z&-l^B-{X)h7-9Ja0Q&5r53F8)azEbT7<8~ARwzn9q}j% zSBjc;!|DvCfBe1h%e}%2WPzyg0;Yd*p>S=88jhlZFY$g|!aE@M7&D@CXKFOLLKcP! ze+i=|SA2bNWgM;`nAhM087#RO>CcW+6UhbN!V5@&8Teb#6!;krQ2#za{|6_i34fjo zBu@VrbwvP$hlMY{6_$UVDKfl&{00fue=7dMQIVSz9mqd!_}g3m&nJrh`p;v`kbhbM z{!v$oz>D1fpQHZQ>%Wzhe=7dMQIVVcJp6B;`17Y!^kzkmfA^q@uzvpz|M8&yt)gfo z{?~7_$Suer8s)-Q{L*iS|J8|~PwDr{qPO{X$N#uJSzja*a;aahzHR<_{PX-@IrG~c zs4+pm?)2M4|J!|kQ;@s=@s35W{{Gl^H@U-qekIe+e^&jKJ7IFiH^G06`q%FFb6lj= z|4(-%EyMrrUHp8W|GRtqbCkb5&!6V)zw>VYwVuYM*LFOBb$d@M^-V=b!{ zezR1XA5GR(sBySL`j*lb!)IV^#eXbWOc^%{i!kIHAwROdVK;UpVPGWame!ZBiDPWu zB^^pARdgRyxTU5dG|x!8ltNmiT-q{!U~L?nExPNf3O^xobm^Xr)cT9oWd;=s368t? 
zw7$Q0qId1?la=DX?YdfEW@kQ2+*W&B%Y<$lHKw@0wc><@4cI32x74k)?)ST{{#GhNQ{)dDA$L1^b zzu0_bSvcHPM`@dl_}L_9q!cUKjb4)8U2H~%ToGeDLe|p?_lhO5I0`OCJ2*X`u(TsZ zdFbj^vcdSSGgwAar15IG7dX&9W`O~dgidto;M`d-K z+o(h>iaOn;9;Hvce|`Xla>cqHd8e#WKU^tlAXu7= z0ehmc)K>{C>%1qAb&eD@60Dfed5^5!C~7rWdZ!A*jQDS#YzE6#JMZGcIuvystiaR< zBd>B4bswzA(A~{XLX!rTXH^$4tl)Q1*bN+3MJN(|j|%^8}PUj^80dJ9&5sO)Qk<_lP+sET)&K+_Pce5agjpKcU2 z2dtzFA&{<3Q60cac#(D_u^mN)f~5^waCjCp$H2;Rs??zpwh1PI6{lUWBexMneE`eW z{MgO848o!;o>7?5sv$JI3!ZyarD@-xaRSTEIX5T`krxD(wOQX_R$Yqf0an(F^g%9& zhc#gN1ufLSh2|nyw$8SXBVc{*fR&pQrgr+P<{PfKe#tVMS-O?4?6z_pRFD&+w6=21u?S&tk|GMAICv+7c39w%5!)d zib@BIcU_eExH&~xgXM2d4Y033QB}YS2;0}a3N*FB(neQ~e+7*vSOLzPoH{TRwGAvk z^Wc%=8Y1?<^2pANZiIM<2P-+q(b5x|OtAc&t5jWpt)Z{L(zq^Oq(fd*2P@8OuDKbh z)c`D0nCZm|XqtoN7hR>F4>S^3Cg<2Y2Q4T{0V~#Qu*htT*aOQy`>E4b#7rVsRIt;w zBxqiP73W;_$fwE_^%X21*Cn;Dx2C8@V5OMTPE^ME)?k^#l)#e6opxZwMpu1phGr~S z1+6wuZH~M+0amhE+$uIh?12@R{p?Ov#KTpv9D<$QU7`67R*G}AL6fUdRB;h;>$>z* zSLB5=SbSQ#v;rFxT*0yqR~sybrW;tv(bXacK{Ep^`_@~^#}%ij%V1GylG_~z#2#2F z+0Uo0LOeVG%PUxuei)jPt#MCH^@8jgs6t>Rxi0fcbfKtDU}@4azLvoG-e76NkKR zu)NZS2AoG-+5wg(XKk(5G)27!D>1l@-5zM_f#svA>GTLS#{n#Rx1fzm2Z|a3R!rK{ zHeZpOK48U$AFy}{jUQP4t7`h5fo3CE39Yy1mqlK@0V^`it7b{kcV__dqCHr8x8U}TkQd{?N=kcne=0t*X<%gqAKW?$nz>-bt+KOf z2hAR^vRdzm2rf@iKfp>%8+PIl>QW+DF*)mprn9J-VA+MV8?qjnj$kEeYE8Y0n$sPu zK(~-bTihwiAFP73=l*#OC~6^CwjqbyQlSY3D`i!!J^P_a0L#{KXR~6+3o8NRPxF=> z+9USBO3K;reK6vo8d#no?Y~ukrYBehn%d>NATI>465K*(S4Cbd0n0u;tI{acq6n}& zLK4yhXrjU5SJ&>|6q<8jc{s*hosBuGGFW!$!@tI%EyLsum&`#V8uD^F6oOos})!Q=_A{$MO}IamOeN3 z;aSvbcd*PM+6VKXnF3a!>lviz8rBsktjXP>arh#fKh~+CXy|tfbZVTF-{&Gg!Qn+-(T*q8C`^ z^wItys7s~6O32+(`5qZiD5O`tpVgYSCk`d_qsGYYfd(u0C&S3jvC78Akp%cy$Ri^0bP zmPg+94>b`Fi@{0`?fm{eH2cAdbZS(xB=X`USf1L>={=AaFTqMo%`ej!wdf;QNui1N zt3mSvtiY&;CJCBmU?n-3&S{Ys3&4uas5bmQ>XJKH{&_oA#~>b-gGGgPS=A4kQ(z@J zHQv1lHRlRg0opDL&Xz(C16F?O>%CjG6lGBk&nWbi|7vKg+TgiIJ?e88nzmpSI4L6z zH>0R&6iM@B(PHQ;%)^Y z9`=CcALMb<9h!$=**Q1Onu?n93@o$OHo#VIc#+(%gmP2Mut-BRs4=lUvU6UEaLjqWFLER=DgC+|sPv>UK@=$Z$ zf@SU6ZAdfZMQyMG%jP@{DI?EV0lERHqk=k4OXD@e%Ct4ivwW!o9$AM zqAtw<%QJg-+1ZGPb6}+eb+>ke<~>*e&duwOLd~Hh#I0-hZ#ycXhXKoEe&1+mcZ$-0 zl@NCB-3(~7VEIRzFDF1V5v&B~1Lk#@vrdB*XRbA`HRh?sU>v z2j^EnUR(z&#az3(5p`)fSf*^b&v?W`Dp;D}p1tcsQwA)vbNbEGs5w=^igoSTHl`YS z7_h9<;_jv6?e+)DHr#*nZDmy%2bVC2H$N1CguZciiItP|tj_Go5#6x+o3W7Z^ z@yH!_uwpgOdT&R~=>wLRo9Cff_0hwCm7FH`KUadHmVuQLKBxL2Xd=LhiFvjp6q@5; z<+nb%c4Alb|6nDhHPG;Oh&`}kbCj_cP^<00vJdGyW&t#wU?ppw=T%2uh+sv!_0_$# zM-KxQm2P~~9<^u{SbW6XW6hyi50-h=^SYMEol9WZIUaM#>W2Owtb#QAdjY6RsbD4N zsKvF2ho)fZL;CTbp^?C%T(Ux=P;TG%n@7A1jY4q2cn2Td|qv8!`ppF{HH$m)y6`6Z*No~Z#FtBVxxwl=RSpim}OD;bXHD?1@F&%Z2dLu6` zgT<$+rPjcI-2uxwbm6^VXdZ)=ygIj|7c?d7h611=7QSrAdvK%i?@GSlZA<(~BW@vcaOF z@}8tXV+)qnX^M9Po;Y1j`}n<=no|)C0@UX(}CSMNu=s^2liJ za20i_4On)0hrL64V_gWWxX=>G3N(ko@^O0gG#xePELis1LA?{&p@#u0CiOrzgY#d( ziVa<|{~eAk+TyuKy{dKznwDV2I!%kUMP4ig%Rj^6dS}d2UBU9q8|UYTc!&ZkCA8$k zM$nuGE5<2*u>pB;11!CEaGnG5;s;nssRx5**MYY!k7pFRw7)MjWx3SaBIGCY(TB8UR*6-uN;vEKoDS;=`;;Z-wR-SV>N=>)b}oc>-3T))2bO z6+KKhe4eR??BC(>R0N9(TlVHDG&R6VjCwuzC^S96qMTa1R;{Sa#8GV`OMXf#u~qv)UKr-4U=f znXP-+W1gA~mi5btx&06im%xe%E0be|-1!Wa-uWHX0eN9L1aa#+WK4PFMJuoZ%||SU zpcb_UD>5wjdLL*ygXJ0h&KX}KQ&Yf-be?r|3g)Z}VEJS^X2+r~1%jo|_6xaO8Z{Fv zbC7lL5@nHyuRHDF>E^>rg(l2YMK=63j=p-G%?^4pw4V$b=+l`hgV?{r>I- zXac}Wbe=sp7kPIZte8xvpvs-GCJ9zx_M}Fg5D%GPSqGPG_#K)GU?n(z=+GQ_Q4=hG zSFfEG$ctWJWtoq;_dzWZz{(E`E#(T$P_RtVAEs4=CJd~6=Q*E7V9rVd%WQVGUxB)` z1+0YZ$rq2IR=);I8(c1FIyAMw%5whrcqeL36R_f3%N@v~(Zhgcn|3_?0{qtqu%g+p%#C?m8~s06)@hmxCAwox60EH3DL#&fhwotd1(zTF3YunM z*=h>r*dZ@mz)Eo~KVTU0!Ursmv=a-os6~^&@(d5}(Gr@OU|GlPsyzjoIIui5^PF`E 
z{V!l?(_BU^LR~rrmTk^dZZBe{qz3sOT!Hn0rY%?=nom{NpyqS|i+8K=_CAXq1}y)y zlXi#Uzvh7z5FU}e8=9qHX=8Su%7NwpSTU{V-@VoV{r@10KP_j{C7x`7S`-47DLgW$4m2yl@{8Hy zH3FK`U?sHe4;1{Bx#v-h_Cl2No4lvC|-E27(o*`7$UNHOC7q zAGeAvE|)|P16E4fsUf@Jzhb~LhcCBZ4^1psu`%+GN6=gctDyCQ3W-h7|AUpBmZ#_~ z5qn_8<;=*egm`EPmP1ITj2qB+gO#HBTEHMLe8GxwtCZ}HyjTwwpMLsVZPcReU|C13 zxKIHaBUs5X#*G7^c?6cdi2%|x(x zmv7OtQFCU2mE>kS@t8Gw7_c)eD2KJ#UbqpR%A%!T4$h{1D3|+yVE<=oMm7YxK%Fgh`cxmmQVWG zwiR&xBv^hCt1Q{Bs1IOytlsyi3p5|V3Upljx*_tS8(3cH`QfKgmr7Q~J#%NBT7Y;M z1Xf~5mBhBt1cBw_^5f2U)SOjd*}GRU8g093o%#XMyTmQU_%ug?upGr`IasXBB&G*Mv1xKPucqULM{ zOYdH_XMN;FGFVCJ=Vw~t{C%*pLZaM1;y4YgxYbG(4K&tZWjQWwUKM#U9IV9jH&^0O zm+FBPlRM|fWW>W1u=n6wnbM}H2=w2;nV-@r;U=^fau$|Wn-WDv| z(CGAu(7XgIWwmIhar?!5LR5D#Hs1%%dUR|A^kVA;3XpLhf{=OS2?wnoFqn&@G` z@=Kj{?g9K4T>;N1w0p!AXo|PPbC25paWgc|VEH=*SuN3`{|C!6obVtsn&K}VpwWu^$aiKk$b%v%Q zSUyn)qN+gC39LA$;62~m(Eo!KknzDI3Uz4^So*wx*#i*|JHaxC+RZA1+_?u1E>#$mt1EI+TE7|GLhPud$e6S+5wb;+~(Zhg6ndffl zgj!?|79ZAYyc0AIU?oKzx?K{vGXyN&IrPQLuIT^4GG`PlU52_e1+0X;g@?N!9!`O! z4Xd3%A$MMbML8$5+kl$$4Xi|M?d_A17mdMkFwb*6UJBk8EKOMNlKY@(4^}}`!j#3( zi~~#K92PhlbJhv4tTR8=qtN38fR&ZENPiNw`Wje%VReL=(0m8W!TIn*8{|cap@>^; zolZH8(Zhh{Wu9+#L@jay%O|YQ?gr3w2g^SCaJg^L_=Dx+9Nxg(6a7C}+RV>q{W0_~ zuxwv0{^*Q&cnDT(Slth=p(%-Z)64nDtZ>wvN?_T!*3B4>yyy&8q&Z;zUihy*V8w)a zc8i5Z081ZzM41N760l;NwT@>gtZ$YrG$X(Yj6Ul55}FlYB{{p^Zil>h z23Bn5S7t2cshwc?zg+6r5b^LHEGqoomfq0R2P@I}Si*eNoEBgOxYn;V5P2~atbFsr z^1J?njtT!!5IJ3?^*-qsZ(I(X9rHHf_NwcmVfYlb4O^jVA*L-eDFif=>eA6 z)qZFfKCXvda$hB8h$^~20aW|0clIt7RUKWumZye zj`)J(7_dBIPF_id<~&$|n$Dgzkr!3K@=v4Y_ryH)5G>D}wste{b{m0}68x}63up#| z6`)CM?Twl<5-hD-qYci;ik?gXJHSIIbHsx4=r!ba@qx zIjaF!acOjobEr#MUVn0GJUYG$n$cjHG^cJoK+Ty9mY-YWcBYQ#VZbt{ zElc@~+}sUTR=BVw7aA3;xR_HVPD5h`i*oE5ZjHQX4OU7TbG$v~srO)+a@r5^K|FK< zOB3=~tPRanu*{m%llvnt=7SaM*5si*^1=v~b$Z~8DX3S6z_N|dyN-nBBv>gir+2l1 zCJ!tJM~|jKn6q5L;?s+X2T_-pD!6A(hi_&MH4`kKkSAZ(LbCuYYnL-+ub}3Hf|cyn zbk^oh=wZOprUzBbwTHI_%Ohft`7tzCz~a}O@rZ}!Ggw}Z-LBBci{4;q(k;F;!#q_6 zEbH8k>jxnohJY0llDf7$G~r-rUCx?1AurZ|MY%VtTm^Y?0W81t-~%I2i*A7BA2E2o z4x0O5Y1W*5-WVFr9(BO6`?y({vkYMQq+71tsYUF8rOnm0y=jG-306{w*(DU3bzu3q zoa=KOHD?D{4(`npR&+xT16FK$$biR9;cdZ+i!ju<4$U*LeAb*>x(%8NU?n>CDDf6~ z=L1$uI_qNRjx|ZJ{Bm6%bVoe+gH;fcmSTb2i32Ov<@`%W09eT`7sB?U<|Kj@>E7bQqTc9Xz@jq3R$PPsDgeta6#p|8XsGsh?yLP< zp$(y$gJtK`%Olec{XbX*>HLhcn5W#qO3rn+ZHIVR0hT^Aqv8i>PJ>0YNovvnd2to2 zME8thKFEu&U^!%jx9EymWNCxk3?1@C15GKgtfOY$oC!@kusof5KNpb~5n$P86t5VJ zy5tEKmDlOee$?tsU(nfYl?V~!Ab~yI^a1p zkHGS3b7`zC^5QvIcAcKO4nkg31S>Kla-s&cs3usZP%oE8&@=$cFKV{+IB0ZWnVdZ9 z^+sOo0xK}%*y(5ZY{!A+mDlC{cErqKu(Cp*y&Ve83$WsxE*D>pn)41UAMG>qjpFEG zz)DG7URr_wY66xybXdw3Xj+368#QNN1~kLLGCTD>d%h9+f3OlVjt?7J46z4RWM0=* zH4qQy!Lki|z9I#h4`8J@U5P8MMIY#exYa(N-yM1302XgvVQh$6mT(71w? z95uJM2#p_D*3SKgcSK&C0V^xx1eIcm*aIsu&!bf|;^7up9${H6`a)9-^Cs_nwd+FE zoYG(=X|rmaDUEOgOJiQya|`@e7qGNp!z-?WrZ-qr^t`;=(98u(>)fA7z?_u~mTl(A zO`ber53KyWZa2#y9?W3*hrLL?2#qyZ8s}@zKBMMT1*<^&;&@x+MR%}#%&T&$;Cz3u z{K7^Yuuml<(u)LxJ8lQ(I8mw67 zf!bY|v$DbR&ph?mwG?6xEYFuctZBrS_^sc5v;h(({qlXE*%Cd;APK! 
zGZ7D58$6@%Tw^O}oWV+RzHw#*YK|LNfv&l0x7(tJ0jt0qowT?Iye(K%_~;eWp;-u4 zV)Vi>hoDiwqBO$1wJzxY4H$psnX0WYPn`$L^s-m)H%(A8!P121^^~FM1Xh9bq8lE_ z3s0~TT=QINATQ>DWuF!^`Wtd{8CbUApYHfU<5wKdJ$e!E0L=-o95njs<&hUW)aqtnnSx&h zZ-8b9SbEKpGHopJ9RygmZm(vPL0-gw6`0ne!eG>*EnxYFe<|+`O)yxVF-yACf+hv5 zNX=k_1@fXXSOID03)Z79<$Is4$f~_5t?ye1!|V=RZ(+hfaT$qUoo&Y zdKj=0(t5_z?9R#AE5BaRj{(sdiCppT67Gol<;qT`$Mx0 zEK|&~#aP;=-hh>_5!bx$jQ$@ib6S#9IO-Bt75B{PH>MZjVIWxAkT;__7LCGSYvty%O`?rejJ+1U}@I`51Ru` zWw2r#hfW`gyciFbU;5?p1^8^6f#s1qu*)GW)`h@I4td{c5;QVc{w^UGOCc{3z|y$C zZ;{aqJq%cJ>3y#?MJ>t)D<*=z<_Jv+Sbl3lsQ1v+2P?_3>|%H1#VoL5)32z0s7r0Z z^3T;hZH{;d0*eaW*>4~;N5G163H4cunsW{;ANLQ*2IR$Cuu{_d`R=fRw*@OHg7Mu8 z%~P;q*Mx5Jg{Bo)1&-zF94m%=2P--KYTp<5Y(2n=%M}*KA!ee$atMukeHEGuV5PW( zaX(OVZh{r#{&C`M(!=z`=UHUGZb3vbs|LuOH(+J04(sp?8ZB5w_v=y_*YbxoMeKo< zlB=&%3-PcOtfKpM`8MG@W}xPzf|cZ6P`VfLq7+y~_v_k6TI|^b%@43_qrx8#fu~bv11wI_IP2gZb^e|u*-LLBxYT4Zu zxnorV&pj$)&OK-bgH?3Du2)8Kv;LT;CW58O8$7HW;^8=0MfdCaw2930LCyIDmVKws zJv$&T8h}-Fziwm_CZeqsG@trVF z1%VZlSE}?x#6t>LMfd9_wOLVnFha9DSh3o#!OgYkVZbW7UzZBw8!UvT6<8@zD+akj zvjD83`*rg(Zk0Tax)cRgQeJ7}b4$c6Sijq^>%8*RP1GDau#&ak8pgSyhXJeTeqD#K z;^#j@qXmnPURiJ$nsBg+?$@=;yuI5I^VC+b3i8VI^x_e>VEt~tuJbCd9>|NPU{S8$ zlk7U9hXD)!9s$|vz8*jMmkY9g_n+0eB-KT>QR~Sj_x-fxyQ9ryvN``d+3H@AF1pj# zE3CxGDY$ztj8YT5D!L`^z7C@-y3^M)^G>(DsAwjP(*C7&PD%+xJVyDuoxaYif1E+J zYJ*Waxc(TurYpK1jI!uX-^j3%7P-(22FoXUwbLGG;=n4p(>EaVZdQHFaL2*YzbqTv zw-h42EvkIcoxaXd8`~kHx`O59N^#|oQPaRGy3;o?%xdchXhwq-6CHJ@4m1bAD!S9x zlzDI2CU>l6f))6(T*FJK@|D5*-A-TU==KLttpVEt~VuV&5c z9jI0lz{+>s-|snt)ex|X?)3ExFB5PLnsBh}W4hP+1kGcxithB)K7BBvIJ(2vVA;O1 zVOyg^(t=fVr>~c0ZIznHsM%oIxgB^t)Do*9U=`iz`~Q*l-a%2bZ{IhFnk?Op$!^46 zDR!EQii*1y?5LZF9qT3p6*~$jmrAi?C3ftPpkl|8Y}SgMVnLeN5zLDHIk?|>?};TKB%|PZIu+*k6U0Ptb8YbEwUdk;FoTCtzm^U0a$9&H``}-s4MivR$xW0H2-8^eum#nEnzbT2o+UzjrI%15Tm+E(m&|_RCj>7 z1(w?ME%e=AzXWDZF|fG)m*2L6zNil@wdre3=1D4-Tgxz*3vO-pRLGRY0a5u+W{O z$DM}EBw(pcU!UTwnvSqKj0BeJ$7TWVU{+rTmfG|U^c!E;n_Kli{DG|U5**3=77BX{zr8a%Ti?^j6fVs2^Sb-ni3#Pyd={~U3rZ4R`mO5Vt z?p{oR_arD=N_2QG`y9gfTcEl(S&>4L&yvO zR^hhLM;qaP-V7|Y>04dABhCp<t9hlWtV5v=C8~^eCEuk;E04qD_#)0a# za2f(Ewdw1VaKDcMG9!WIx_x|vJ@gI-EVb#|u4HGcH87XX0L$i6t8oKhg=7~1vsz{P zdizhv-vu+L53ta_H~TTr7xRFnHhsera$eqm%p_n1Zl6#u1~Mmsr8a#-OLpD(1Mls1 zV0nLPZM_V$+7(z?YSXvp#KjGvFNOfir?2?>Z70|r0ZVQArX)OA9srrSz@oQLO#ca) z3&2vFzVRiyr!0cGbRSq@pW1Bg(HBmXfc1~*+tcWl05fMiu)_OF(Nmx=)&fgy`pO9p zwPzr+7+BnPeX~mT*jB)M`y5!wpW1di-5*Yrfc1~*+mk$32l`?Luu}S( zn-#zsW(%;?rf+q^ql+GpSp%$`?c}Eqka-L&wdq@4vX=|*2(<^S>`(0;wH*j2O2GQZ z^!1^}u7H`t0!!{IUl|2`u@6{k)3;sX;|Yf$vl&=6DOAFA$h-!Y+VpiRP3`j<=8_dy z6`$HiaKSJdfc1~*>oZBQ!pw;VR&`$sc?Z@or+}q4eM1wUd?%oHIAD3FOzQL!GM|8@ zHhqIjxtHU;q4spp3!gjGYYOX}5y1M#^z{kbVuG2I0IYWXZv6^^zPJc1wdot5_%vlZ zWKIGrEG6vrSjhYYmfG}+R3;DpvrJ%R^}CyQDiEX_V5v=Cx1_uUw$M9|fmM+*H7y%5 zEr6vqebKUm69V9Lz9+D%-@p1fqb}4fu+*k+JKt%pBcU&@0js>{J71wtx4=@HzM;O;Pv3%>a}QW< z{qHy72Eg4BV5v>tsH6g+5;C8F6|!S`Sq5YRfTcElql*rwc);oW2w;VNcl_YP zYSTB~cgC>(&=&>33htkC)(!eXGYsBOmFb(7^nBzT$ovFW^o|*E9UwCpSZdQZz352K z!!Va9V8wqZ%FS_tx&@Zn^v&{}`SBvmoN{1A^?xvCqaN;#084H9<|e)PQVbc?5#Ey> zGg}>i%vfNlP2a+zw5J4|&d&i>)`#LngJGR>0a$9&x7;^;LpSIP1z2hQAAZ$9U$g|4 z+Vn+}UnUzN(+F6FJHl_^keLcBwdw0xe00Hfm`hQ>D*sT@_&UsLGqBXAubbbjjwfN} z)S3!?*FUz_CmXoa4=lCm>y!M->j`990LyhJ_TVpM7+|SQ-@xKy4P6JrX(q7TK9-*G zgLTehV5v>tV87Y7T0>vB01FL@+cyLHqBpSArf+z1;k}`d@d8%hPMnT{%nD$sO<%hB zxDXF>X%n!5KbDOaU{=2dmfG}<@|!bjFU%Y*uzZ5n1iq>Rclv>)Hhoi)U(b^u696oF zr`F{QWYz;qZTfP>Cq~*1gVRi4MSXlz)e+V?pMa${ebfBrqD`PL+5;;*XziHGy? 
z#mD^p-2^i4fTcElQUCb^QRs{Qz>3PhRWuU*Zx02Q+Vl-fDA9XEW-73vJ!aoq0Vl2J zfTcEly-ULOK83kt23A&#&7Ne)d;^x+^!4#Kl}5wN2?17G{_Rz_VI4IASZdRkPAGkw z1{oSy=^k_DeS;di2`shg8&)#8*KiHg9~f+VtfT%Ho?t?<@yap~qZD8ctvz084H9CYMZkZicz^8dz?zwF*{2rXH}= zrf-Tr(=q~P&TL?z1$VDrf;Cbku+*k+PQshEJ0Y_USgxM)j+aA?y#$up^vy1rx-<~# z;S;ceW9^pwfD8ewe@x$=5!c_r%vl7ikZ1R(?l6m10ZVQA+9Z}s^`UpR11r#TzJ4;C zz`h5T+Vrg`nTDN*x%3lQQL(j~vXJQrtba^jA9nIom^rb)ihg$g_eoeIB?3!r`g$k6 zoxUD22Z2RVP287yF*|20!wZBx|Yp;yaDQAFtAeM>>mw;%wk}vOm6*6mpr8a%Td^@(sp)U%7m6Q8)?2mr1{s)%Y^o>s1=g}WBHcl{eycYIr1AQ?H zSZdQZzG!a!6;KaTf#tf|v7QfPHUmp-`X>8!k~hQk`XjJx@}4OXFt4n@Qk%Z%N&D|y zgp3ogD!dlG-V1kq!hoeVeY1+@Wmwz5%mh~8YNzyc$Z)_?o4(n;9y1-GFMa^aJ1=+3 z`$4e&_k^FRGJOk^4$SEW84OtN-civ_pfBbDOKtj=7tIebK|L%77QNbeczej41eV(L zt?>1%8wb~GWEy-%d3jx@!Mt(>mfG}nO+ILU5;ARp72>_PSu))9SpqDz>FZW(`uwIN z%uHZ$t6e_rgUkhBsZC#ZKd(c-o5I~tU?u0}KYTs}*8jj#o4$d`hmN*}-suLcXzwLg zQ0R-bz*3vO!Nqi97}UddVCAfKO~4>y2A10N4e|31S_pfq=D^C%+fbMdCD0LAYSWia zJ{-IkGJSz1cUnrWg1bIjfTcElqly`iJa3qpz_N*{*J(3k9s^5l`bPV8F8)No-B4gv z^*^{Hht5IBkqiVddL8lcTD};4$wQV zfu%Nm)BU=vp4lAkh62kyKat?;z{~`e+VssyPFs@%nTfz^=dpZO1oXuzV5v>t++udl zt?qC~8CYR44Q9te<`b~grf;EN*XD&S;ch6fLh_R?cYs+m5?E@}*Jk6<)+qE&IIu!J zR`|USg1slO)TS?55^3Kb>ftu9l4BZbenaLbu+*lntADrbQ0R*>z>3aKHXg8tnF%bl z>Fd4m*v)9jEC5!#$I85^&=)eW)TXab$%3O7{ou|Vu(D$sr7eIA3fE1w=^NG>Of;xLQm0!wZBMkO4d`W-T>ftBSE9rb)L>^*^{HhsfO76x~NdME`}MNH#C zpCHo+SZdRk_V4j$F!Y59ScUnUc5ZQjnF%bl>6?~tqE-ZCl7Ln2v8u@!=!+s?sZHOM zl0|P$K+RMF%RRP9*$l|E0G8VH<@|f@x$XgXLxJU5u-VrMX3-j8sZHP9gp>Q;L1s6w z+&sIshD#hgT>)Od%Jh{>qSmyAdiV>hkXTCt9Dknxf9Q+4zhKD7h%g?%8rewFFlu5`)G#!wGU zft4P6yK?|!^uYSZ^!4#u(h2(F0I;&2ZEL&;=G94HsZHPT#Pk)TA#(#*X`Vf59YDY` z2bS9O4J}yfc%O`M)BkVnar8a%DN|*189tL|HUs zpCll&0$Bf;zP`P!&d?XnfEAUybJKX3SFeGkHhtSAW&Y>^nQy=f_wrj22YV|6u+*ln zTbcK;7qww#0*i~w8L}NR>w)!;>Fe8P>)O6>Yz?fm++Cd?Lb-nemfG|UO**sVG-PT! z!_4vW*ZqODbSSXYrf+ar=Z|Bc9>Rf@6Zhb~3-nG3u>LW9eFMAMKwo?TR&MU@96ijU zpTJU^zVS(CecC~$9t6yLs& zq6We}4q%~qdrUVvz|8c5pQ$o^vy#p|;~+x-tJ@>Fbt! zzR@Pg_y8-^yU&h7SW9mJmfG}nFYZ3e2kK!ru+mmPnOO;$JHS$#zU};i(u;<|Jq}={ z11+OKtiFCtt|4hu#?gta$H0pYhNaJAkD&eM5?SpczmzM}d{Q`f1%ckjVp9 zy2|to^|M}9C0ZVQA#`}FU_JO_#1Xgw4{&fV*qQSsYo4#qu^Cr)POc=1rz56ZH z!$&0mOKtk57yAs{4>fZGSUxeiK}N__0ZVQAX8BeBJXaUaG=SAE|3LfgaB&(7EVb#I zn>^p95HjVP0>;kiBDzMb1FS^l`8VZ>uzzXvS%Jhf6xC<<`>FZkJ7rPni;RUc#V)9oHg^Uxh z)TXbS{}11z4sfOctoZyxGvi_ZMFUH1`uc37dq058T3{u63>cN!2`qD9sZHO&691Nt zP!I2bCC3yrmmq@yOKtiF`~S>s4t)^`tgQUQb!}l5EeDp`^bOz0yci6bEx^k57+Bc} z`r-|+)TS?85^y~h>fsx(s$-sK_l8VcV5v>tDF0uJcEa9j6|lwjRWP2a)<7WaqDDPXyK2743G7q)%j^{Y(ZoRZ!)3!xtB0V_22S8^t|AD18eFGC0jPC>)8CcPt z`uQ%<7dWugrmuHtVDFhw4;_J(6SH#fq(x&kb< z=}RXrtUd^tC%{VgG&sfkgJljZwdosH+V{mPPnem&Dvy0#umdvT!1~AZ^|9Toz~1UQ zuqvLBUV$)+?f^?|`f`bjw(FpGihxzGNqxvH z0MNV7`I1nszV5v>t?9%@D zgRZd00akEaantpXSq-dzOkW?n2W8FSOaoXUxs&E~hFMewEVb!tleGAWGxW|cU3Zdb3BH^%mkL& z^z}|!61f&K8W)&3Uc>*J4Sit*mfG}nFB>rax)1DefRz?kI&K+cb_44l)7Q6-OKuxD z(*Ra_?&RYwU>4bU!_QQizF|pA8`wbaGzJ#u71FMF5d5|wu+*k+NZG(@Kd6Vrz{-s) zvwnrlQDFUJ`uf&A6#{)>Hv>MS+$s89PB1fpr8a$&la}#OkZB359Ip}PiO?4dfu%Nm zqssPYWZTc3L4SsSEYK8+=_?n&lZb9Y&u+*lnjh|-y z1=w4402ZA$y=@(sMcsgY@SLtpF#mfG}neWQ=mK|P!VR{H8) zulqsfC9u?{ueYClhpx~UJ%PpL&5)MESx{eKsZHOI={kQoQ8WbeBC(K=w6150iC zh87zdY=e5Z0Ib5*yQ42c<~^{~rf-;^gXK8vt%87+lQ(m^71of$fTcElqmx(NeFT|l zz{>VUMznyw$O4wy^o=hb!aGAfn1SUQv!_`%$W#MMZTcqrIfl1_z8C>4oBZ(FCYVpgeY1*(j#vxz@EBNuF?+9^fJ_~@ZmLb+Y(FPt z5A3ZdV0q`yI`9VeU*W(~o4$p~F%BYRRshSrvnHtl^uAkz$3YG2pg-(^4|?5(1Jm7G84b#6mU5z2r30P`hH^kqyq%QPD9I&$U=SGi! 
zd6fh#wXaKWTvP4|nM1&mJsjsQhrQKzU~wv6H>#voz!L;!W^?$AV)y$cLBiVcwkj~I5}8hEnT-CyndChn^t01Fcj*c zF|fR259A@xI|G5G_I1$j*L2=_REO<4kJc>A~@~!kT0E!QOe*Bk`<1z3i);u!Vb(jDBmXP_M zM-dM=y6kHQIBWt|TF}ACJK#0cJTkfbOWE5<7?d@SB1Q)sPF)AhS@X!@z+D0Jcf;Rn z9xd!v`6m7&Z0c$rA?!2YNN+o6s+vay)4O^(HG;p_JQ6s#vb^mK7)&*f{srYf+P=}c zT8%EO-Tl!~{WSjQIu)TXWo_c^!SK(j1JB=gkNLa)_*p9&DR8k!k;bhr2*DYw$83J z{L3j@O~pv5<{J*`bwB^S_Hf70Awh#Zd-QdUX~>3*T;JXGx%+)>%IIlB>-MxXKpHn1 zvHAAUiks15(3X-Z4Qh7|?u@iP6KZ$Osq)UM!2_O7qc;aIVI$y2{qM{EzkX-H|NL+V zcu_U)YUX#_Mpxj^&)e6(BI@r9$Y1t#e9bRZ+Q>D(O7WrQyAB@L{1Qbl{K>Ypt?m-> zQnyC#9PvUICwoUcN3bV*+x8do4~G9DP0 zBds%XjLYneB4QG-iDF$7k4+HkIBk*pM7-B+mU~CM(`}M_Ij%z5XFN7WBkeLC8CN20 z6?@ZJj1o1bGZ-nNrp)So5ubG1yR!PFI=u+v&4p`Y%E-CV7iLUto2>DPaYCc zt>a`v#5Y~)mm*^#(j()wF#+kGQE1#?KUZvGx`E9R8=J0Uv&BYE2j$@rKXnJ>VG%!c z`{kjI8xh}(5@RyrlTmC;LV7BgNy6rfO-&*;PjoZgtR5NhM|W5r5%F7hNDhhkg#={0 zF>XfuGs=vc5WkF47YoLST9b^?B5pEcCOcc*QF%0Lqf3)VMf}wr`SQ-V4e6cn*0>ev zl~HcoVjn3sGu_5mkucrDB1E0j33(iAr#mi>Wozk<$zvRMAbm4F7*mkIjQ7UvNFSwz z=^nOFY;L-XEfC#Jcd93_b#?v?mr2Kc$l#2xMh*$is4}J^gOm=Yr`QUyz3Bxy6UpzsjQ3c z+?OB5!^p6V@5V#O(2Q#1LHk(I%T$2Hh@Pf=Y_;g&bV;7cHqc#^XR!5k7v$-VN0AX3 zzl>=}NXAd&5oEa1#q<(eBX%~uz~V%2)AQ=tY-8OOc^2D9cUcZ+8zQ4K{u+-XqcZ*& zk0B#7e!INJ{u8^I3bA;xtLYWC)~>1Unmm_v(`CzZ*e1HGm9>ngkg>e2@gy>aw=tfu z-yrrh6=Ung9;PB}o!H&!rfgzy-3@s@i|MY*^Bgmf@qBG#Iugp;8BZhQ6hBiLmMHp~ zO0fjd$5c`s!4f)2W>}q0lxbFr7N0(|c^Q*xU3D+a&gKx+5=QTk3Ah3)vRB zTk-}&dnZ50DeAF7wIZFKkK#cXTcT{((vg-qjJ zj2DrqytDBFGKF_?`GV~bgG?1zirC-u8QX5xUiUy=#_7>&bmA~mi5-<$}y~$?pY;fl#uy+Qy7EucsJus`-9?e(@*SxIL!0|+b<4v zdM>YJyXp$$HEb7Mz8vRhK^R_Zlo6W8jb_B8j5Phh4vQm9zp+DNi0N1LI<|-IrTib; zUH3wcXS*Shd^6*1gyjk2EhK{1+1O%7#nH44mL`re{l$*h`RZQF8(1G*p}d~$se4t~ z!gvo^$Tv6MMHcYx#yj>W#BsD8c3d1w*TRm8W1LFlB-US7EGM#lx*|Ej@d2`!Z)MCu zqWG4^`^X|?0$mq7C61@-U?;^;x_0#@wzsZK-pKaSmCDI%0J4m4YkY((<=Yq^B1`zz zHX68y8EF(t7bnsPcG|A5?ybCq4b+v(o7q0PHC(1bbaiCINj;1oXQ$>Rq|d|ud9^z zIKD>y4r?D7W z&-XAEA?x_=Hchc>;#}Gd%NFO*O|YwWBXr;8gKUVdT0X!I*L|z>GnOHVysxnoN#K2q zCH6N(6OCgx#Q8LaT^Hv${gRKcqjW#z!|X`i5BZSeTVx|2U@S+Hd4J;@BuR;&2}}|h zT8D`uO>3)vl9tB z`4l^Ws3o6t{DSP@gNzkO3g6%O8QHEZq1#~h#Km-L?5-F^x2on@il{4Ruq07OPG^nC zZhnxl3faXEG*%)z`2p}yoCo4Ex*e7yE~VRI_w6PV8aa~49F9B(jwLsFI1 zv?umdTt$0ePsC`tQ}qRQ2H`B9XQvZRauz!cImiz;{zMM&!;C+W{rpgy&RCupOM7Fv zVhrttJ+qrd)RQl<;e@Mvk)27nRE{+MK@RgHjK7gXe2DRv{c~|G-4!bk*U(+Cd@;_s zp?sB{Lo|@Du(OH!@?|Gm5oI0S1A8g{M|a0wi1Bo{>g()$qOp99 zokuj1v)Q@G34R=DhaBg}lC_Xy{1_Ww?6tUo_Q49p^>k0{l^sJgm2a{%;U?c;O+=H* z31nU56hEG{6noe2-m1w2<#Q)kDtn(@0k&i=RrmAm@~w^g!&BxPu;meH2sZpz4S03Zjkt zfCWuT&S94!m-v}v1LPt0qov+(i$>9#T&ST>U4>^~OMQ-vY5=U_xQzRYve8;MYeLNEuN+);&$RGdIDZcJn0-DzhO5M{&E?+iSUz4o!TQ0 z_+?}}B!^#0wngqMJWb(s#SEIn>xk*JvHBgmjp!}EWw#Q&oo`TzpXPkrN&+IOuzx;{aN%WIHI(0_!_*l{#$>n26FXWkWfu4aoi|6U-xRaPg zPphtE_Ywo;FYF#-fLy`uMxOI)$*xENzlQ9BCOd_39Bp`mz{o`W|Kuh6sc`r>8hA@Vo&0AY}=?0!No zD^9-1YkmXigB0@X$)3n75B4xIO#aRuB8JM<>_Mc2Pa^%1 zVm^`dLyGtW8wPJG-lS>VO}s&y@FsRgi4pQIHjN08f3io2;gy@n-bfk0k?e((^2ua? 
zgH|-tERKs39f89oz`Mdr@qKrehV3hl=GX(KFAy87QGNBL<_wD zuUQaAey_!zBF0*5*^|T=iw%1M`M_@{`y=o9ZDc>>9lzCPG2UFfLr3B6;%#~n-pnq8 z7;mY~rW2tSJN7g&u5u?i5c$OKAO|2H`4lq9p_O=_UW&IA@6k)}7UErJqXl6FVxpxk zEMX>C>NpKXzVN%rV5EZIMGitfD-Y=vcpLEny&P{X=FrQ&+p}j0%A#S<5TpfVGm)=+ zDrrEf_`ReaspR+Atis!ikLhToPfWHrvROo!#eqFXOsd>Z4nwT`K5{6c z@EkeBp_BNGj=?*MPwCZo2l0vXG>Z#+k(g?6W-ky^EKW`%knj9KG6bpS50Jx=Z%RJB z2KN;6=s4U%%mwqkK6{0jVX4PnCZ=0l*-OYT{s=h=`NYI$avbtkDWVhc9^z{{0q-sr(i^^GtVGPWG=&Y^ zJc}E96RpjkCMTkH{3&t*T8lqvvk~_dOXy_WM=Yk3@Sb)Sg0W~>nV>B=YbH#UJV~K- z`3#ao>+tEM(IG&5LvP0Y#WH#m?kAQyM_QV(w+Yrlu(yZ^i_U2>s^K%qFcjqlauSLt z@91rKZ}Ba?74IdM(_6l`VDAwNEzQ}x!~%;udk1yo&yiD62mUNM1-0kT*zCajiXZ3{ zJWza3Z^!%CJs=iaTCq7ql%*wmpIB6Rft-Ol^XJLws1u(>PICwnKhwML{^BQkC*Du| z=)BC*mVHDlwX|U$5=$(too1o+_)BCs>dIdvXQD1jCA}9PD1M>$-~+@8diVDZ>{DWe zr9JzESZ-;@K1LhzSIIeO1O5s*8?Db@w%Lad7QfOQ9xPVTsrVqfJYtol6Prs!TRO7O zh?SMs$@yqw{u((CZNz7ja~+0=-{=FlLA28QalNQG$6CDD0wTuZ$>tNQEgnt`+LXUZ z(x@AMgEXN{lppkAe3+MK^bMxpNfZE_LX%vy^+g^!hN>67>v$%a1h z-H$CJ5-q-LDUo3DVN1|f{C#pM+LFIVE!PIA$5+v;*zSe;`xPK>j_s9qnUn zLEpm{O3mrJ_yWnDzVm%TWL@HvWqf2E;-n=svNjsTewx;IrG7=@yEfXUV;&f#txfdPCe;!C8~ z&Y6};kq(4lp(5>T%HC-oI+*`Ta%eDLMW&*ItR3j5_zI~#{RCewwWA+@pAzXzoV84j zbRy1J!Xh2fA^bP;0BYc^ixfDyb8li$_Zx>1X)L+VzO@mT8f$M3!Z0qziGb z@&|bs9manr51~W(YVx2%tmH)(;4zXXosX}UJe)6CW=1w3E?Q5+`n0=0{@0b;~@b40OCun@mST1v~OII?n1xm*I($FI|c! zNIrDQ_lQV>kSt83uI8d2sYT)NhZN9>LR~n{nIP1$d5dq90_bu)S@Ngf;7Lle)LZ%m z3Xy8jM=H8hxyqhAi&BDyJcGi2MrJx}k^0c@@y$|i`W?PW>h<@oK1ykxdPl!VaZkOi zU+8omoh&$#S!kHxK%PSZ`A1)U+(BQK-V z1y}Nt!)|F1U4`$G2GW)IPHDj3C;Dimed=TVN~K-uBmD}eYv?SYA(@Sa3k}Gt=uE4g zw&HuG!L)+!k%H;3_AyGQ)MxtDO2^cv`c+B?bgs~Zyn)UU8k5)2+29a;$M;D?=xUsk z4D>fVRq;yA*T*TIsd@TX#UnMh3WG5=UuX)4+4BT9@}|QZOg|9;vVO8_j&ON7?8 znwnb~42q{q6Bz_Qt@KHKr{AjdPJOH2qV!5Fuj)WPMOO&z$tUP?p&j|yAyb;fIN*Xr zG4?eV!M`8%DN5he5Blv&VCsAQHm5vvmC%XIMWcm|hUc$PGkaly|igHkK?dz68x zU-Y|`0jU*LUC5W{8lf}!0*w>A$>$E2q?t?u{Gv32sgGZfrvFv+oH97|t3Fi;POZ}K zb$X5dCv+nV(RiUN`3hZYoy|1HuSm0)M)+kZoM~u(KpB$yO}}3;q+0d+6g|2@=t&l% z>xCX<5xP$3Zrc>UCe3Bs@N8)g(*(b&3{U-`Kcoyx{jNW#3{9=B@*~U8M8TIVMH2)c zvc%!0WMXjqhBTkS@axjNzrXcq%81lo`Xfq6>QDV)r?==vA%HALlLdeB4Vq+)UKx`b(i-;)+It?|236w}IHP~h+$2Ju9$E*!^C zKz9p+$SQP~Fp#W7cM1b++v5+UWlTFfM_S6X#qTSVID7pWh2k{&Oa;b%m7cVsdxgQI zg63~0!mRC6Gvy{o4qyC%{#yRNEI(JIe@U6nxmJxN|DcD35#(?5kPt%ta(FJSWxC=8(i)}#r%Z(G$Wr$__m)jHPO!$AmGqzW8ft1LK1iO6!@P_$$T4VXzFH&o$NGROWGR zRTHSX=qX`5RR=vOgi^H~N~9#lA1{^?89%&8N~qAmrj+5ddb0wHUcKb3K{Es+g`(-g zL<&JqTQ@Pi@iJ*6(+e+^l9>Se+e#$YOn*y(EwJ9AM4*|%B+3C51d6h+*#g-1#otO> zm_WQ-+RXI9-zbZ?7W%u&Law>~j>Qaz(R*sgw(P&bpHsh<}oHFaz+9QVJ7f|43QJ zwbegVmU3iQrkO{%7r31`x{F}0t>!N?5tl>KApDS^kcU1&Mppw8)x|#zJO6%}TI>L;? 
ze@cg$k@yekP(^orq4FQsP5(-X=ep`&IyXlxLL}v0a~w)FL(SG>%vk)7bd(u`|CZ92 z(e}m42Ck>RNLkPI(7#sJp?8EuR7>=>u#jqj-Vzqrj@Q~sCzw#JjdYwDhyPWQI6r-< zlF0e$OOyo8r)mk+2E8XNrdp$Sg(#|(V{Pd)Gf`_Nonj_vYe^?7dg;rRja-2Kjgrjy z>&u+mqYs2-R68_BSW2};?^}6>($F7U z={z%C>m+3{)9kC1-P|C3rLv0~sQ;quM4t<5sjg^&u!ib_<_mGQv$XZ3OH8=dRl3N` z)Ve6CoL;Xed%3~-ugV@SxN05M1AQs{M|DSE2=P=m$A;2XW{$RjbcLC%tuI}!7^<&U z_Hje>-xQ8B=&jDa=xbpE<%1Rq>#3gTE9-S;zP7P+jhUxyBxN&m?SCoHkEENwAM|!!I-p7lq1|o{cq(kH$wkQImCrjZK8Uk zWx__P7g{PLQvr@z$;`0YnoB^07H$Moj4{|KN4e4Zze*Z6O8>{XFZx#4LItAb!e*)u z`o?;TS*Wd95H8Tx+!l;<)qG@wcbXEY#6Iycc!*Lg7dMc7RRqZPs~Y7qL_`jAiroup?>jJBinlv%CqAU&y=Zg5r3bJGkiN)|WO;OsmC{Vp7& zLeOgA05u%_X3b~TXg#GoCQj=iW61q|%B726x56MH-sb+|Du( z&XYA7A(INz)La54X%Oo>W}CLR^p@GG?Io2nTO3*{ceq7{7RqgIp`p2QOXDb)IF_&oYRf;sNg!|M|O-tb(wM5fGxNDoC z9dEA9q-#UXcFbw*IOPqu$LIm4(?)ne!Gq9+9BP>(+{A_r%|vru zxUD_GT&H5IAy9eCZ87vw%DK&k-p;Eu?S;oww5FZ#h+3&>`^}y?tEJ2u=8P7$KTM`W zf8_(W-Ox{Y&uugGRo-bj3D2k)O-JD=wOZ3bcw&2AJK5~WWNE|94$L|2B;_-=(=b5! z#O*KyDId9%s(fmV##6|n;xryYE*0xI&FsQl)J`=!GZ(Z|%uW@148h75Znt5OQo-#q z40Qfa(^+^<#cRBU0&1sx*1igt#%9&=ec-R#O-axf@gxm1H*sp9q;1}l}CZo(^S zy{4=1l3J(fBD}D@rk!PO$Yg87%?+5V+L_8XZogruV&(Q3hA40es4AioH9dsaRD!0v zP)KcXoNI2v+|bT3H)gJDXPX;U95RF`-?@W^;Yu}kz%a~tqsCV#p^`N|LNS%3>G=(V zu1 zYPID~7{*y`xZ{Sg&O0=Hg%4DUCQx`!ZP)bq)`GdGU1)C3+|@2HyEAtjCR%HArwtRV zcHAk$cxx?9know>rRgtxqIPQf2_J1AXcwDXF*({Ob4%vFc9FH_Ml5Nq%Vijh);e5z zRVB4oGf?SnDyZF#%gk+=N7|+4Hq1lq5_9W{Glnp$hRZZeveqo&Dd&Bf!NOPA z4g?ETRH|msw+_rx?Fw^y=81N>xgGP^VXD=UJ7<_;b>Pk#CR^<_LxgYC0gXYhQu{S} zL9xx#t}=IGa<$Rsj?6RdN~;TZ!7$zG%$+w(vpR8ERX?c1nqk6s>X2rrP)!|lj5T{P z1=<+1CzG#TZT6_RYzVj3<1QIyT3xw|h8fOBH6w&yRGKD4_(>hn4FA@Jd8u7v?##T< z#+kjD=MHnM4Y{j^+13W!6~ioRea&d$FLhiqO87$^(~J~;+rHNRXYR%nYU9manOEAi z)+XF_!#rza?wVn)wGo&7wH9?sGd9zfI;k0xX+xcG++gm>6l>R;doV@Xb>{9BqJg$H zAC3nxT*xEufEz^a%sF|ASOkL1S$#k;) zqTOK*Vk)#L=KjoQ?RINh?xA71wGH>cu*}+;%lTTLx}uqpS&zD`nV#uNU2@!Q9>i2> zcbNw=mD-)=0ToXS(bo3dW5Y^oJMNKTh4VGdtjvZ~wkABY0d-X~^P8TrYWJE4Gm3VP zIhgtC5M%AcJu|GfcI2KKR#`h}=4LjbZfNFYHm0s?W@k3C{jS|-9>P@Dyot;=ZK~Ca z%QwVXJ-IwXtkr|d{ffb1_WaDIpft|Qbfa!M9yAYUergYxhcQ32`^`fuUKrx7ow?_R zwN`Jgz_7;IqG2+%l&qmMaTtT9ZzGvM+Qa4%%x~=>a|rXxVZF5*_sX!&+Le20_|Mu! z6PekJx~*X|3F?+6B2!n(R(sSuI>JVqW*)`-)gH0-Yip{ zW^?MUWlQ^Q%m1p+ZXU{tO-1|TG ze`b=$k+qhWU4iwj@BI{i)jdWonsUWGS}u}u**(hW%4PBH33uT#dv}LBbD7{}dfm;h z{Y&*an_u~t=(XMBdkI#y@q>k*lUWanF{kq&#-dGWv6g z-eci@T!Qy#xG$F*7Ikd1;TLqLnV;8NyXVU_Ql7i#$<ay?fe?;qjeT)}8s2v@)x);sYo|8Bh_AM4+xci>|p zjZ;3kSIUi2KDt-P4O2cK7s8{sBHr`ikz8T#x$ub8%>I3P7e14JuilxD^Y4jIb+3_| zrhIX)mYbw}cCRwVa>c!u!eh8%-izVUTv7Npy*r=P3CDNivp7kaUES;D7AfiOb#n8R zH1}G$nX8odYIr*0x9Y45f0gjCcY(itA{YdX!tel^~5^D>rVMo%FrhI^ASl`HSP6`sPC^WF?k z=E}l{^`5-Pe@GAT@&1E4>y~6P<*Qqii4@x{$hfPL_ilJPSJ8VXJdLa1y{-4*bNY|! 
zz4;veBYH1Bd!%j35BChi7t?z4yX1QWN|q^nQG9|8c!9 zpUZ#D6YJhCx2Iy<+vIjBKiyl6xmFH$TWo@AHu7+Cf@hqm0V-*yYPzC zV*V@o7`~|gvObzG;=km{;XWw$q_Vq1a)9!`}%agg8!aAjW6%Nt50>Gk%v&fxKGQ2seJBJ z@*r0muN9WKWUm<(InrzBv-ryXhx$yulK+7|gRdCjsQl3Y2<3~W!BAf0TX-wi&iggI zg=_1z!@<;Q{wMkzzN-JRKAW%Nf8;6T{zD#46^tguQ3c##V+YsK`y;%a>)`z!-o~|u zpX>Aa8vbYcJifaBsXo_zK^{dFai5n*Qia{;=HEFX8L@U+atc zI{sIlQtqqrc&eoPiad@g;l6Af;8J{9!uz=%zRcl$TzB}rzMOC1f2S|w>-*p8OWil* zNmLp4b$KFH+I>x);9`94u*}iEY~kNH%9mAN#W(hU)K~J2{2%lce8b38s=WJ_JcTOf zz9~hC^IWI9*@QxA3Rw>-gsWRDG@cfjo<<;=V7>q$<1b$unI2d^y5LxW2yZ z;lo@XpGV)sxAte~8~IlLh`xbu8JSB}cR!NnP}STI<=IqKG)9N<=gtd>s$CX{%?AaPmV05 z>bqabOQ?G8m-1q&F6s@3xnaIv!vEknU%v3~sZMz!eLLUY|5M+_xAXt-G;+U{ms1Vh zZ{%fE1NUp=95>RJKl}$b!siR0<%T11`Yyf`=F)fa9kEz_hx>!PifZD1FR!E;yWhzx zTw{EN!WX#FzJlTN+$diGeJ|e?%dGF=yI`60-F)ZBTB^DGle~s%=Kd(JrkbKf!~ST=n>-yO@U@8i2+Sv;-Wsq%WNrTdG#j%wllY+U0e`ih6Iaua;T z!dJNQ2&~IIg+b8}4(8SmxFhl=%I{8>H&U(LY4Qfw6kn@1F%F-7bsTVMRkT^=`0OMJD$FSy0Nn&Ic%B43T@ zp$qdx1K5~1n#LG8K&3!gWV#i$OMSJ&FH=Wg1@%Ap;aGua zQZts{!$8qf01ZVa0w^e}@s3;Rs~>*Lt?<8k`7qT7@)&8{Mql%A zD!0McEc}IAkCf4G@RP98`gMLHR!YAH<(7|A1E5^;F{(e5Q$FhAeXYU~ZnLjtIGx+% zYoXufr()&wTl^HPtbUW9963b|h7#qI)F3E9K0yscv9Q63KIinw2|i~MC3OZ?QNPPi z$13P|_-R;q&#zEk`3yA_N|H}gL!dl{&28}!VT%j;@UY2ANEQ77KMSj@-{)symGpa% z*ZBfG4En{H2IZi9^6#$gzBb`++%{iw_$#;7N9vFHxmY#*5kCj3sz2msN6t|rq5Mu# z#0bde(3s(9`|wY0r>|Z32e-r5HvBzx0ajCg%FoAY=uh~0Sar`BsE~Yt8Vwbc&r_qI z0;Y@G96=DSY7=kzc_M@ng|t_uTm4BV)7MgJnB#eXIi@3*(?rD z=z2DD>I$r({+3^kHPGMi%dq;MDNrf-1~nNfDPN~1K_yIxQ+>|m52yH?NpMa^n&=<+ zRaj&FJ--rbq`!m8%D1U$P#O6aH5Do?-*g@HF=r7jBz}a~2kdHl!rf$Gm>Z$yCtcCuCUxzjK%z-M& z52)EtMfpB83#wq|!h#W+1%U-U*EI2xZ}P)y1@$=u18M+nC?sjM4nL#q3ZHeY5`PDenQPh2cFHt zo%Rhlo5-E=^*@`C8pM)yi8-PXESl#oFp$`7Kx*-G=JQuc>8F9r+ct6sj%1bp7FTf^;JR8M|GEk}o+Ex?`kISGN>IiJ%kbvM@8h~;--os1ZM zC)Uxk25KaKpjJZ-<@eMosDW9SyW|^rwh(vGH{xtT?gG-y$i(l(x*Bo(9;}Puf||;o zsdZ2j`4hDkYAk%v8-2D2ciA_}h^FKBFtYIbvF=7@eqSVw+6Xn5Q>hJ5 zGx-a(9vy$S1b5vx?rd@Hns4mcVyOy78_}=ulo6fCP4VzhD>>p!3b&NgsZCG|vov?h zH}PyK?xt_T*^=B1Bw!%ChOq|Bs~BTI5GEUx2ssxIlmIzX$gaD-DQC-acYKr2mf>#u zCK(?75Z23x=MQ2%4U`W>Y-$Tc$QBiZaM`3Jbo$u}+WVJE3;+4{8V0 zR{jq9P9qP03L9i3@+Yx@Mgo5#!$s|ZIx4Z$Zm5G2L+t{6XX@|RP$MsY1{-1|@u#uD zp8Zf4B@?v|>a4_3d!bIC??i?f(EtW#MAKM)HS$5-m8{fnP&XwDbpYzBWCnewQGh>( zjWD7~ZNm+RHfBJS3OT_aN`agdAW8;(XX-_4v{8t^fQ>Q=^5?OUo`VphAXEsV6`0Z> z3iO@GIHM?k85?U9;V)rhjKWY)#X}v20!ln}2x1i!^qoct{u(yHD9&HS#v8@>D;YVd zV^D7;2Xz$crDUg$fW9;JCN|k9#oxdt872Aa*hJ4ssGpLXIsx@na#6>jKA`VJrWs}V z+t^g241WuoVw8pkDtV~W&;TWoItBGt5S0s57ANOnr#WHY)KCuvtb${ysL-^9SUFzNxd&P~{gY3=IK&Co<2d%0I^D8ddm5 z*c_uWG+Ze_ori`g`Kfb|bI=5Rr%{7{hAl9v^G~t)Mm7FPMq%m_G*T%MP5wEy&~p_UqZFmCK%MkV2}()o2IxCeKVqwm zM*IhCmC=xYkFE6Fg{CNFsXNeQr3`f&ngsezWS!BJ|BS6Qn(&{nHAZ7-x>A9<4^30b zQ}>{$N;%MX8ZG!VY=hC9PsP?7&G;`Fm8nP2Or;X_5SpP>q#l62Gd05r=d|J@PEt=x zJ{{ZWc?!)@s!~s&*-91aF*FPGorq{)ynzXZ6Z~-=w1(y@HK^y%Jf%AI44SJ{1AV7K z@-`MU2;RaZ1Lw_*+SDs(p;C)_2`x}+QZGQ?>CkiapY(i_@^bTEWlbK7WG%gIS;?Z#-@J9wi#{sZ`f8Nng5Dy@w|nWD0Qhf&|;+y z^%`0P(okfl(VqW_?J(N$Kd|jaTWGn`fO-!tQ|eRipruMZkcJwa1Q)j3=qSWuyNnJ( zOh#ks6SPukM16!-C=IC(APr5+jO{bJ2$`_GMrR=o+v7=v)+kM>FVJeG3H2FT1=3LD zH^VW!PCjQhA&aAWyF%-g7F0U4PH9f1L2H#}APqH~BR-}WPTb!)2RPZE8LcTD+NiXm zGN27gODY1=&@>ba8H|8nnn4RNrg|)hS1`(iHY< z|A=uz`<3o=HfW#Hjm`?~1!*X9-WVAlv|`v5V~kJ~yKIaWieQ&wazF=_o^*C7 zqy%UWq=7UPxo(UXN?_NFaYAwIsxcNiqV%D2L5G#zbWZ4y(hH=a#w4LMcGH+Bl)`Qp z6NHi({pm#Ln9`3D(XS1q;B|=^7wXs;JgT0C=1zlCf(GF=v= zp=r&qFUA_7DfZb|Ei}PC#Z-dsD%0tT&>dwOT>-ic(oiJbSTD4|(u{RNb1cG7yHVUnf4K^lt28M}l|xXainbi`wg9nc4572O1SudJjSL+_LoAPqJ43SIHc z#vY*yp2^rPbk0~yH-|ncYv^XsM`bnL6r`bPJ@9PCexW;_)z~L=!?VP+f>M?BbW7-q 
zvW{*6eFkYL3LCOO;gIp0kb=9715iZSMEjw1Wh31hN>es~G}O=p7Do+LU~t4x1Uf^Y z2`Cyv!l7s;5eCxGv|f02@O;K;VKAQ8I0a`? z_R`(pIAssr6?Q4RK^kg=1rGNa{}6t~y~gjt(2N6g3YxG7A|I7JVJ6P1(nAUHueK@S9JXxa?CqH$N4j#n`52-EQLF~7ojl{55E zI7vB84}tT5G!(63JP>B#m5uwtOuUkD501`&4uhk4@Eq(^LjY;0@mQFPS2G?7bMUIh zLt%EtIeH`5p zqIHZH!Xmu3@myGl*D{{LMU~6+IJk&%i5?3VRxW}x)Oal{#p@Zbge7=g^ZgI!Hs)HsCFdRAD{d!uTSr!<)y14?!Ag7=nP~hA!|pW@HGPGoH~4;p)m$dI4Nbc|y+zX=qvy zPc|$;!b!svM4X6O0@qSr(2L=k%5!=VTmz(`XglMZuoZ7>d=<9fZ44W(tGuR{!F7~Z z^isIC@)D$>#!q1f-of}GY{%Oh--T@%@933qedR5^0z*K zhMLiO3_Z*&!hXEFnOWGEkw$NXn=7gG2Dq8>gn5kbRQ6BbmQF(KHwc%VhtnSFLH2ONU{(ooYQ9Kw5<@xnp8r-=%o z44d8p6N*I#VO%k338bNENAbR94&eyi$ILDq#(T$XhubLM=xuPa@|E5SlOPR62bj5q z<9L5Fmv9X4XXb?4D?jO-a69D(y#sEmdA*lllVY0K{%ni=sj>pHJ08D zcTi*KT_6oj`yC%@<`vH1L(C-MG(I?HKioylMDK$;t8w&RxD!Z2(P3sZgM~Arj_Fr3 zAKYEdO8*9TQ?t+q;I3+BkcOHCgmd@^GpZVgn@(*j9imm($sTedK|R!Lv<%YFw2Sy? zvygBBA7vI4&f_Cv4#JFz&>`4qMr$wy(ol4qSyZ@;k2Q-3m+&!WVYsL2p%23WHJ&~M zvnmSGP_u+^4WD2Z7p~&t&0@k8Jtut(?ycsakHWpw?DP?khNj)bC!3{&8~7x%q;MUd z7;_Ttr{<hqq>r^%yq$bg4KpL9%5T9*U5+2~Q%!T>XtfA^ z86E}FP;{ADS9pytHR}kk@Fiw#c)VJIz7CI5i__QOv1&1phMEn8clZjkzVH@bZq^gt z=%wjf@IM;JKpKj! zGn)#Z@wH|X;S;{bYz$9VE714hX=-`;9z0bo2hvcpg^-4CFq;dh_O7<4f;7ePpwWrgXgN%KpJY2f{h1FLa=bjbeie4=~wVVwHEynUZB>bUw||; z?K{5BY$JTbx0=bqSA0v%TX>0Dmwp2;R_oBO;YA<~MR%I*g`fBivz_n*-)^>rm#Yov z_wX{cKK%|}s@4N(sM$$$;k(U_Vl2MP>>$SIjpbLWh1aVs=yZ6U+MG^< z*Q(7x8frRmf9F8#WPdu5e8&Lmt!W+JsJ5ar;0 z*-Ol>x21o;Thuo6cQ~jf)89ZEnx254F#CzQ@#AJ+F&BO;HWuElwr67CZE8FEC%hG; zq3CIIpqK|gWeyM%@snnMc$eCViGz2l9T^wAL+t?4P;-cwf;Sj>x`F$alBdRHb3 zyhrWAWQKREotaD^4Ndns6-u1kcV`~csepk0Bi0S?SGzOW;C*U0CM&!bq@n0}bGTRl zKW7dT^W%S*TyzeYfni4iFc5q|?E%tIbCg&Zzi5sW3*i^c5n@4|W#VB~Wf(Mi3BVvA z4NWhGUoppsMe)n#Xt4->DK-auQ0>WNheK+B@xU5LL(%Kzc(DY2%^W8d$FG`W;Uj7v zCKr5I?akzb52?LC8fs1wOXD}qiDD`IhB-kjsrP3R;bUq)CILRG_GNN|G&H>&e#e|5 zmc?(Ilf^Rlt=PQqNp&ET1fNg`FnQqPAPq(Do72S#_&sx)SRTJ?PKD2?LzrLS)9PR* zAACw3lmXCCbCy^ce`wAWE8!2!8Dd49WAaBG6yt+`SBElQkcOsL!=IRQ#H#pXbGBFo ze-v8?{zDzk6pYr!VhX@vkcOhq&G}*t{FymVtd2i5=fW4%QA`o|ygHI844+d+WB@eO zTqM@UUz!WWTKEfdfml-?%M^z%sbiR8@I`erQxv43>Gkk8<`S_k{@PqD*1=!JmV&RU zaCm}|tQ_-Avq*aZI+TM53aPG>5@ zchqT21^6~dL(z0|z1RXzGuMgD@lRDtiSGnvZpJ#_|1L(NTMYdpi;D7L~Q z<_58)K9{KuKT_u~)!>KfY^EwmL(_5GjLre$MpOdedTcHDsXCvj2|rQiF*V@FAPq&o znvzK3wke7PZkYo7LS4kvfuE}jncDC(bpc32&8=cv{JXhDY=eI@gJQD2l&KHDQkO9G z;Fs!RrY=ZB(>o9`=610?{?pthw!?qKHiF-(%bAAo8+94e0DcY9(D*oWm)MDLnLEXf zM69_3{-Ca6n!xYXl}uy!ow@>~q2^w(E0NjUBX%J&nY+c#`dX$r{7GHIG=o2?tC^-C z4NdPsWHa}R-HEK`KCv5-CAJlus;*~R!e7*NObhrkNJHb{XbuknnU3Tl+~xr|qHbdR zaJssYX$_~T8!`YIYHA`&pr$G^1QHG5=mJB)(M%#7&QLcq7)V3YdlA{qgJMs@V}`^4 z5g*$Iwp58phNE4OumRH0_*~`@u@8~cJS_Gma+rtUZ|YX29sE_@!nB2LH3-sB^SIcb zNHC9y{fOM=QL(SSgXswWP`5K3;P2`-raeeQ(+3er=1FlNk;gnC4j>X^yCAXZE~YaQ zqwZun!9PJ78vl!VMjS%qGf#_yiM-}1B$K+A>4wCqdzh|>OWh69&}arD;fp#}!W*q- zrXOHZkSywcrU#N)-N$qXX=wTgqM-SQIGiXDt!+r;k7W?IlL<;A*&Gi{Az7jKf2X1G zMa&E0D59`=UK~jjGS4B1sxbj1dJ13>XCeTkq2^_AEK$t7B#t49nis{<`XQz_60aU) zdLgJ9VtRr!G<^b5(!45;CrX%C#BoIN*nUV3^$61!$*vw|`XC;VhQ^mMZ-|qK(&lw> zB2mh`hU8X{GXs!Z>M^E2l2bhj(oplZIF%@8-V&z}WzC!7Wc?H~7)ex5GJ}u=^#n5z zq@n3Eh>GT2aXL}KydzE{%E$hSJ_Ko~ z84cv)W;7ENGaV1CKVudm)zzoW0;HPygqaW0(DWdYY+9m3kY@B0K*TOVYN;=n#Yj!{ zIkO0<0n*U;cIG#6E78{cDsCa#m^M;Zea$RG>Zq@nrATe{B}hZfpW+UpgZV?;PP8|_ zi`(>f%u1xb`j%ON)KlLu%Rw5NzMJT5#frO#PF9S#ljs<`25F>zU{)gy)%VOQqyb1n zTkq z)l_B!(oFrrtOsdmx<| z3qml}a4rY@s_sk*w^lPi8ftmOLqsntUOY(jv`{gm+sqb(P%S2i;Ht?;APr4FO7yjI zh)0M%R(A0)(K~iK(nkHpY(tXOugq421Zime04uk6oak@m5|0u6tei-D^(V6vX{Y{R zb|7ul?;s7e@`$I1K~|!8k{DLRz9S=mX-Mp>85324j^5%%peW5 z3W(>35mtVuw$*UUC!RGRC!FILFee|Vhn9_zK^mHVkr-_i5-$*=tb*csVr1+=gwYTt 
zggC(;=K%%M(D-pyQSmY{)+!=iBF0#Sk)E1|IgA9fc;*noYA8rUtrFrjVuDp%yh@C> ziiuZ@oXjz#x0ZuBiuBU5Ge`A1bmYX?&^wn}P$B{lD z4UL~>l@)IjQ>`-MEn(sX&q$M$DKpL9%5+4w=tcv1&VrJ|g$gi5$$%Y)N{oDJPl@?fHSvj2n7M?E)Cw^d zkr7%!<^o7V(_a#ctyVv$u-d`>Kky^4&{iZWM_(OMDaGBOIJq4CSCy5ehMsZ~dO zMJ%yuBjdFa%ynd(R-CzpjMa*PG}LM!z9Uvx^~JZua;u*B#wg9)LMCdZn48E1tt4{; zq@n2_iPcsk@dL5SYAC)ZR>s~%rf6lEJIG|M409Wq1k%v>byidHGqKicB7P#)SdEeC zS_S4lGEFPb+(V{ntrGJPnW0r=9)L77J;PC5 zt;C2^+oz?NPHc>Qip5i@K8!)Eb$u)nJ|@^R(*B zGi0t-4WywKDcVHPB1DUjEL=2=+RQ6tp;n7|i7e1+GA}?Hn*N>GX0;K&5nHWf@hh<< z_ARnRtINDW7Hf5w*T^D}hQ{x-+KWGl9acN>2eI91i!9d~Fz=CNT7BjnvQ(?57x+63 zwK_>IVz<>%iY0bg9i$keG4lyosWoCgA}h3p%m&CLLL)HD}V0wOTWfhFX*aIpO#e z$?fEG_K>m}tr;EJsI_7;kPTW(CIZsX2ug%3MnVY9q9vG6W36Z=DPtm=H9un@n?M>G zf7t3Nd5A++K#C_0S}Y=IB=a?T3SsQ%M1l^`P^*uWlQ?SimU0kBtX@)fqb>6T*`l>! zz9T^`nfV6N&`1Jt!s;jGCXQQurCh`@S1ht!YtP0Y+q8DfPh=}dL*q|d1EoB~DQkd~ zNSw6#BfGRtY#g#v>&Uv09a;yFhFU|Ud`@M`!BSr0j5SC~GP<%^kUd%#HZ!tY>&#{X zX=udf)T#Yd@;cecv zHCie{Tyo_=4r)Ew>_|upupUGMX=wa)YrIr~xMq!$iW66@vB(jv51R`)to3GdB8RkI zAPu!9Nu`OK)JYD7rI4%Ic(x>RMH|PK zKrX|-{hfx!zqgi44TyKvGO0fC)>?|(&?d2Ekn7q+wls20n*h>KYn9ZP_-L(^8WA6? z6;eZEDq9}8rA=YWAvd+jY*~z00l5v* z(D-y~z0`t8v(`z?iBxMX@<5x#RzdD-Gug_>J#7X^L#<6xYa+wiD77LY)&{AiF_*25 zJksW{)sTnUY_=*$LnAn0MkN4YSeqq3p}T4!Pqq1MP2`C-kF9|`25D&gS4)ye!j4`7 z5SAq%FSJE$9pt&Tkgbh8(-wd<)Y>YwCB9o*q&CDiD<~x!OWFF!D{To|4|%CAX6u49 zG}3{Lv9?R?iJ#UssU7jd)d+d3EoU1dZ?t7>1LQSGLp^cUE~yjgvUW-x$yjR#@L#@41S2DA;N9samvUW?IjkRoZsNJAsN$n4fZsVC{NLQ;T?ceO#HT_hvX zKoW@x49bOmuZYW3^puXEa9J$#z11f;80g zi*-gCLguqhOM}V0)+scTwwLXO#%X)luBc1f4bsr4VCK^p2QVqK6%k%g`E(nzw9 zbq+-|jSZl%rm`#wX$nX~t;^C_vY2&A8bcPfE=r?~Lu_v}UOUM4LQyTm_5^8YWCB^z zx+;w)OITN=ab$5C9bzPcBma?v)xwYf$05q3& zjO~x+)Q*BQ)VeKACCgd2q$y-s>!vi>IK>V|6Sb45yb2vtZF@!W*g_&k!XJH4|W9V)6TNPK^huaK-RRLO7qDY))Q$SS=}`T zEu>vwN23L`^Xw?J07yeUb*vZCBC@viTv|xhvYw$uwae@{w1{?z9g7y$E`l`FdMzy_ z>sha)C1hRerL@?%#!f_wYggF`Xff>yJ07H=kriY^>#ek$Y+${SmXY;cQ_xb{4R$hG zQoGJhLQ8-&)YHWJAgv-BTkoZnWFzYxT2{NwPD9IRx7ev@Y3*ioSN?YzYJHa0lFh77 z(i*a<^-)@F++$~=<+Z!)478kfhn)`6(8va|rIjkJCtFxwq;+I-*BrEx_JEy@R@CmZ zv(O444fXh~h_s1pZKX>a$yQbxT2*_@&O@tckJ!0rW$htIL#=2!CvHVOFlJ>)n~i7e zLbST}lwE*U)1I*NK^htflF3%|6hKBn`wdd?2v<65+J?*S-(pIvq z^;OzJwy|uquJ)Q;hSt$uu}jg~+DnjzT0f;7WC!bqw4H2keV4Ww@7R@SeeEr~0vS z+En|@u0xw>pV+l%W9=hIL+z~60kVgkMcPkxw=+xoj5Ky5+FVOzH=xb5FYJ1dhDH>U zwxjhNC_9_<8=2zb(N(%DUD3y_9-0(SH(JZncMav2*!9SFdRs9)1r0d1{i zfHc(hNQcN?cD!_u>}jJ?$gtThsB`ha22p1UnUz2q8aYb#wR1>E$Ub&<=`h*bwH|&B3^~M3l1`I@UHj24p-k*Pv~wtq z-HUbtX{cwI?RBczaP}`wP2*qfd}#MjR`xfvTPO>A0PPye4AM}$fOL)=Vdr-$oDa8s z(peL7l5(6%K#uBi>Or$INJAqR$j>*E-0NRN4gH8PVk2fp-u{r6V9QKCV!`) zo^f_j=`uOiE+Snb$Jm9@o*@r=7!8Et*+Zz)45XoU3F#U+!7eUcCCA&vq$_4l_88hb zl!HBr_6lWZkAO5Za+92Fmy&LflkAexb#kKXB-$^On>~T{4dr5wqkRzn-)X34nq5}9 zO-{ATNVmu-c4>5AC=YuY9S}-nPoe!o2_Ox%D@gaq8FqQ;E;-#UC*3jgIrZrWhw`$& zqk}?8>=}@TMjn#0?Ml)Ea+Y0Dx=+q@{ee1l&>Yk1RKs^Fn+-wQ{hfw-=Gj%H$K+hQ ziu8z_V^>CphYGOgQRm(m>^amCv>*+&Ye>(?1$K4mDLLP+COt6=vzO43p+f9MbVR5i zdjX`Ok(cCRyO#8VTx8djo|6k*SJ5${qU;rPbf^e>865@EP|q^EuJoE*YS)ookxT5_ z==e|x_BuK)RGhtrjtvz9X{g;mdPlCX>q~FR<#s*kjaiz#g-#5WVsD}oLM7Q7APtRt zBv;#wqz~jOyP@=+Td>phbpl5 z(P^Rb>^*d9s2oT`?G{oRxxsEOrIPFIX3`h4GW!Uf8LGrSL}!F5vJXHS8p$9x+pVOC zGmYO;N+&nEo}zO?RoN%#>`)cX8MK|?kMqDu0Wxc|uh4~|TI@@7L8vDC0;Hjl@8mYSjr5J&Y9~uy$t|w8 z=#o%f_6@o?REK?yE&^$&XQ$m>`bqAv+etsj?RHyqd8h&V9$gly&%Q&KhU$Sd)b12? 
zk-P1V!B}#a-60rbHfBGeD?^RgkLZd}L-qqmLo+gy`|K{kOypj>b1;tF<4Q%>gqpHn z(AA+P>}PZpNJBlp+1-O#$pdz`U>0(}-4$IQYQd(X>q5=hG<0pK8AwBI$MBMhof34D zvfU$?&1}u;=*CbhHUr%dYRN`G8k&KUA=?SwlbY?MaFVLaLirHJn&{?`pEb};$h5!H zP|snzXV60)vID_*@}SK+rvQ@u>P!R>tnH+O;vfyR`vh~6NA2Fh9OMzZS1`NTmi>Wl z3AJIrqrp%z`wgU_842VGyI(LjdED+B%taoHi$%AG+6Q9LZJ~DTPjo9tLp`VMfx$fF zDSJRLkvwVlM|Xuf1>(@1p^gC;x+By9q@nhZU_SD9dvGu>dBz?TOftI$vY>lHT>_cW z-J#BbOdt)-@R4WjUxQvUY!40oLjEJpjqVS14`f64g}Mc@qI*Fa>N#%@4;CQL*~5bQ z$vvEJ0qg#|4X%SM9Orkx-vN zF7$AycOWNvDAWt2q4uO;Y4WB$F<6ScVNVE_H2Vh<(PN>0fduqusBa)QNJBHqk$3DV z!LsCSdvdT0c`Gh2dNMRHkc6HH4G82xk0SsL_1w3o2P=^G>}kRBcV^Th-`Gonb;;NE;$R)} zRa`0bYG{0*Bzh$@E>Hr!4Ej#bdwY4X0r}2e7OYRcwU?qdLX!ez(CeXzfzs%;&;-zT z+N*+%$&dERU?cK_y&~AqoEj*P-U>|#ltXWZCI`xbzB8j4`NdunY)XE%R|lJrpW-T^ zcSF+y712APX@LspZP0gm((U!Z7G#>eF4&w*wb!B#LbC!@(EFj8fy(H;&_An}cV^(EY4bshH0;emKdHynLZ61_2Wp~E zLh}MO(8r+f^nA6YAW7P`7$iu`7SI==MS(i#^U%UTZS+}a0q8sJt--eBcY8~)4f)Lu z29wRDf%@pH(2_ts^kryqpf2b;Gdd*4*xQ5c$)EPNU_0_hTqE>tXnCL^`X;n2&;Wf6 z`p%d*dsnbivdi8X?3f&D??69OfP_cV_fR&Sviqc2CY~?+bQI&Jx!OO%1INv_!vz)&*LipF!Ul z13UG2sAS0gEtrz*why3@(58SNO%H7hv_{iH8$jRbgkacY)Jeo)k`aev(WVd}&}da4 z9F5k7#z5bh(JMK-eK6QF*<*)-f#mqOHfS^ul#E8xfk@NKA`$(`)a!&hjuy=9} z`w;pqv^CHU{TkX5Xp1^sfWFf{9_*i-U>^(iOU`W{4fZv61UjNWLfZo!(C?vbf%c&9 z%ovoMWS4<93KkmP*!>EPhxy!NU1 zOrgDjZt-!UJ%O(AuF!7KcjhhRa^)?VX-CJOJ2Q>#ba|Ka_(ax(9+_rlyWbL;Y8mWfQX{?5L9%6O(wL zPQtmGiB~)^F?q`-UKo+^%oSa_XLQ0P*MBYjHa1~P(dbfPQ*44<^uHG0IvtxZw@7sP zlUK0`w+jE)@+0$H2@n44W%WW=!lQq6qc3(P+&UZ^ZE)d2T*3|K;XhyY!2HY!$Np^b zWnt!oPycH1Xi4S--TJe|jd@xBd>C!vyovQKkzqp_ZU8kb(6`qhCLz1Cu$U_;=g@&n zzrj?m!GR$?`wi|x^{Z0(uP1W!>=zggEJyxBgJHwDVlkN4DLIf3eLDM~exv`p74Rg` z=f6Fh#c7%g7)sCK)vG#14Wh64_p$u-M0`TQ1XogCuu-o-zd_8vN|kFCgPej0SlRB; z-_Ga~Hy6n3%{#w%Hm~zxREx=$CGUTF?+Gqvy~NOn#K^GX+3F_!xmhRuxwR)gah3eH zKV$NGohl4537MRp56y8WlukT1!=2DNruc;sf4)GrESd5q0q@uOZ2#*2QDWlctj?On zzYX&y{kc~p{kfkc#{adW{PmZ!c_n0W{&W5LI)A>>3+I)}CN7`pPG}xe{M%S>0`kAS z73ZV<>!nWJk4?PS(pk4|_3VG{WJ%He?cYbgc&?GcVEdTr*{?j(}*Qi$;uEZUNtGKheGwgr=05;BZCsd6o?(9;3 z_Ved3|HBsdS2vsUCLVJ~*Dbzzc4lXs|6z33oc=1t6nAz-Z$kV($MwH&rhj#NFFh{t zLwa1aQS?LoA4j32|8ea4_jfRLzVi+em(O-5Y>ACYoc$PfHsa#h5>L%==Z+qvl9GmS zeabUK8E4Ng|K~xV{K#J1uzo`ZCuOcuzH<4BF-fKV#~QUITj$D_pzVxWa>(LD8px-O}0g zoz?!?tl!|_z54XxoCSkN3>?U?1Dys=Prb?({;wA)TzlMv31$D)+242n|GU0`fjMvC zzX#_0;T!}1&&LHG>;C`u^!Io8e>EuJ4Mq=^|2?kZ{RZ_KHk=tW^#A-#{(Wfw*WH1` z``7XMzX#d#uVp2x)GS-2)c@B&drXK)YW>d*wcH@C;6JzQf|bfws$9NO!IJEVegk_J ztjtskRIDDTR +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# #include +# +# namespace +# { +# +# using namespace DB; +# +# const UInt32 ROW_NUM = 2000; +# const UInt32 MIN_STRING_LEN = 3; +# const UInt32 MAX_STRING_LEN = 5; +# +# const UInt32 PLAIN_ENCODING_CARDINALITY = ROW_NUM * 2; +# const UInt32 MIX_ENCODING_CARDINALITY = 800; +# const UInt32 DICT_ENCODING_CARDINALITY = 20; +# +# UInt16 nextNum() +# { +# static UInt16 idx = 0; +# static UInt16 nums[] = {0, 21845, 43690}; +# static size_t nums_len = sizeof(nums) / sizeof(nums[0]); +# return nums[(idx++) % nums_len]++; +# } +# +# template +# void generateValues(MutableColumnPtr & col, size_t num) +# { +# using FieldType = typename NumericDataType::FieldType; +# +# const size_t next_num_bytes = sizeof(nextNum()); +# char bytewise_val[sizeof(FieldType)]; +# +# while (col->size() < num) +# { +# for (auto bytes = 0; bytes < sizeof(FieldType); bytes += next_num_bytes) +# { +# auto tmp = nextNum(); +# memcpy(bytewise_val + bytes, &tmp, std::min(next_num_bytes, sizeof(FieldType) - bytes)); +# } +# if (is_decimal) 
+# { +# // clean highest 3 bits, make sure the result doest not exceed the limits of the decimal type +# if (bytewise_val[sizeof(FieldType) - 1] > 0) +# bytewise_val[sizeof(FieldType) - 1] &= 0x0f; +# else +# bytewise_val[sizeof(FieldType) - 1] |= 0xf0; +# } +# FieldType val; +# memcpy(&val, &bytewise_val, sizeof(FieldType)); +# col->insert(val); +# } +# } +# +# template <> +# void generateValues(MutableColumnPtr & col, size_t num) +# { +# std::string str; +# while (col->size() < num) +# { +# auto len = MIN_STRING_LEN + nextNum() % (MAX_STRING_LEN - MIN_STRING_LEN); +# str.clear(); +# for (size_t i = 0; i < len; i++) +# { +# str.push_back('a' + nextNum() % ('z' - 'a')); +# } +# col->insert(str); +# } +# } +# +# template +# ColumnWithTypeAndName generateColumn( +# std::shared_ptr ch_type, +# size_t cardinality, +# const std::string & col_name, +# const std::set & null_indice) +# { +# DataTypePtr col_type = ch_type; +# if (!null_indice.empty()) +# { +# col_type = std::make_shared(ch_type); +# } +# +# auto values = ch_type->createColumn(); +# values->reserve(cardinality); +# generateValues(values, cardinality); +# +# auto col = col_type->createColumn(); +# col->reserve(ROW_NUM); +# for (size_t i = 0; i < ROW_NUM; i++) +# { +# if (!null_indice.empty() && null_indice.contains(i)) +# { +# col->insert(Null()); +# } +# else +# { +# col->insert(values->operator[](nextNum() % cardinality)); +# } +# } +# return {std::move(col), col_type, col_name}; +# } +# +# Block generateBlock() +# { +# ColumnsWithTypeAndName cols; +# +# // test Int32 type +# std::set null_indice{512, 1001, 211, 392, 553, 1725}; +# // Nullability is expressed by definition level, and encoded by bit packed with smallest group size of 8 +# // when null value appeared. Here we make a big bit packed group with more than 1000 values. 
+# for (size_t i = 0; i < 170; i++) +# { +# null_indice.emplace(622 + i * 6); +# } +# cols.emplace_back(generateColumn( +# std::make_shared(), PLAIN_ENCODING_CARDINALITY, "plain_encoding_i32", null_indice)); +# null_indice = {917, 482, 283, 580, 1926, 1667, 1971}; +# cols.emplace_back(generateColumn( +# std::make_shared(), DICT_ENCODING_CARDINALITY, "dict_encoding_i32", null_indice)); +# +# // test string type +# null_indice = {818, 928, 1958, 1141, 1553, 1407, 690, 1769}; +# cols.emplace_back(generateColumn( +# std::make_shared(), PLAIN_ENCODING_CARDINALITY, "plain_encoding_str", null_indice)); +# null_indice = {1441, 1747, 216, 1209, 89, 52, 536, 625}; +# cols.emplace_back(generateColumn( +# std::make_shared(), MIX_ENCODING_CARDINALITY, "mix_encoding_str", null_indice)); +# null_indice = {1478, 1862, 894, 1314, 1844, 243, 869, 551}; +# cols.emplace_back(generateColumn( +# std::make_shared(), DICT_ENCODING_CARDINALITY, "dict_encoding_str", null_indice)); +# +# // test DateTime64 type +# auto dt_type = std::make_shared(ParquetRecordReader::default_datetime64_scale); +# null_indice = {1078, 112, 1981, 795, 371, 1176, 1526, 11}; +# cols.emplace_back(generateColumn(dt_type, PLAIN_ENCODING_CARDINALITY, "plain_encoding_dt64", null_indice)); +# null_indice = {1734, 1153, 1893, 1205, 644, 1670, 1482, 1479}; +# cols.emplace_back(generateColumn(dt_type, DICT_ENCODING_CARDINALITY, "dict_encoding_dt64", null_indice)); +# +# // test Decimal128 type +# auto d128_type = std::make_shared(DecimalUtils::max_precision, 3); +# null_indice = {852, 1448, 1569, 896, 1866, 1655, 100, 418}; +# cols.emplace_back(generateColumn(d128_type, PLAIN_ENCODING_CARDINALITY, "plain_encoding_decimal128", null_indice)); +# +# return {cols}; +# } +# +# void dumpBlock(const Block & block) +# { +# WriteBufferFromFile output_buf("/tmp/ut-out.csv"); +# auto out = getContext().context->getOutputFormat("CSVWithNames", output_buf, block); +# out->write(block); +# out->finalize(); +# std::cerr << block.dumpStructure() << std::endl << std::endl; +# } +# +# } +# +# EndOfCodes +# +# How to generate the parquet file: +# 1. Use above C++ codes. +# Put above codes in src/Common/tests/gtest_main.cpp, add following two inlines in main function: +# tryRegisterFormats(); +# dumpBlock(generateBlock()); +# 2. Genetate /tmp/ut-out.csv. +# After compiled, run any test, such as "./src/unit_tests_dbms --gtest_filter=IColumn.dumpStructure", +# 3. 
Generate the parquet file by following spark sql +# create temporary view tv using csv options('path' '/tmp/ut-out.csv', 'header' 'true', 'nullValue' '\\N'); +# insert overwrite directory "/tmp/test-parquet" using Parquet +# options('parquet.dictionary.page.size' '500') +# select /*+ COALESCE(1) */ cast(plain_encoding_i32 as int), cast(dict_encoding_i32 as int), +# plain_encoding_str, mix_encoding_str, dict_encoding_str, +# cast(plain_encoding_dt64 as timestamp), cast(dict_encoding_dt64 as timestamp), +# cast(plain_encoding_decimal128 as decimal(38, 3)) +# from tv; +# + +CH_SCHEMA="\ + plain_encoding_i32 Nullable(Int32), \ + dict_encoding_i32 Nullable(Int32), \ + plain_encoding_str Nullable(String), \ + mix_encoding_str Nullable(String), \ + dict_encoding_str LowCardinality(Nullable(String)), \ + plain_encoding_dt64 Nullable(DateTime64(9)), \ + dict_encoding_dt64 Nullable(DateTime64(9)), \ + plain_encoding_decimal128 Nullable(Decimal(38, 3))" +QUERY="SELECT * from file('$PAR_PATH', 'Parquet', '$CH_SCHEMA')" + +# there may be more than on group in parquet files, unstable results may generated by multithreads +$CLICKHOUSE_LOCAL --multiquery --max_threads 1 --input_format_parquet_use_native_reader true --query "$QUERY" From e1fcdba4dd51a4b4af500c1a09663820004a4a76 Mon Sep 17 00:00:00 2001 From: copperybean Date: Sat, 24 Feb 2024 22:47:53 +0800 Subject: [PATCH 114/392] fix style Change-Id: I8f7ebd173558b16d94d3161cb0b5300e7e78833d --- .../Formats/Impl/Parquet/ParquetDataBuffer.h | 21 ++++++---- .../Impl/Parquet/ParquetDataValuesReader.cpp | 40 +++++++++++++------ .../Impl/Parquet/ParquetDataValuesReader.h | 6 --- .../Impl/Parquet/ParquetLeafColReader.cpp | 18 ++++++--- .../Impl/Parquet/ParquetRecordReader.cpp | 3 +- 5 files changed, 54 insertions(+), 34 deletions(-) diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h index d4956f83092..f21216d5b5d 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -9,6 +9,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int PARQUET_EXCEPTION; +} + template struct ToArrowDecimal; template <> struct ToArrowDecimal>> @@ -27,8 +32,8 @@ class ParquetDataBuffer private: public: - ParquetDataBuffer(const uint8_t * data_, UInt64 avaible_, UInt8 datetime64_scale_ = DataTypeDateTime64::default_scale) - : data(reinterpret_cast(data_)), avaible(avaible_), datetime64_scale(datetime64_scale_) {} + ParquetDataBuffer(const uint8_t * data_, UInt64 available_, UInt8 datetime64_scale_ = DataTypeDateTime64::default_scale) + : data(reinterpret_cast(data_)), available(available_), datetime64_scale(datetime64_scale_) {} template void ALWAYS_INLINE readValue(TValue & dst) @@ -84,7 +89,7 @@ public: auto value_len = ::arrow::util::SafeLoadAs(getArrowData()); if (unlikely(value_len < 0 || value_len > INT32_MAX - 4)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid or corrupted value_len '{}'", value_len); + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Invalid or corrupted value_len '{}'", value_len); } consume(4); checkAvaible(value_len); @@ -110,7 +115,7 @@ public: auto status = TArrowDecimal::FromBigEndian(getArrowData(), elem_bytes_num); if (unlikely(!status.ok())) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Read parquet decimal failed: {}", status.status().ToString()); + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Read parquet decimal failed: {}", status.status().ToString()); } 
status.ValueUnsafe().ToBytes(reinterpret_cast(out)); consume(elem_bytes_num); @@ -118,14 +123,14 @@ public: private: const Int8 * data; - UInt64 avaible; + UInt64 available; const UInt8 datetime64_scale; void ALWAYS_INLINE checkAvaible(UInt64 num) { - if (unlikely(avaible < num)) + if (unlikely(available < num)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Consuming {} bytes while {} avaible", num, avaible); + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Consuming {} bytes while {} available", num, available); } } @@ -134,7 +139,7 @@ private: void ALWAYS_INLINE consume(UInt64 num) { data += num; - avaible -= num; + available -= num; } }; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 3afc66dcb36..4ebe3d6a636 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -8,6 +8,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int PARQUET_EXCEPTION; +} + void RleValuesReader::nextGroup() { // refer to: @@ -142,7 +148,7 @@ void RleValuesReader::visitNullableBySteps( individual_null_visitor(null_map_cursor); if (unlikely(valid_index_steps[step_idx] == UINT8_MAX)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported packed values number"); + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "unsupported packed values number"); } valid_index_steps[step_idx]++; } @@ -270,7 +276,7 @@ void ParquetPlainValuesReader::readBatch( auto idx = cursor; cursor += count; - // the type of offset_data is PaddedPODArray, which makes sure that the -1 index is avaible + // the type of offset_data is PaddedPODArray, which makes sure that the -1 index is available for (auto val_offset = offset_data[idx - 1]; idx < cursor; idx++) { offset_data[idx] = ++val_offset; @@ -394,14 +400,17 @@ void ParquetRleLCReader::readBatch( cursor, num_values, max_def_level, - /* individual_null_visitor */ [&](size_t nest_cursor) { + /* individual_null_visitor */ [&](size_t nest_cursor) + { column_data[nest_cursor] = 0; has_null = true; }, - /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) { + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) + { rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); }, - /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) + { if (is_valid) { rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); @@ -435,7 +444,8 @@ void ParquetRleDictReader::readBatch( auto * offset_data = column.getOffsets().data(); auto & chars = column.getChars(); - auto append_nulls = [&](UInt8 num) { + auto append_nulls = [&](UInt8 num) + { for (auto limit = cursor + num; cursor < limit; cursor++) { chars.push_back(0); @@ -444,7 +454,8 @@ void ParquetRleDictReader::readBatch( } }; - auto append_string = [&](Int32 dict_idx) { + auto append_string = [&](Int32 dict_idx) + { auto dict_chars_cursor = dict_offsets[dict_idx - 1]; auto value_len = dict_offsets[dict_idx] - dict_chars_cursor; auto chars_cursor = chars.size(); @@ -462,7 +473,8 @@ void ParquetRleDictReader::readBatch( num_values, max_def_level, /* individual_null_visitor */ [&](size_t) {}, - /* stepped_valid_visitor */ [&](size_t, const std::vector & valid_index_steps) { + /* 
stepped_valid_visitor */ [&](size_t, const std::vector & valid_index_steps) + { value_cache.resize(valid_index_steps.size()); rle_data_reader->setValues( value_cache.data() + 1, static_cast(valid_index_steps.size() - 1), val_getter); @@ -474,7 +486,8 @@ void ParquetRleDictReader::readBatch( append_nulls(valid_index_steps[i] - 1); } }, - /* repeated_visitor */ [&](bool is_valid, size_t, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t, UInt32 count) + { if (is_valid) { value_cache.resize(count); @@ -505,13 +518,16 @@ void ParquetRleDictReader::readBatch( cursor, num_values, max_def_level, - /* individual_null_visitor */ [&](size_t nest_cursor) { + /* individual_null_visitor */ [&](size_t nest_cursor) + { null_map.setNull(nest_cursor); }, - /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) { + /* stepped_valid_visitor */ [&](size_t nest_cursor, const std::vector & valid_index_steps) + { rle_data_reader->setValueBySteps(column_data + nest_cursor, valid_index_steps, val_getter); }, - /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) { + /* repeated_visitor */ [&](bool is_valid, size_t nest_cursor, UInt32 count) + { if (is_valid) { rle_data_reader->setValues(column_data + nest_cursor, count, val_getter); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 66a1f4877e4..8bc381aa8d2 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -15,12 +15,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int PARQUET_EXCEPTION; -} - class RleValuesReader { public: diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp index e2677d7cae3..17feea80b9f 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -216,7 +216,8 @@ template ColumnWithTypeAndName ParquetLeafColReader::readBatch(UInt64 rows_num, const String & name) { reading_rows_num = rows_num; - auto readPageIfEmpty = [&]() { + auto readPageIfEmpty = [&]() + { while (!cur_page_values) readPage(); }; @@ -245,7 +246,8 @@ void ParquetLeafColReader::resetColumn(UInt64 rows_num) if (reading_low_cardinality) { assert(dictionary); - visitColStrIndexType(dictionary->size(), [&](TColVec *) { + visitColStrIndexType(dictionary->size(), [&](TColVec *) + { column = TColVec::create(); }); @@ -289,7 +291,8 @@ void ParquetLeafColReader::degradeDictionary() ColumnString & col_dest = *static_cast(column.get()); const ColumnString & col_dict_str = *static_cast(dictionary.get()); - visitColStrIndexType(dictionary->size(), [&](TColVec *) { + visitColStrIndexType(dictionary->size(), [&](TColVec *) + { const TColVec & col_src = *static_cast(col_existing.get()); reserveColumnStrRows(column, reading_rows_num); @@ -411,7 +414,8 @@ void ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page) assert(col_descriptor.max_definition_level() >= 0); std::unique_ptr def_level_reader; - if (col_descriptor.max_definition_level() > 0) { + if (col_descriptor.max_definition_level() > 0) + { auto bit_width = arrow::bit_util::Log2(col_descriptor.max_definition_level() + 1); auto num_bytes = ::arrow::util::SafeLoadAs(buffer); auto bit_reader = std::make_unique(buffer + 4, num_bytes); @@ -435,7 +439,8 @@ void 
ParquetLeafColReader::readPageV1(const parquet::DataPageV1 & page) degradeDictionary(); } - ParquetDataBuffer parquet_buffer = [&]() { + ParquetDataBuffer parquet_buffer = [&]() + { if constexpr (!std::is_same_v, TColumn>) return ParquetDataBuffer(buffer, max_size); @@ -485,7 +490,8 @@ std::unique_ptr ParquetLeafColReader::createDi if (reading_low_cardinality && std::same_as) { std::unique_ptr res; - visitColStrIndexType(dictionary->size(), [&](TCol *) { + visitColStrIndexType(dictionary->size(), [&](TCol *) + { res = std::make_unique>( col_descriptor.max_definition_level(), std::move(def_level_reader), diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 42f131ff794..69e694a340f 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -27,7 +27,6 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int PARQUET_EXCEPTION; } @@ -142,7 +141,7 @@ std::unique_ptr createColReader( } } -} // anonymouse namespace +} // anonymous namespace ParquetRecordReader::ParquetRecordReader( Block header_, From 471dff6589abff5d05ab8a9bb267e198f377c536 Mon Sep 17 00:00:00 2001 From: copperybean Date: Sun, 25 Feb 2024 14:26:53 +0800 Subject: [PATCH 115/392] fix test Change-Id: Ia7dbf1d762f7f054a9aa677caaaff6bfe1a42c38 --- src/Core/SettingsChangesHistory.h | 1 + .../Formats/Impl/Parquet/ParquetDataBuffer.h | 13 +++++-------- .../Impl/Parquet/ParquetDataValuesReader.cpp | 2 +- .../Formats/Impl/Parquet/ParquetDataValuesReader.h | 4 ++-- .../Formats/Impl/Parquet/ParquetLeafColReader.cpp | 6 +++--- .../Formats/Impl/Parquet/ParquetRecordReader.cpp | 7 ++----- .../Formats/Impl/ParquetBlockInputFormat.cpp | 8 ++++++++ .../0_stateless/02998_native_parquet_reader.sh | 5 +++-- 8 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index ece48620618..6fb8fb9358c 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -176,6 +176,7 @@ static std::map sett {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, }}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h index f21216d5b5d..5c37375fa0c 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -38,15 +38,13 @@ public: template void ALWAYS_INLINE readValue(TValue & dst) { - checkAvaible(sizeof(TValue)); - dst = *(reinterpret_cast(data)); - consume(sizeof(TValue)); + readBytes(&dst, sizeof(TValue)); } void ALWAYS_INLINE readBytes(void * dst, size_t bytes) { checkAvaible(bytes); - memcpy(dst, data, bytes); + std::copy(data, data + 
bytes, reinterpret_cast(dst)); consume(bytes); } @@ -68,13 +66,12 @@ public: 100000000 * spd, 1000000000 * spd}; - checkAvaible(sizeof(parquet::Int96)); - auto decoded = parquet::DecodeInt96Timestamp(*reinterpret_cast(data)); + parquet::Int96 tmp; + readValue(tmp); + auto decoded = parquet::DecodeInt96Timestamp(tmp); uint64_t scaled_nano = decoded.nanoseconds / pow10[datetime64_scale]; dst = static_cast(decoded.days_since_epoch * scaled_day[datetime64_scale] + scaled_nano); - - consume(sizeof(parquet::Int96)); } /** diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 4ebe3d6a636..6743086e9e6 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -306,7 +306,7 @@ void ParquetPlainValuesReader>::readBatch( }, /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) { - auto col_data_pos = column_data + nest_cursor; + auto * col_data_pos = column_data + nest_cursor; for (UInt32 i = 0; i < count; i++) { plain_data_buffer.readDateTime64(col_data_pos[i]); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 8bc381aa8d2..688de4f52eb 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -97,7 +97,7 @@ public: * @tparam ValueGetter A callback with signature: TValue(Int32 val) */ template - void setValues(TValue * column_data, UInt32 num_values, ValueGetter && val_getter); + void setValues(TValue * res_values, UInt32 num_values, ValueGetter && val_getter); /** * @brief Set the value by valid_index_steps generated in visitNullableBySteps. 
@@ -106,7 +106,7 @@ public: */ template void setValueBySteps( - TValue * column_data, + TValue * res_values, const std::vector & col_data_steps, ValueGetter && val_getter); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp index 17feea80b9f..52dfad7606a 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -113,7 +113,7 @@ ColumnPtr readDictPage>( const parquet::ColumnDescriptor & /* col_des */, const DataTypePtr & data_type) { - auto & datetime_type = assert_cast(*data_type); + const auto & datetime_type = assert_cast(*data_type); auto dict_col = ColumnDecimal::create(page.num_values(), datetime_type.getScale()); auto * col_data = dict_col->getData().data(); ParquetDataBuffer buffer(page.data(), page.size(), datetime_type.getScale()); @@ -282,7 +282,7 @@ void ParquetLeafColReader::degradeDictionary() dictionary = nullptr; return; } - assert(dictionary && column->size()); + assert(dictionary && !column->empty()); null_map = std::make_unique(reading_rows_num); auto col_existing = std::move(column); @@ -372,7 +372,7 @@ void ParquetLeafColReader::readPage() dict_page.encoding() != parquet::Encoding::PLAIN_DICTIONARY && dict_page.encoding() != parquet::Encoding::PLAIN)) { - throw new Exception( + throw Exception( ErrorCodes::NOT_IMPLEMENTED, "Unsupported dictionary page encoding {}", dict_page.encoding()); } LOG_DEBUG(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name()); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 69e694a340f..9cde433b983 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -156,9 +156,6 @@ ParquetRecordReader::ParquetRecordReader( , row_groups_indices(std::move(row_groups_indices_)) , left_rows(getTotalRows(*file_reader->metadata())) { - // Only little endian system is supported currently - static_assert(std::endian::native == std::endian::little); - log = &Poco::Logger::get("ParquetRecordReader"); parquet_col_indice.reserve(header.columns()); @@ -230,9 +227,9 @@ void ParquetRecordReader::loadNextRowGroup() Int64 ParquetRecordReader::getTotalRows(const parquet::FileMetaData & meta_data) { Int64 res = 0; - for (size_t i = 0; i < row_groups_indices.size(); i++) + for (auto idx : row_groups_indices) { - res += meta_data.RowGroup(row_groups_indices[i])->num_rows(); + res += meta_data.RowGroup(idx)->num_rows(); } return res; } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index e35d53dc4f4..2e849f09fda 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -484,6 +484,14 @@ void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_bat if (format_settings.parquet.use_native_reader) { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" + if constexpr (std::endian::native != std::endian::little) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "parquet native reader only supports little endian system currently"); +#pragma clang diagnostic pop + row_group_batch.native_record_reader = std::make_shared( getPort().getHeader(), std::move(properties), diff --git 
a/tests/queries/0_stateless/02998_native_parquet_reader.sh b/tests/queries/0_stateless/02998_native_parquet_reader.sh index 5c129e6c5ce..4e5169c4bf0 100755 --- a/tests/queries/0_stateless/02998_native_parquet_reader.sh +++ b/tests/queries/0_stateless/02998_native_parquet_reader.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -201,8 +202,8 @@ CH_SCHEMA="\ plain_encoding_str Nullable(String), \ mix_encoding_str Nullable(String), \ dict_encoding_str LowCardinality(Nullable(String)), \ - plain_encoding_dt64 Nullable(DateTime64(9)), \ - dict_encoding_dt64 Nullable(DateTime64(9)), \ + plain_encoding_dt64 Nullable(DateTime64(9, \\'UTC\\')), \ + dict_encoding_dt64 Nullable(DateTime64(9, \\'UTC\\')), \ plain_encoding_decimal128 Nullable(Decimal(38, 3))" QUERY="SELECT * from file('$PAR_PATH', 'Parquet', '$CH_SCHEMA')" From f68b788f5900b66ab4623874c98ed1b4025b5fd0 Mon Sep 17 00:00:00 2001 From: Danila Puzov Date: Sat, 11 May 2024 15:34:13 +0300 Subject: [PATCH 116/392] Tests and docs for serial, some fixes for generateSnowflakeID --- src/Functions/generateSnowflakeID.cpp | 62 +++- src/Functions/generateUUIDv7.cpp | 284 ++++++++++++++---- src/Functions/serial.cpp | 134 ++++----- .../03129_serial_test_zookeeper.reference | 8 + .../03129_serial_test_zookeeper.sql | 20 ++ 5 files changed, 373 insertions(+), 135 deletions(-) create mode 100644 tests/queries/0_stateless/03129_serial_test_zookeeper.reference create mode 100644 tests/queries/0_stateless/03129_serial_test_zookeeper.sql diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index e54b720ec98..dd837a58325 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -11,11 +11,42 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + +/* + Snowflake ID + https://en.wikipedia.org/wiki/Snowflake_ID + + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +|0| timestamp | +├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| | machine_id | machine_seq_num | +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ + +- The first 41 (+ 1 top zero bit) bits is timestamp in Unix time milliseconds +- The middle 10 bits are the machine ID. +- The last 12 bits decode to number of ids processed by the machine at the given millisecond. 
+*/ + +constexpr auto timestamp_size = 41; +constexpr auto machine_id_size = 10; +constexpr auto machine_seq_num_size = 12; + +constexpr int64_t timestamp_mask = ((1LL << timestamp_size) - 1) << (machine_id_size + machine_seq_num_size); +constexpr int64_t machine_id_mask = ((1LL << machine_id_size) - 1) << machine_seq_num_size; +constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_size) - 1; + +} + class FunctionSnowflakeID : public IFunction { private: - mutable std::atomic machine_sequence_number{0}; - mutable std::atomic last_timestamp{0}; + mutable std::atomic state{0}; + // previous snowflake id + // state is 1 atomic value because we don't want use mutex public: static constexpr auto name = "generateSnowflakeID"; @@ -60,23 +91,28 @@ public: // hash serverUUID into 32 bytes Int64 h = UUIDHelpers::getHighBytes(serverUUID); Int64 l = UUIDHelpers::getLowBytes(serverUUID); - Int64 machine_id = (h * 11) ^ (l * 17); + Int64 machine_id = ((h * 11) ^ (l * 17)) & machine_id_mask; - for (Int64 & x : vec_to) { + for (Int64 & el : vec_to) { const auto tm_point = std::chrono::system_clock::now(); Int64 current_timestamp = std::chrono::duration_cast( - tm_point.time_since_epoch()).count(); + tm_point.time_since_epoch()).count() & ((1LL << timestamp_size) - 1); - Int64 local_machine_sequence_number = 0; + Int64 last_state, new_state; + do { + last_state = state.load(); + Int64 last_timestamp = (last_state & timestamp_mask) >> (machine_id_size + machine_seq_num_size); + Int64 machine_seq_num = last_state & machine_seq_num_mask; - if (current_timestamp != last_timestamp.load()) { - machine_sequence_number.store(0); - last_timestamp.store(current_timestamp); - } else { - local_machine_sequence_number = machine_sequence_number.fetch_add(1) + 1; - } + if (current_timestamp == last_timestamp) { + ++machine_seq_num; + } + new_state = (current_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | machine_seq_num; + } while (!state.compare_exchange_strong(last_state, new_state)); + // failed CAS => another thread updated state + // successful CAS => we have unique (timestamp, machine_seq_num) on this machine - x = (current_timestamp << 22) | (machine_id & 0x3ff000ull) | (local_machine_sequence_number & 0xfffull); + el = new_state; } return col_res; diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index 61d742d2fda..411a3a076ac 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -1,13 +1,178 @@ -#include -#include #include +#include +#include +#include namespace DB { -namespace ErrorCodes +namespace { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + +/* Bit layouts of UUIDv7 + +without counter: + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| unix_ts_ms | +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| unix_ts_ms | ver | rand_a | +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +|var| rand_b | +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| rand_b | +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ + +with counter: + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| unix_ts_ms | +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| unix_ts_ms | ver | counter_high_bits | 
+├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +|var| counter_low_bits | +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| rand_b | +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ +*/ + +/// bit counts +constexpr auto rand_a_bits_count = 12; +constexpr auto rand_b_bits_count = 62; +constexpr auto rand_b_low_bits_count = 32; +constexpr auto counter_high_bits_count = rand_a_bits_count; +constexpr auto counter_low_bits_count = 30; +constexpr auto bits_in_counter = counter_high_bits_count + counter_low_bits_count; +constexpr uint64_t counter_limit = (1ull << bits_in_counter); + +/// bit masks for UUIDv7 components +constexpr uint64_t variant_2_mask = (2ull << rand_b_bits_count); +constexpr uint64_t rand_a_bits_mask = (1ull << rand_a_bits_count) - 1; +constexpr uint64_t rand_b_bits_mask = (1ull << rand_b_bits_count) - 1; +constexpr uint64_t rand_b_with_counter_bits_mask = (1ull << rand_b_low_bits_count) - 1; +constexpr uint64_t counter_low_bits_mask = (1ull << counter_low_bits_count) - 1; +constexpr uint64_t counter_high_bits_mask = rand_a_bits_mask; + +uint64_t getTimestampMillisecond() +{ + timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); + const uint64_t sec = tp.tv_sec; + return sec * 1000 + tp.tv_nsec / 1000000; +} + +void setTimestampAndVersion(UUID & uuid, uint64_t timestamp) +{ + UUIDHelpers::getHighBytes(uuid) = (UUIDHelpers::getHighBytes(uuid) & rand_a_bits_mask) | (timestamp << 16) | 0x7000; +} + +void setVariant(UUID & uuid) +{ + UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & rand_b_bits_mask) | variant_2_mask; +} + +struct FillAllRandomPolicy +{ + static constexpr auto name = "generateUUIDv7NonMonotonic"; + static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. 
This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)"; + struct Data + { + void generate(UUID & uuid, uint64_t ts) + { + setTimestampAndVersion(uuid, ts); + setVariant(uuid); + } + }; +}; + +struct CounterFields +{ + uint64_t last_timestamp = 0; + uint64_t counter = 0; + + void resetCounter(const UUID & uuid) + { + const uint64_t counter_low_bits = (UUIDHelpers::getLowBytes(uuid) >> rand_b_low_bits_count) & counter_low_bits_mask; + const uint64_t counter_high_bits = UUIDHelpers::getHighBytes(uuid) & counter_high_bits_mask; + counter = (counter_high_bits << 30) | counter_low_bits; + } + + void incrementCounter(UUID & uuid) + { + if (++counter == counter_limit) [[unlikely]] + { + ++last_timestamp; + resetCounter(uuid); + setTimestampAndVersion(uuid, last_timestamp); + setVariant(uuid); + } + else + { + UUIDHelpers::getHighBytes(uuid) = (last_timestamp << 16) | 0x7000 | (counter >> counter_low_bits_count); + UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & rand_b_with_counter_bits_mask) | variant_2_mask | ((counter & counter_low_bits_mask) << rand_b_low_bits_count); + } + } + + void generate(UUID & uuid, uint64_t timestamp) + { + const bool need_to_increment_counter = (last_timestamp == timestamp) || ((last_timestamp > timestamp) & (last_timestamp < timestamp + 10000)); + if (need_to_increment_counter) + { + incrementCounter(uuid); + } + else + { + last_timestamp = timestamp; + resetCounter(uuid); + setTimestampAndVersion(uuid, last_timestamp); + setVariant(uuid); + } + } +}; + + +struct GlobalCounterPolicy +{ + static constexpr auto name = "generateUUIDv7"; + static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + + /// Guarantee counter monotonicity within one timestamp across all threads generating UUIDv7 simultaneously. + struct Data + { + static inline CounterFields fields; + static inline SharedMutex mutex; /// works a little bit faster than std::mutex here + std::lock_guard guard; + + Data() + : guard(mutex) + {} + + void generate(UUID & uuid, uint64_t timestamp) + { + fields.generate(uuid, timestamp); + } + }; +}; + +struct ThreadLocalCounterPolicy +{ + static constexpr auto name = "generateUUIDv7ThreadMonotonic"; + static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. 
This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)"; + + /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads. + struct Data + { + static inline thread_local CounterFields fields; + + void generate(UUID & uuid, uint64_t timestamp) + { + fields.generate(uuid, timestamp); + } + }; +}; + } #define DECLARE_SEVERAL_IMPLEMENTATIONS(...) \ @@ -16,77 +181,72 @@ DECLARE_AVX2_SPECIFIC_CODE(__VA_ARGS__) DECLARE_SEVERAL_IMPLEMENTATIONS( -class FunctionGenerateUUIDv7 : public IFunction +template +class FunctionGenerateUUIDv7Base : public IFunction, public FillPolicy { public: - static constexpr auto name = "generateUUIDv7"; + String getName() const final { return FillPolicy::name; } - String getName() const override + size_t getNumberOfArguments() const final { return 0; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const final { return false; } + bool useDefaultImplementationForNulls() const final { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const final { return false; } + bool isVariadic() const final { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - return name; - } - - size_t getNumberOfArguments() const override { return 0; } - - bool isDeterministicInScopeOfQuery() const override { return false; } - bool useDefaultImplementationForNulls() const override { return false; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - bool isVariadic() const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() > 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", - getName(), arguments.size()); + FunctionArgumentDescriptors mandatory_args; + FunctionArgumentDescriptors optional_args{ + {"expr", nullptr, nullptr, "Arbitrary Expression"} + }; + validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } - bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override { auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - size_t size = input_rows_count; - vec_to.resize(size); - - /// RandImpl is target-dependent and is not the same in different TargetSpecific namespaces. 
- RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UUID)); - - for (UUID & uuid : vec_to) + if (input_rows_count) { - /// https://www.ietf.org/archive/id/draft-peabody-dispatch-new-uuid-format-04.html#section-5.2 + vec_to.resize(input_rows_count); - const auto tm_point = std::chrono::system_clock::now(); - UInt64 unix_ts_ms = std::chrono::duration_cast( - tm_point.time_since_epoch()).count(); + /// Not all random bytes produced here are required for the UUIDv7 but it's the simplest way to get the required number of them by using RandImpl + RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UUID)); - UUIDHelpers::getHighBytes(uuid) = (UUIDHelpers::getHighBytes(uuid) & 0x0000000000000fffull) | 0x0000000000007000ull | (unix_ts_ms << 16); - UUIDHelpers::getLowBytes(uuid) = (UUIDHelpers::getLowBytes(uuid) & 0x3fffffffffffffffull) | 0x8000000000000000ull; + /// Note: For performance reasons, clock_gettime is called once per chunk instead of once per UUID. This reduces precision but + /// it still complies with the UUID standard. + uint64_t timestamp = getTimestampMillisecond(); + for (UUID & uuid : vec_to) + { + typename FillPolicy::Data data; + data.generate(uuid, timestamp); + } } - return col_res; } }; - ) // DECLARE_SEVERAL_IMPLEMENTATIONS #undef DECLARE_SEVERAL_IMPLEMENTATIONS -class FunctionGenerateUUIDv7 : public TargetSpecific::Default::FunctionGenerateUUIDv7 +template +class FunctionGenerateUUIDv7Base : public TargetSpecific::Default::FunctionGenerateUUIDv7Base { public: - explicit FunctionGenerateUUIDv7(ContextPtr context) : selector(context) - { - selector.registerImplementation(); + using Self = FunctionGenerateUUIDv7Base; + using Parent = TargetSpecific::Default::FunctionGenerateUUIDv7Base; - #if USE_MULTITARGET_CODE - selector.registerImplementation(); - #endif + explicit FunctionGenerateUUIDv7Base(ContextPtr context) : selector(context) + { + selector.registerImplementation(); + +#if USE_MULTITARGET_CODE + using ParentAVX2 = TargetSpecific::AVX2::FunctionGenerateUUIDv7Base; + selector.registerImplementation(); +#endif } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -96,18 +256,34 @@ public: static FunctionPtr create(ContextPtr context) { - return std::make_shared(context); + return std::make_shared(context); } private: ImplementationSelector selector; }; +template +void registerUUIDv7Generator(auto& factory) +{ + static constexpr auto doc_syntax_format = "{}([expression])"; + static constexpr auto example_format = "SELECT {}()"; + static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)"; + + FunctionDocumentation::Description doc_description = FillPolicy::doc_description; + FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name); + FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. 
Optional."}}; + FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UUID version 7."; + FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; + FunctionDocumentation::Categories doc_categories = {"UUID"}; + + factory.template registerFunction>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); +} + REGISTER_FUNCTION(GenerateUUIDv7) { - factory.registerFunction(); + registerUUIDv7Generator(factory); + registerUUIDv7Generator(factory); + registerUUIDv7Generator(factory); } - } - - diff --git a/src/Functions/serial.cpp b/src/Functions/serial.cpp index 4f336013ca8..1745e17b5e7 100644 --- a/src/Functions/serial.cpp +++ b/src/Functions/serial.cpp @@ -7,6 +7,9 @@ #include #include #include "Common/Logger.h" +#include "Common/ZooKeeper/IKeeper.h" +#include "Common/ZooKeeper/KeeperException.h" +#include "Common/ZooKeeper/Types.h" #include namespace DB { @@ -15,6 +18,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int KEEPER_EXCEPTION; } class FunctionSerial : public IFunction @@ -69,6 +73,15 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (zk == nullptr) { + throw Exception(ErrorCodes::KEEPER_EXCEPTION, + "ZooKeeper is not configured for function {}", + getName()); + } + if (zk->expired()) { + zk = context->getZooKeeper(); + } + auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); size_t size = input_rows_count; @@ -77,78 +90,32 @@ public: const auto & serial_path = "/serials/" + arguments[0].column->getDataAt(0).toString(); - // if serial name used first time - zk->createAncestors(serial_path); - zk->createIfNotExists(serial_path, ""); + // CAS in ZooKeeper + // `get` value and version, `trySet` new with version check + // I didn't get how to do it with `multi` Int64 counter; + std::string counter_path = serial_path + "/counter"; - if (zk != nullptr) { - // Get Lock in ZooKeeper - // https://zookeeper.apache.org/doc/r3.2.2/recipes.html + // if serial name used first time + zk->createAncestors(counter_path); + zk->createIfNotExists(counter_path, "1"); - // 1. - if (zk->expired()) { - zk = context->getZooKeeper(); + Coordination::Stat stat; + while (true) { + std::string counter_string = zk->get(counter_path, &stat); + counter = std::stoll(counter_string); + std::string updated_counter = std::to_string(counter + input_rows_count); + Coordination::Error err = zk->trySet(counter_path, updated_counter); + if (err == Coordination::Error::ZOK) { + // CAS is done + break; } - - std::string lock_path = serial_path + "/lock-"; - std::string path_created = zk->create(lock_path, "", zkutil::CreateMode::EphemeralSequential); - Int64 created_sequence_number = std::stoll(path_created.substr(lock_path.size(), path_created.size() - lock_path.size())); - - while (true) { - // 2. - zkutil::Strings children = zk->getChildren(serial_path); - - // 3. 
- Int64 lowest_child_sequence_number = -1; - for (auto& child : children) { - if (child == "counter") { - continue; - } - std::string child_suffix = child.substr(5, 10); - Int64 seq_number = std::stoll(child_suffix); - - if (lowest_child_sequence_number == -1 || seq_number < lowest_child_sequence_number) { - lowest_child_sequence_number = seq_number; - } - } - - if (lowest_child_sequence_number == created_sequence_number) { - break; - // we have a lock in ZooKeeper, now can get the counter value - } - - // 4. and 5. - Int64 prev_seq_number = created_sequence_number - 1; - std::string to_wait_key = std::to_string(prev_seq_number); - while (to_wait_key.size() != 10) { - to_wait_key = "0" + to_wait_key; - } - - zk->waitForDisappear(lock_path + to_wait_key); + if (err != Coordination::Error::ZBADVERSION) { + throw Exception(ErrorCodes::KEEPER_EXCEPTION, + "ZooKeeper trySet operation failed with unexpected error = {} in function {}", + err, getName()); } - - // Now we have a lock - // Update counter in ZooKeeper - std::string counter_path = serial_path + "/counter"; - if (zk->exists(counter_path)) { - std::string counter_string = zk->get(counter_path, nullptr); - counter = std::stoll(counter_string); - - LOG_INFO(getLogger("Serial Function"), "Got counter from Zookeeper = {}", counter); - } else { - counter = 1; - } - zk->createOrUpdate(counter_path, std::to_string(counter + input_rows_count), zkutil::CreateMode::Persistent); - - // Unlock = delete node created on step 1. - zk->deleteEphemeralNodeIfContentMatches(path_created, ""); - } else { - // ZooKeeper is not available - // What to do? - - counter = 1; } // Make a result @@ -157,7 +124,6 @@ public: ++counter; } - return col_res; } @@ -165,7 +131,39 @@ public: REGISTER_FUNCTION(Serial) { - factory.registerFunction(); + factory.registerFunction(FunctionDocumentation + { + .description=R"( +Generates and returns sequential numbers starting from the previous counter value. +This function takes a constant string argument - a series identifier. +The server should be configured with a ZooKeeper. 
+)", + .syntax = "serial(identifier)", + .arguments{ + {"series identifier", "Series identifier (String)"} + }, + .returned_value = "Sequential numbers of type Int64 starting from the previous counter value", + .examples{ + {"first call", "SELECT serial('name')", R"( +┌─serial('name')─┐ +│ 1 │ +└────────────────┘)"}, + {"second call", "SELECT serial('name')", R"( +┌─serial('name')─┐ +│ 2 │ +└────────────────┘)"}, + {"column call", "SELECT *, serial('name') FROM test_table", R"( +┌─CounterID─┬─UserID─┬─ver─┬─serial('name')─┐ +│ 1 │ 3 │ 3 │ 3 │ +│ 1 │ 1 │ 1 │ 4 │ +│ 1 │ 2 │ 2 │ 5 │ +│ 1 │ 5 │ 5 │ 6 │ +│ 1 │ 4 │ 4 │ 7 │ +└───────────┴────────┴─────┴────────────────┘ + )"}}, + .categories{"Unique identifiers"} + }); + } } diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.reference b/tests/queries/0_stateless/03129_serial_test_zookeeper.reference new file mode 100644 index 00000000000..60714f4064f --- /dev/null +++ b/tests/queries/0_stateless/03129_serial_test_zookeeper.reference @@ -0,0 +1,8 @@ +1 +2 +1 3 3 3 +1 1 1 4 +1 2 2 5 +1 5 5 6 +1 4 4 7 +1 diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql new file mode 100644 index 00000000000..3eacd1ae908 --- /dev/null +++ b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql @@ -0,0 +1,20 @@ +SELECT serial('x'); +SELECT serial('x'); + +DROP TABLE IF EXISTS default.test_table; + +CREATE TABLE test_table +( + CounterID UInt32, + UserID UInt32, + ver UInt16 +) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/1-1/test_table', 'x', ver) +PARTITION BY CounterID +ORDER BY (CounterID, intHash32(UserID)) +SAMPLE BY intHash32(UserID); + +INSERT INTO test_table VALUES (1, 1, 1), (1, 2, 2), (1, 3, 3), (1, 4, 4), (1, 5, 5); + +SELECT *, serial('x') FROM test_table; + +SELECT serial('y'); \ No newline at end of file From 9789d130a6cad5da2941037d91c69d9d63aa2733 Mon Sep 17 00:00:00 2001 From: Danila Puzov Date: Mon, 13 May 2024 01:11:23 +0300 Subject: [PATCH 117/392] Tests and docs for generateSnowflakeID and fixes --- src/Functions/generateSnowflakeID.cpp | 144 +++++++++++++----- src/Functions/serial.cpp | 36 ++--- .../03129_serial_test_zookeeper.reference | 15 +- .../03129_serial_test_zookeeper.sql | 24 +-- .../03130_generate_snowflake_id.reference | 3 + .../03130_generate_snowflake_id.sql | 11 ++ 6 files changed, 154 insertions(+), 79 deletions(-) create mode 100644 tests/queries/0_stateless/03130_generate_snowflake_id.reference create mode 100644 tests/queries/0_stateless/03130_generate_snowflake_id.sql diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index dd837a58325..1decda0ab46 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -1,7 +1,11 @@ -#include #include +#include #include +#include #include +#include +#include + namespace DB { @@ -38,15 +42,32 @@ constexpr auto machine_seq_num_size = 12; constexpr int64_t timestamp_mask = ((1LL << timestamp_size) - 1) << (machine_id_size + machine_seq_num_size); constexpr int64_t machine_id_mask = ((1LL << machine_id_size) - 1) << machine_seq_num_size; constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_size) - 1; +constexpr int64_t max_machine_seq_num = machine_seq_num_mask; + +Int64 getMachineID() +{ + auto serverUUID = ServerUUID::get(); + + // hash serverUUID into 64 bits + Int64 h = UUIDHelpers::getHighBytes(serverUUID); + Int64 l = UUIDHelpers::getLowBytes(serverUUID); + return ((h * 11) ^ (l * 17)) 
& machine_id_mask; +} + +Int64 getTimestamp() +{ + const auto tm_point = std::chrono::system_clock::now(); + return std::chrono::duration_cast( + tm_point.time_since_epoch()).count() & ((1LL << timestamp_size) - 1); +} } class FunctionSnowflakeID : public IFunction { private: - mutable std::atomic state{0}; - // previous snowflake id - // state is 1 atomic value because we don't want use mutex + mutable std::atomic lowest_available_snowflake_id{0}; + // 1 atomic value because we don't want to use mutex public: static constexpr auto name = "generateSnowflakeID"; @@ -58,23 +79,19 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } - + bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const override { return false; } bool useDefaultImplementationForNulls() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } bool isVariadic() const override { return true; } - bool isStateful() const override { return true; } - bool isDeterministic() const override { return false; } - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() > 1) { + if (!arguments.empty()) { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", + "Number of arguments for function {} doesn't match: passed {}, should be 0.", getName(), arguments.size()); } - return std::make_shared(); } @@ -83,36 +100,57 @@ public: { auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - size_t size = input_rows_count; - vec_to.resize(size); + Int64 size64 = static_cast(input_rows_count); + vec_to.resize(input_rows_count); - auto serverUUID = ServerUUID::get(); + if (input_rows_count == 0) { + return col_res; + } - // hash serverUUID into 32 bytes - Int64 h = UUIDHelpers::getHighBytes(serverUUID); - Int64 l = UUIDHelpers::getLowBytes(serverUUID); - Int64 machine_id = ((h * 11) ^ (l * 17)) & machine_id_mask; + Int64 machine_id = getMachineID(); + Int64 current_timestamp = getTimestamp(); + Int64 current_machine_seq_num; - for (Int64 & el : vec_to) { - const auto tm_point = std::chrono::system_clock::now(); - Int64 current_timestamp = std::chrono::duration_cast( - tm_point.time_since_epoch()).count() & ((1LL << timestamp_size) - 1); + Int64 available_id, next_available_id; + do + { + available_id = lowest_available_snowflake_id.load(); + Int64 available_timestamp = (available_id & timestamp_mask) >> (machine_id_size + machine_seq_num_size); + Int64 available_machine_seq_num = available_id & machine_seq_num_mask; - Int64 last_state, new_state; - do { - last_state = state.load(); - Int64 last_timestamp = (last_state & timestamp_mask) >> (machine_id_size + machine_seq_num_size); - Int64 machine_seq_num = last_state & machine_seq_num_mask; + if (current_timestamp > available_timestamp) + { + current_machine_seq_num = 0; + } + else + { + current_timestamp = available_timestamp; + current_machine_seq_num = available_machine_seq_num; + } - if (current_timestamp == last_timestamp) { - ++machine_seq_num; - } - new_state = (current_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | machine_seq_num; - } while (!state.compare_exchange_strong(last_state, new_state)); - // failed CAS => another thread updated state - // successful CAS 
=> we have unique (timestamp, machine_seq_num) on this machine + // calculate new `lowest_available_snowflake_id` + Int64 new_timestamp; + Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1); + if (size64 >= seq_nums_in_current_timestamp_left) { + new_timestamp = current_timestamp + 1 + (size64 - seq_nums_in_current_timestamp_left) / max_machine_seq_num; + } else { + new_timestamp = current_timestamp; + } + Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask; + next_available_id = (new_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | new_machine_seq_num; + } + while (!lowest_available_snowflake_id.compare_exchange_strong(available_id, next_available_id)); + // failed CAS => another thread updated `lowest_available_snowflake_id` + // successful CAS => we have our range of exclusive values - el = new_state; + for (Int64 & el : vec_to) + { + el = (current_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | current_machine_seq_num; + if (current_machine_seq_num++ == max_machine_seq_num) + { + current_machine_seq_num = 0; + ++current_timestamp; + } } return col_res; @@ -122,7 +160,41 @@ public: REGISTER_FUNCTION(GenerateSnowflakeID) { - factory.registerFunction(); + factory.registerFunction(FunctionDocumentation + { + .description=R"( +Generates Snowflake ID -- unique identificators contains: +- The first 41 (+ 1 top zero bit) bits is timestamp in Unix time milliseconds +- The middle 10 bits are the machine ID. +- The last 12 bits decode to number of ids processed by the machine at the given millisecond. + +In case the number of ids processed overflows, the timestamp field is incremented by 1 and the counter is reset to 0. +This function guarantees strict monotony on 1 machine and differences in values obtained on different machines. 
+)", + .syntax = "generateSnowflakeID()", + .arguments{}, + .returned_value = "Column of Int64", + .examples{ + {"single call", "SELECT generateSnowflakeID();", R"( +┌─generateSnowflakeID()─┐ +│ 7195510166884597760 │ +└───────────────────────┘)"}, + {"column call", "SELECT generateSnowflakeID() FROM numbers(10);", R"( +┌─generateSnowflakeID()─┐ +│ 7195516038159417344 │ +│ 7195516038159417345 │ +│ 7195516038159417346 │ +│ 7195516038159417347 │ +│ 7195516038159417348 │ +│ 7195516038159417349 │ +│ 7195516038159417350 │ +│ 7195516038159417351 │ +│ 7195516038159417352 │ +│ 7195516038159417353 │ +└───────────────────────┘)"}, + }, + .categories{"Unique identifiers", "Snowflake ID"} + }); } } diff --git a/src/Functions/serial.cpp b/src/Functions/serial.cpp index 1745e17b5e7..3da2f4ce218 100644 --- a/src/Functions/serial.cpp +++ b/src/Functions/serial.cpp @@ -1,18 +1,11 @@ -#include -#include -#include -#include +#include #include #include #include #include -#include "Common/Logger.h" -#include "Common/ZooKeeper/IKeeper.h" -#include "Common/ZooKeeper/KeeperException.h" -#include "Common/ZooKeeper/Types.h" -#include -namespace DB { +namespace DB +{ namespace ErrorCodes { @@ -62,30 +55,26 @@ public: throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()); - if (!isStringOrFixedString(arguments[0])) { + if (!isStringOrFixedString(arguments[0])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Type of argument for function {} doesn't match: passed {}, should be string", getName(), arguments[0]->getName()); - } return std::make_shared(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (zk == nullptr) { + if (zk == nullptr) throw Exception(ErrorCodes::KEEPER_EXCEPTION, "ZooKeeper is not configured for function {}", getName()); - } - if (zk->expired()) { + if (zk->expired()) zk = context->getZooKeeper(); - } auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); size_t size = input_rows_count; - LOG_INFO(getLogger("Serial Function"), "Size = {}", size); vec_to.resize(size); const auto & serial_path = "/serials/" + arguments[0].column->getDataAt(0).toString(); @@ -102,16 +91,19 @@ public: zk->createIfNotExists(counter_path, "1"); Coordination::Stat stat; - while (true) { + while (true) + { std::string counter_string = zk->get(counter_path, &stat); counter = std::stoll(counter_string); std::string updated_counter = std::to_string(counter + input_rows_count); Coordination::Error err = zk->trySet(counter_path, updated_counter); - if (err == Coordination::Error::ZOK) { + if (err == Coordination::Error::ZOK) + { // CAS is done break; } - if (err != Coordination::Error::ZBADVERSION) { + if (err != Coordination::Error::ZBADVERSION) + { throw Exception(ErrorCodes::KEEPER_EXCEPTION, "ZooKeeper trySet operation failed with unexpected error = {} in function {}", err, getName()); @@ -119,7 +111,8 @@ public: } // Make a result - for (auto& val : vec_to) { + for (auto& val : vec_to) + { val = counter; ++counter; } @@ -163,7 +156,6 @@ The server should be configured with a ZooKeeper. 
)"}}, .categories{"Unique identifiers"} }); - } } diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.reference b/tests/queries/0_stateless/03129_serial_test_zookeeper.reference index 60714f4064f..479030db4be 100644 --- a/tests/queries/0_stateless/03129_serial_test_zookeeper.reference +++ b/tests/queries/0_stateless/03129_serial_test_zookeeper.reference @@ -1,8 +1,13 @@ 1 2 -1 3 3 3 -1 1 1 4 -1 2 2 5 -1 5 5 6 -1 4 4 7 1 +3 +4 +5 +6 +7 +1 1 +2 2 +3 3 +4 4 +5 5 diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql index 3eacd1ae908..c3395009477 100644 --- a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql +++ b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql @@ -1,20 +1,12 @@ +-- Tags: zookeeper + SELECT serial('x'); SELECT serial('x'); +SELECT serial('y'); +SELECT serial('x') FROM numbers(5); -DROP TABLE IF EXISTS default.test_table; +SELECT serial(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT serial('x', 'y'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT serial(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -CREATE TABLE test_table -( - CounterID UInt32, - UserID UInt32, - ver UInt16 -) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/1-1/test_table', 'x', ver) -PARTITION BY CounterID -ORDER BY (CounterID, intHash32(UserID)) -SAMPLE BY intHash32(UserID); - -INSERT INTO test_table VALUES (1, 1, 1), (1, 2, 2), (1, 3, 3), (1, 4, 4), (1, 5, 5); - -SELECT *, serial('x') FROM test_table; - -SELECT serial('y'); \ No newline at end of file +SELECT serial('z'), serial('z') FROM numbers(5); diff --git a/tests/queries/0_stateless/03130_generate_snowflake_id.reference b/tests/queries/0_stateless/03130_generate_snowflake_id.reference new file mode 100644 index 00000000000..2049ba26379 --- /dev/null +++ b/tests/queries/0_stateless/03130_generate_snowflake_id.reference @@ -0,0 +1,3 @@ +1 +1 +10 diff --git a/tests/queries/0_stateless/03130_generate_snowflake_id.sql b/tests/queries/0_stateless/03130_generate_snowflake_id.sql new file mode 100644 index 00000000000..669814c9ecb --- /dev/null +++ b/tests/queries/0_stateless/03130_generate_snowflake_id.sql @@ -0,0 +1,11 @@ +SELECT bitShiftLeft(toUInt64(generateSnowflakeID()), 52) = 0; +SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; + +SELECT generateSnowflakeID(1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT count(*) +FROM +( + SELECT DISTINCT generateSnowflakeID() + FROM numbers(10) +) \ No newline at end of file From 8e63d2f795d4e653ff4885212919725a7bb6a074 Mon Sep 17 00:00:00 2001 From: v01dxyz Date: Mon, 13 May 2024 09:21:01 +0200 Subject: [PATCH 118/392] Compress STDOUT if redirected to file with a compression extension * Add a new member to ClientBase: default_output_compression_method * Move the code to get file path from file descriptor to a separate Common function. The stateless test is almost a copy-paste of 02001_compress_output_file. 
Fixes https://github.com/ClickHouse/ClickHouse/issues/63496 --- programs/client/Client.cpp | 2 +- programs/local/LocalServer.cpp | 2 +- src/Client/ClientBase.cpp | 10 +++++- src/Client/ClientBase.h | 3 +- src/Common/tryGetFileNameByFileDescriptor.cpp | 33 +++++++++++++++++++ src/Common/tryGetFileNameByFileDescriptor.h | 10 ++++++ src/Formats/FormatFactory.cpp | 22 ++++--------- .../03144_compress_stdout.reference | 2 ++ .../0_stateless/03144_compress_stdout.sh | 23 +++++++++++++ 9 files changed, 88 insertions(+), 19 deletions(-) create mode 100644 src/Common/tryGetFileNameByFileDescriptor.cpp create mode 100644 src/Common/tryGetFileNameByFileDescriptor.h create mode 100644 tests/queries/0_stateless/03144_compress_stdout.reference create mode 100755 tests/queries/0_stateless/03144_compress_stdout.sh diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 396cd3e646b..9ae5dd735ed 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1178,7 +1178,7 @@ void Client::processConfig() pager = config().getString("pager", ""); - setDefaultFormatsFromConfiguration(); + setDefaultFormatsAndCompressionFromConfiguration(); global_context->setClientName(std::string(DEFAULT_CLIENT_NAME)); global_context->setQueryKindInitial(); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 5f2a51406e1..f18c0306254 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -611,7 +611,7 @@ void LocalServer::processConfig() if (config().has("macros")) global_context->setMacros(std::make_unique(config(), "macros", log)); - setDefaultFormatsFromConfiguration(); + setDefaultFormatsAndCompressionFromConfiguration(); /// Sets external authenticators config (LDAP, Kerberos). global_context->setExternalAuthenticatorsConfig(config()); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index bd4430648c5..61d95e6eb4c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -643,6 +644,9 @@ try bool extras_into_stdout = need_render_progress || logs_into_stdout; bool select_only_into_file = select_into_file && !select_into_file_and_stdout; + if (!out_file_buf && default_output_compression_method != CompressionMethod::None) + out_file_buf = wrapWriteBufferWithCompressionMethod(out_buf, default_output_compression_method, 3, 0); + /// It is not clear how to write progress and logs /// intermixed with data with parallel formatting. /// It may increase code complexity significantly. @@ -735,7 +739,7 @@ bool ClientBase::isRegularFile(int fd) return fstat(fd, &file_stat) == 0 && S_ISREG(file_stat.st_mode); } -void ClientBase::setDefaultFormatsFromConfiguration() +void ClientBase::setDefaultFormatsAndCompressionFromConfiguration() { if (config().has("output-format")) { @@ -759,6 +763,10 @@ void ClientBase::setDefaultFormatsFromConfiguration() default_output_format = *format_from_file_name; else default_output_format = "TSV"; + + std::optional file_name = tryGetFileNameFromFileDescriptor(STDOUT_FILENO); + if (file_name) + default_output_compression_method = chooseCompressionMethod(*file_name, ""); } else if (is_interactive) { diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 64cbdbe8989..7a0489641c8 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -190,7 +190,7 @@ protected: /// Adjust some settings after command line options and config had been processed. 
void adjustSettings(); - void setDefaultFormatsFromConfiguration(); + void setDefaultFormatsAndCompressionFromConfiguration(); void initTTYBuffer(ProgressOption progress); @@ -224,6 +224,7 @@ protected: String pager; String default_output_format; /// Query results output format. + CompressionMethod default_output_compression_method = CompressionMethod::None; String default_input_format; /// Tables' format for clickhouse-local. bool select_into_file = false; /// If writing result INTO OUTFILE. It affects progress rendering. diff --git a/src/Common/tryGetFileNameByFileDescriptor.cpp b/src/Common/tryGetFileNameByFileDescriptor.cpp new file mode 100644 index 00000000000..47e81050388 --- /dev/null +++ b/src/Common/tryGetFileNameByFileDescriptor.cpp @@ -0,0 +1,33 @@ +#include + +#ifdef OS_LINUX +# include +#elif defined(OS_DARWIN) +# include +#endif + +#include + + +namespace DB +{ +std::optional tryGetFileNameFromFileDescriptor(int fd) +{ +#ifdef OS_LINUX + std::string proc_path = fmt::format("/proc/self/fd/{}", fd); + char file_path[PATH_MAX] = {'\0'}; + if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1) + return file_path; + return std::nullopt; +#elif defined(OS_DARWIN) + char file_path[PATH_MAX] = {'\0'}; + if (fcntl(fd, F_GETPATH, file_path) != -1) + return file_path; + return std::nullopt; +#else + (void)fd; + return std::nullopt; +#endif +} + +} diff --git a/src/Common/tryGetFileNameByFileDescriptor.h b/src/Common/tryGetFileNameByFileDescriptor.h new file mode 100644 index 00000000000..c38ccb4f851 --- /dev/null +++ b/src/Common/tryGetFileNameByFileDescriptor.h @@ -0,0 +1,10 @@ +#pragma once + +#include +#include + +namespace DB +{ +/// Supports only Linux/MacOS. On other platforms, returns nullopt. +std::optional tryGetFileNameFromFileDescriptor(int fd); +} diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index b7e9899da46..783daba44fd 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -15,7 +16,7 @@ #include #include #include -#include +#include #include @@ -692,21 +693,12 @@ String FormatFactory::getFormatFromFileName(String file_name) std::optional FormatFactory::tryGetFormatFromFileDescriptor(int fd) { -#ifdef OS_LINUX - std::string proc_path = fmt::format("/proc/self/fd/{}", fd); - char file_path[PATH_MAX] = {'\0'}; - if (readlink(proc_path.c_str(), file_path, sizeof(file_path) - 1) != -1) - return tryGetFormatFromFileName(file_path); + std::optional file_name = tryGetFileNameFromFileDescriptor(fd); + + if (file_name) + return tryGetFormatFromFileName(*file_name); + return std::nullopt; -#elif defined(OS_DARWIN) - char file_path[PATH_MAX] = {'\0'}; - if (fcntl(fd, F_GETPATH, file_path) != -1) - return tryGetFormatFromFileName(file_path); - return std::nullopt; -#else - (void)fd; - return std::nullopt; -#endif } String FormatFactory::getFormatFromFileDescriptor(int fd) diff --git a/tests/queries/0_stateless/03144_compress_stdout.reference b/tests/queries/0_stateless/03144_compress_stdout.reference new file mode 100644 index 00000000000..6f51dfc24e1 --- /dev/null +++ b/tests/queries/0_stateless/03144_compress_stdout.reference @@ -0,0 +1,2 @@ +Hello, World! From client. +Hello, World! From local. 
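A rough usage sketch of the behavior this patch enables (file paths and queries are illustrative and not part of the patch):

```bash
# stdout is redirected to a file whose name ends in a known compression extension (.gz),
# so the client now writes its output through the matching compression codec
clickhouse-local --query "SELECT 'Hello, World!'" > /tmp/result.gz
gunzip -c /tmp/result.gz     # prints: Hello, World!

# without a recognized extension, default_output_compression_method stays None and the output is plain
clickhouse-local --query "SELECT 'Hello, World!'" > /tmp/result.txt
cat /tmp/result.txt          # prints: Hello, World!
```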
diff --git a/tests/queries/0_stateless/03144_compress_stdout.sh b/tests/queries/0_stateless/03144_compress_stdout.sh new file mode 100755 index 00000000000..569754303a7 --- /dev/null +++ b/tests/queries/0_stateless/03144_compress_stdout.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -e + +[ -e "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_client.gz ] && rm "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_client.gz + +${CLICKHOUSE_CLIENT} --query "SELECT * FROM (SELECT 'Hello, World! From client.')" > ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client.gz +gunzip ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client.gz +cat ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client + +rm -f "${CLICKHOUSE_TMP}/test_compression_of_output_file_from_client" + +[ -e "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_local.gz ] && rm "${CLICKHOUSE_TMP}"/test_compression_of_output_file_from_local.gz + +${CLICKHOUSE_LOCAL} --query "SELECT * FROM (SELECT 'Hello, World! From local.')" > ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local.gz +gunzip ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local.gz +cat ${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local + +rm -f "${CLICKHOUSE_TMP}/test_compression_of_output_file_from_local" From 6a94ba370a3a294f7f2b1471214be6ecfd6eaa7b Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Mon, 13 May 2024 09:43:03 +0200 Subject: [PATCH 119/392] Fix clang-tidy errors --- src/Functions/FunctionsHashing.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index bccdba5ee69..1091ec6c86f 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -94,8 +94,8 @@ namespace impl i = 0; if (offsets != nullptr) { - const auto begin = offsets->begin(); - auto upper = std::upper_bound(begin, offsets->end(), i); + const auto *const begin = offsets->begin(); + const auto * upper = std::upper_bound(begin, offsets->end(), i); if (upper == offsets->end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "offset {} not found in function SipHashKeyColumns::getKey", i); i = upper - begin; From f1f668e7df24190eaf4f1d67360b9e53099289d2 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 10 May 2024 14:15:01 +0200 Subject: [PATCH 120/392] Setup node generator initial --- utils/keeper-bench/Runner.cpp | 288 ++++++++++++++++++++++++++++++---- utils/keeper-bench/Runner.h | 3 + utils/keeper-bench/main.cpp | 2 + 3 files changed, 265 insertions(+), 28 deletions(-) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index a893dac3851..0050230b6ec 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1,17 +1,22 @@ #include "Runner.h" #include -#include #include +#include +#include +#include #include "Common/ConcurrentBoundedQueue.h" +#include "Common/Exception.h" #include "Common/ZooKeeper/IKeeper.h" #include "Common/ZooKeeper/ZooKeeperArgs.h" #include "Common/ZooKeeper/ZooKeeperCommon.h" #include "Common/ZooKeeper/ZooKeeperConstants.h" #include #include +#include "Coordination/KeeperSnapshotManager.h" #include "Core/ColumnWithTypeAndName.h" #include "Core/ColumnsWithTypeAndName.h" +#include #include "IO/ReadBuffer.h" #include "IO/ReadBufferFromFile.h" #include "base/Decimal.h" @@ -43,12 +48,14 @@ Runner::Runner( std::optional 
concurrency_, const std::string & config_path, const std::string & input_request_log_, + const std::string & setup_nodes_snapshot_path_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, std::optional continue_on_error_, std::optional max_iterations_) : input_request_log(input_request_log_) + , setup_nodes_snapshot_path(setup_nodes_snapshot_path_) , info(std::make_shared()) { @@ -381,18 +388,18 @@ struct ZooKeeperRequestBlock { explicit ZooKeeperRequestBlock(DB::Block block_) : block(std::move(block_)) - , hostname_idx(block.getPositionByName("hostname")) // - , request_event_time_idx(block.getPositionByName("request_event_time")) // - , thread_id_idx(block.getPositionByName("thread_id")) // - , session_id_idx(block.getPositionByName("session_id")) // - , xid_idx(block.getPositionByName("xid")) // + , hostname_idx(block.getPositionByName("hostname")) + , request_event_time_idx(block.getPositionByName("request_event_time")) + , thread_id_idx(block.getPositionByName("thread_id")) + , session_id_idx(block.getPositionByName("session_id")) + , xid_idx(block.getPositionByName("xid")) , has_watch_idx(block.getPositionByName("has_watch")) , op_num_idx(block.getPositionByName("op_num")) , path_idx(block.getPositionByName("path")) , data_idx(block.getPositionByName("data")) , is_ephemeral_idx(block.getPositionByName("is_ephemeral")) , is_sequential_idx(block.getPositionByName("is_sequential")) - , response_event_time_idx(block.getPositionByName("response_event_time")) // + , response_event_time_idx(block.getPositionByName("response_event_time")) , error_idx(block.getPositionByName("error")) , requests_size_idx(block.getPositionByName("requests_size")) , version_idx(block.getPositionByName("version")) @@ -519,6 +526,7 @@ struct RequestFromLog { Coordination::ZooKeeperRequestPtr request; std::optional expected_result; + std::vector> subrequest_expected_results; int64_t session_id = 0; size_t executor_id = 0; bool has_watch = false; @@ -586,7 +594,6 @@ struct ZooKeeperRequestFromLogReader idx_in_block = 0; } - request_from_log.expected_result = current_block->getError(idx_in_block); request_from_log.session_id = current_block->getSessionId(idx_in_block); request_from_log.has_watch = current_block->hasWatch(idx_in_block); @@ -693,6 +700,12 @@ struct ZooKeeperRequestFromLogReader if (!subrequest_from_log) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to fetch subrequest for {}, subrequest index {}", op_num, i); + if (!subrequest_from_log->expected_result && request_from_log.expected_result + && request_from_log.expected_result == Coordination::Error::ZOK) + { + subrequest_from_log->expected_result = Coordination::Error::ZOK; + } + requests.push_back(std::move(subrequest_from_log->request)); if (subrequest_from_log->session_id != request_from_log.session_id) @@ -700,6 +713,8 @@ struct ZooKeeperRequestFromLogReader if (subrequest_from_log->executor_id != request_from_log.executor_id) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Executor id mismatch for subrequest in {}, subrequest index {}", op_num, i); + + request_from_log.subrequest_expected_results.push_back(subrequest_from_log->expected_result); } request_from_log.request = std::make_shared(requests, default_acls); @@ -731,7 +746,6 @@ private: namespace { - struct RequestFromLogStats { struct Stats @@ -744,6 +758,192 @@ struct RequestFromLogStats Stats read_requests; }; +struct SetupNodeCollector +{ + explicit SetupNodeCollector(const std::string & setup_nodes_snapshot_path) + { + if 
(setup_nodes_snapshot_path.empty()) + return; + + keeper_context = std::make_shared(true, std::make_shared()); + keeper_context->setDigestEnabled(true); + keeper_context->setSnapshotDisk( + std::make_shared("Keeper-snapshots", setup_nodes_snapshot_path)); + + snapshot_manager.emplace(1, keeper_context); + auto snapshot_result = snapshot_manager->restoreFromLatestSnapshot(); + if (snapshot_result.storage == nullptr) + { + std::cerr << "No initial snapshot found" << std::endl; + initial_storage = std::make_unique( + /* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); + initial_storage->initializeSystemNodes(); + } + else + { + std::cerr << "Loaded initial nodes from snapshot" << std::endl; + initial_storage = std::move(snapshot_result.storage); + } + } + + void processRequest(const RequestFromLog & request_from_log) + { + if (!request_from_log.expected_result.has_value()) + return; + + auto process_request = [&](const Coordination::ZooKeeperRequest & request, const auto expected_result) + { + const auto & path = request.getPath(); + if (processed_paths.contains(path)) + return; + + auto op_num = request.getOpNum(); + + if (op_num == Coordination::OpNum::Create) + { + if (expected_result == Coordination::Error::ZNODEEXISTS) + { + addExpectedNode(path); + processed_paths.insert(path); + } + else if (expected_result == Coordination::Error::ZOK) + { + /// we need to make sure ancestors exist + auto position = path.find_last_of('/'); + if (position != 0) + { + auto parent_path = path.substr(0, position); + if (!processed_paths.contains(parent_path)) + { + addExpectedNode(parent_path); + processed_paths.insert(parent_path); + } + } + + processed_paths.insert(path); + } + } + else if (op_num == Coordination::OpNum::Remove) + { + if (expected_result == Coordination::Error::ZOK) + { + addExpectedNode(path); + processed_paths.insert(path); + } + } + else if (op_num == Coordination::OpNum::Set) + { + if (expected_result == Coordination::Error::ZOK) + { + addExpectedNode(path); + processed_paths.insert(path); + } + } + else if (op_num == Coordination::OpNum::Check) + { + if (expected_result == Coordination::Error::ZOK) + { + addExpectedNode(path); + processed_paths.insert(path); + } + } + else if (op_num == Coordination::OpNum::CheckNotExists) + { + if (expected_result == Coordination::Error::ZNODEEXISTS) + { + addExpectedNode(path); + processed_paths.insert(path); + } + } + else if (request.isReadRequest()) + { + if (expected_result == Coordination::Error::ZOK) + { + addExpectedNode(path); + processed_paths.insert(path); + } + } + }; + + const auto & request = request_from_log.request; + if (request->getOpNum() == Coordination::OpNum::Multi || request->getOpNum() == Coordination::OpNum::MultiRead) + { + const auto & multi_request = dynamic_cast(*request); + const auto & subrequests = multi_request.requests; + + for (size_t i = 0; i < subrequests.size(); ++i) + { + const auto & zookeeper_request = dynamic_cast(*subrequests[i]); + const auto subrequest_expected_result = request_from_log.subrequest_expected_results[i]; + if (subrequest_expected_result.has_value()) + process_request(zookeeper_request, *subrequest_expected_result); + + } + } + else + process_request(*request, *request_from_log.expected_result); + } + + void addExpectedNode(const std::string & path) + { + std::lock_guard lock(nodes_mutex); + + if (initial_storage->container.contains(path)) + return; + + std::cerr << "Adding expected node " << path << std::endl; + + Coordination::Requests 
create_ops; + + size_t pos = 1; + while (true) + { + pos = path.find('/', pos); + if (pos == std::string::npos) + break; + + auto request = zkutil::makeCreateRequest(path.substr(0, pos), "", zkutil::CreateMode::Persistent, true); + create_ops.emplace_back(request); + ++pos; + } + + auto request = zkutil::makeCreateRequest(path, "", zkutil::CreateMode::Persistent, true); + create_ops.emplace_back(request); + + auto next_zxid = initial_storage->getNextZXID(); + + static Coordination::ACLs default_acls = [] + { + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + return Coordination::ACLs{std::move(acl)}; + }(); + + auto multi_create_request = std::make_shared(create_ops, default_acls); + initial_storage->preprocessRequest(multi_create_request, 1, 0, next_zxid, /* check_acl = */ false); + auto responses = initial_storage->processRequest(multi_create_request, 1, next_zxid, /* check_acl = */ false); + if (responses.size() > 1 || responses[0].response->error != Coordination::Error::ZOK) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Invalid response after trying to create a node {}", responses[0].response->error); + } + + void generateSnapshot() + { + std::cerr << "Generating snapshot with starting data" << std::endl; + std::lock_guard lock(nodes_mutex); + DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(initial_storage->getZXID(), 1, std::make_shared()); + DB::KeeperStorageSnapshot snapshot(initial_storage.get(), snapshot_meta); + snapshot_manager->serializeSnapshotToDisk(snapshot); + } + + std::mutex nodes_mutex; + DB::KeeperContextPtr keeper_context; + Coordination::KeeperStoragePtr initial_storage; + std::unordered_set processed_paths; + std::optional snapshot_manager; +}; + void dumpStats(std::string_view type, const RequestFromLogStats::Stats & stats_for_type) { std::cerr << fmt::format( @@ -751,7 +951,7 @@ void dumpStats(std::string_view type, const RequestFromLogStats::Stats & stats_f type, stats_for_type.total, stats_for_type.unexpected_results, - static_cast(stats_for_type.unexpected_results) / stats_for_type.total * 100) + stats_for_type.total != 0 ? static_cast(stats_for_type.unexpected_results) / stats_for_type.total * 100 : 0.0) << std::endl; }; @@ -763,24 +963,40 @@ void requestFromLogExecutor(std::shared_ptr>(); last_request = request_promise->get_future(); - Coordination::ResponseCallback callback - = [&, request_promise, request = request_from_log.request, expected_result = request_from_log.expected_result]( - const Coordination::Response & response) mutable + Coordination::ResponseCallback callback = [&, + request_promise, + request = request_from_log.request, + expected_result = request_from_log.expected_result, + subrequest_expected_results = std::move(request_from_log.subrequest_expected_results)]( + const Coordination::Response & response) mutable { auto & stats = request->isReadRequest() ? 
request_stats.read_requests : request_stats.write_requests; stats.total.fetch_add(1, std::memory_order_relaxed); - if (*expected_result != response.error) - stats.unexpected_results.fetch_add(1, std::memory_order_relaxed); + if (expected_result) + { + if (*expected_result != response.error) + stats.unexpected_results.fetch_add(1, std::memory_order_relaxed); - //if (!expected_result) - // return; + if (*expected_result != response.error) + { + std::cerr << fmt::format( + "Unexpected result for {}\ngot {}, expected {}\n", request->toString(), response.error, *expected_result) + << std::endl; - //if (*expected_result != response.error) - // std::cerr << fmt::format( - // "Unexpected result for {}, got {}, expected {}", request->getOpNum(), response.error, *expected_result) - // << std::endl; + if (const auto * multi_response = dynamic_cast(&response)) + { + std::string subresponses; + for (size_t i = 0; i < multi_response->responses.size(); ++i) + { + subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); + } + + std::cerr << "Subresponses\n" << subresponses << std::endl; + } + } + } request_promise->set_value(); }; @@ -827,6 +1043,9 @@ void Runner::runBenchmarkFromLog() RequestFromLogStats stats; + std::optional setup_nodes_collector; + if (!setup_nodes_snapshot_path.empty()) + setup_nodes_collector.emplace(setup_nodes_snapshot_path); std::unordered_map>> executor_id_to_queue; @@ -850,7 +1069,7 @@ void Runner::runBenchmarkFromLog() return; } - auto executor_queue = std::make_shared>(std::numeric_limits().max()); + auto executor_queue = std::make_shared>(std::numeric_limits::max()); executor_id_to_queue.emplace(request.executor_id, executor_queue); auto scheduled = pool->trySchedule([&, executor_queue]() mutable { @@ -865,6 +1084,7 @@ void Runner::runBenchmarkFromLog() throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to push to the executor's queue"); }; + if (!setup_nodes_collector) { auto setup_connection = getConnection(connection_infos[0], 0); benchmark_context.startup(*setup_connection); @@ -875,14 +1095,26 @@ void Runner::runBenchmarkFromLog() delay_watch.restart(); while (auto request_from_log = request_reader.getNextRequest()) { - request_from_log->connection = get_zookeeper_connection(request_from_log->session_id); - push_request(std::move(*request_from_log)); + if (setup_nodes_collector) + { + setup_nodes_collector->processRequest(*request_from_log); + } + else + { + request_from_log->connection = get_zookeeper_connection(request_from_log->session_id); + push_request(std::move(*request_from_log)); + } if (delay > 0 && delay_watch.elapsedSeconds() > delay) { - dumpStats("Write", stats.write_requests); - dumpStats("Read", stats.read_requests); - std::cerr << std::endl; + if (setup_nodes_collector) + setup_nodes_collector->generateSnapshot(); + else + { + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + std::cerr << std::endl; + } delay_watch.restart(); } } @@ -906,7 +1138,7 @@ void Runner::runBenchmarkWithGenerator() for (size_t i = 0; i < concurrency; ++i) { auto thread_connections = connections; - pool->scheduleOrThrowOnError([this, connections_ = std::move(thread_connections)]() mutable { thread(connections_); }); + pool->scheduleOrThrowOnError([this, my_connections = std::move(thread_connections)]() mutable { thread(my_connections); }); } } catch (...) 
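For orientation, a possible two-pass invocation using the new option (the binary name, file names, and the idea of starting the benchmarked Keeper from the generated snapshot are assumptions for illustration, not taken from this patch):

```bash
# pass 1: scan the request log, infer which nodes the logged requests expect to exist,
#         and serialize them as a Keeper snapshot into the given directory
./keeper-bench --input-request-log /path/to/request_log --setup-nodes-snapshot-path ./setup_snapshot

# pass 2: once the target Keeper has been seeded with that starting state,
#         replay the same log without the option to run the actual benchmark
./keeper-bench --config benchmark.yaml --input-request-log /path/to/request_log
```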
diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index 0c646eb2166..c19a4d82898 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -27,6 +27,7 @@ public: void startup(Coordination::ZooKeeper & zookeeper); void cleanup(Coordination::ZooKeeper & zookeeper); + private: struct Node { @@ -54,6 +55,7 @@ public: std::optional concurrency_, const std::string & config_path, const std::string & input_request_log_, + const std::string & setup_nodes_snapshot_path_, const Strings & hosts_strings_, std::optional max_time_, std::optional delay_, @@ -96,6 +98,7 @@ private: std::shared_ptr getConnection(const ConnectionInfo & connection_info, size_t connection_info_idx); std::string input_request_log; + std::string setup_nodes_snapshot_path; size_t concurrency = 1; diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index 45fc28f3bca..0b963abf406 100644 --- a/utils/keeper-bench/main.cpp +++ b/utils/keeper-bench/main.cpp @@ -38,6 +38,7 @@ int main(int argc, char *argv[]) ("help", "produce help message") ("config", value()->default_value(""), "yaml/xml file containing configuration") ("input-request-log", value()->default_value(""), "log of requests that will be replayed") + ("setup-nodes-snapshot-path", value()->default_value(""), "directory containing snapshots with starting state") ("concurrency,c", value(), "number of parallel queries") ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") ("iterations,i", value(), "amount of queries to be executed") @@ -60,6 +61,7 @@ int main(int argc, char *argv[]) Runner runner(valueToOptional(options["concurrency"]), options["config"].as(), options["input-request-log"].as(), + options["setup-nodes-snapshot-path"].as(), options["hosts"].as(), valueToOptional(options["time-limit"]), valueToOptional(options["report-delay"]), From 4653ec618d117f840cec5ba8c6d95895f0bbf4af Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 13 May 2024 13:43:47 +0000 Subject: [PATCH 121/392] Add more tests and documentation, fix existing tests and special build --- docs/en/sql-reference/data-types/dynamic.md | 86 ++++++++- src/Columns/ColumnDynamic.cpp | 7 + src/DataTypes/DataTypeDynamic.h | 2 +- ...9_dynamic_all_merge_algorithms_1.reference | 14 +- ... 
=> 03040_dynamic_type_alters_1.reference} | 0 ...ters.sh => 03040_dynamic_type_alters_1.sh} | 3 +- .../03040_dynamic_type_alters_2.reference | 182 ++++++++++++++++++ .../03040_dynamic_type_alters_2.sh | 57 ++++++ .../03041_dynamic_type_check_table.reference | 56 ++++++ .../03041_dynamic_type_check_table.sh | 45 +++++ 10 files changed, 442 insertions(+), 10 deletions(-) rename tests/queries/0_stateless/{03040_dynamic_type_alters.reference => 03040_dynamic_type_alters_1.reference} (100%) rename tests/queries/0_stateless/{03040_dynamic_type_alters.sh => 03040_dynamic_type_alters_1.sh} (57%) create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_2.reference create mode 100755 tests/queries/0_stateless/03040_dynamic_type_alters_2.sh create mode 100644 tests/queries/0_stateless/03041_dynamic_type_check_table.reference create mode 100755 tests/queries/0_stateless/03041_dynamic_type_check_table.sh diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index e3cade25b55..a2c8ba532ce 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -261,7 +261,7 @@ SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM te └─────────┴────────────────┴─────────┴─────────────────┘ ``` -## Reading Variant type from the data +## Reading Dynamic type from the data All text formats (TSV, CSV, CustomSeparated, Values, JSONEachRow, etc) supports reading `Dynamic` type. During data parsing ClickHouse tries to infer the type of each value and use it during insertion to `Dynamic` column. @@ -409,3 +409,87 @@ SELECT d, dynamicType(d) FROM test ORDER by d; └─────┴────────────────┘ ``` +## Reaching the limit in number of different data types stored inside Dynamic + +`Dynamic` data type can store only limited number of different data types inside. By default, this limit is 32, but you can change it in type declaration using syntax `Dynamic(max_types=N)` where N is between 1 and 255 (due to implementation details, it's impossible to have more than 255 different data types inside Dynamic). +When the limit is reached, all new data types inserted to `Dynamic` column will be casted to `String` and stored as `String` values. + +Let's see what happens when the limit is reached in different scenarios. + +### Reaching the limit during data parsing + +During parsing of `Dynamic` values from the data, when the limit is reached for current block of data, all new values will be inserted as `String` values: + +```sql +SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', ' +{"d" : 42} +{"d" : [1, 2, 3]} +{"d" : "Hello, World!"} +{"d" : "2020-01-01"} +{"d" : ["str1", "str2", "str3"]} +{"d" : {"a" : 1, "b" : [1, 2, 3]}} +') +``` + +```text +┌─d──────────────────────────┬─dynamicType(d)─┐ +│ 42 │ Int64 │ +│ [1,2,3] │ Array(Int64) │ +│ Hello, World! │ String │ +│ 2020-01-01 │ String │ +│ ["str1", "str2", "str3"] │ String │ +│ {"a" : 1, "b" : [1, 2, 3]} │ String │ +└────────────────────────────┴────────────────┘ +``` + +As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were converted to `String`. + +### During merges of data parts in MergeTree table engines + +During merge of several data parts in MergeTree table the `Dynamic` column in the resulting data part can reach the limit of different data types inside and won't be able to store all types from source parts. 
+In this case ClickHouse chooses what types will remain after merge and what types will be casted to `String`. In most cases ClickHouse tries to keep the most frequent types and cast the rarest types to `String`, but it depends on the implementation. + +Let's see an example of such merge. First, let's create a table with `Dynamic` column, set the limit of different data types to `3` and insert values with `5` different types: + +```sql +CREATE TABLE test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree ORDER BY id; +SYSTEM STOP MERGES test; +INSERT INTO test SELECT number, number FROM numbers(5); +INSERT INTO test SELECT number, range(number) FROM numbers(4); +INSERT INTO test SELECT number, toDate(number) FROM numbers(3); +INSERT INTO test SELECT number, map(number, number) FROM numbers(2); +INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(1); +``` + +Each insert will create a separate data pert with `Dynamic` column containing single type: +```sql +SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part; +``` + +```text +┌─count()─┬─dynamicType(d)──────┬─_part─────┐ +│ 5 │ UInt64 │ all_1_1_0 │ +│ 4 │ Array(UInt64) │ all_2_2_0 │ +│ 3 │ Date │ all_3_3_0 │ +│ 2 │ Map(UInt64, UInt64) │ all_4_4_0 │ +│ 1 │ String │ all_5_5_0 │ +└─────────┴─────────────────────┴───────────┘ +``` + +Now, let's merge all parts into one and see what will happen: + +```sql +SYSTEM START MERGES test; +OPTIMIZE TABLE test FINAL; +SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part; +``` + +```text +┌─count()─┬─dynamicType(d)─┬─_part─────┐ +│ 5 │ UInt64 │ all_1_5_2 │ +│ 6 │ String │ all_1_5_2 │ +│ 4 │ Array(UInt64) │ all_1_5_2 │ +└─────────┴────────────────┴───────────┘ +``` + +As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`. diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 76f536a3409..0f247638d92 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -290,6 +290,13 @@ void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size /// We cannot combine 2 Variant types as total number of variants exceeds the limit. /// In this case we will add most frequent variants from this range and insert them as usual, /// all other variants will be converted to String. + /// TODO: instead of keeping all current variants and just adding new most frequent variants + /// from source columns we can also try to replace rarest existing variants with frequent + /// variants from source column (so we will avoid casting new frequent variants to String + /// and keeping rare existing ones). It will require rewriting of existing data in Variant + /// column but will improve usability of Dynamic column for example during squashing blocks + /// during insert. + const auto & src_variant_column = dynamic_src.getVariantColumn(); /// Calculate ranges for each variant in current range. 
diff --git a/src/DataTypes/DataTypeDynamic.h b/src/DataTypes/DataTypeDynamic.h index bd3d822fbb6..d5e4c5261ce 100644 --- a/src/DataTypes/DataTypeDynamic.h +++ b/src/DataTypes/DataTypeDynamic.h @@ -12,7 +12,7 @@ class DataTypeDynamic final : public IDataType public: static constexpr bool is_parametric = true; - DataTypeDynamic(size_t max_dynamic_types_ = DEFAULT_MAX_DYNAMIC_TYPES); + explicit DataTypeDynamic(size_t max_dynamic_types_ = DEFAULT_MAX_DYNAMIC_TYPES); TypeIndex getTypeId() const override { return TypeIndex::Dynamic; } const char * getFamilyName() const override { return "Dynamic"; } diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference index a7fbbabcd46..4b4a1e2ab51 100644 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference @@ -1,12 +1,12 @@ MergeTree compact + horizontal merge ReplacingMergeTree -100000 UInt64 100000 String +100000 UInt64 50000 UInt64 100000 String SummingMergeTree -100000 UInt64 100000 String +100000 UInt64 200000 1 50000 String 100000 UInt64 @@ -22,8 +22,8 @@ AggregatingMergeTree 100000 1 MergeTree wide + horizontal merge ReplacingMergeTree -100000 UInt64 100000 String +100000 UInt64 50000 UInt64 100000 String SummingMergeTree @@ -49,16 +49,16 @@ ReplacingMergeTree 50000 UInt64 100000 String SummingMergeTree -100000 UInt64 100000 String +100000 UInt64 200000 1 50000 String 100000 UInt64 50000 2 100000 1 AggregatingMergeTree -100000 UInt64 100000 String +100000 UInt64 200000 1 50000 String 100000 UInt64 @@ -66,8 +66,8 @@ AggregatingMergeTree 100000 1 MergeTree wide + vertical merge ReplacingMergeTree -100000 UInt64 100000 String +100000 UInt64 50000 UInt64 100000 String SummingMergeTree @@ -79,8 +79,8 @@ SummingMergeTree 50000 2 100000 1 AggregatingMergeTree -100000 UInt64 100000 String +100000 UInt64 200000 1 50000 String 100000 UInt64 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference similarity index 100% rename from tests/queries/0_stateless/03040_dynamic_type_alters.reference rename to tests/queries/0_stateless/03040_dynamic_type_alters_1.reference diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh similarity index 57% rename from tests/queries/0_stateless/03040_dynamic_type_alters.sh rename to tests/queries/0_stateless/03040_dynamic_type_alters_1.sh index a20a92712e0..1f2a6a31ad7 100755 --- a/tests/queries/0_stateless/03040_dynamic_type_alters.sh +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --stacktrace --max_insert_threads 3 --group_by_two_level_threshold 1000000 --group_by_two_level_threshold_bytes 42526602 --distributed_aggregation_memory_efficient 1 --fsync_metadata 1 --output_format_parallel_formatting 0 --input_format_parallel_parsing 0 --min_chunk_bytes_for_parallel_parsing 8125230 --max_read_buffer_size 859505 --prefer_localhost_replica 1 --max_block_size 34577 --max_threads 41 --optimize_append_index 0 --optimize_if_chain_to_multiif 1 --optimize_if_transform_strings_to_enum 1 --optimize_read_in_order 1 --optimize_or_like_chain 0 --optimize_substitute_columns 1 --enable_multiple_prewhere_read_steps 1 --read_in_order_two_level_merge_threshold 99 --optimize_aggregation_in_order 1 --aggregation_in_order_max_block_bytes 27635208 --use_uncompressed_cache 0 --min_bytes_to_use_direct_io 10737418240 --min_bytes_to_use_mmap_io 6451111320 --local_filesystem_read_method pread --remote_filesystem_read_method read --local_filesystem_read_prefetch 1 --filesystem_cache_segments_batch_size 50 --read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 --throw_on_error_from_cache_on_write_operations 0 --remote_filesystem_read_prefetch 1 --allow_prefetched_read_pool_for_remote_filesystem 0 --filesystem_prefetch_max_memory_usage 64Mi --filesystem_prefetches_limit 10 --filesystem_prefetch_min_bytes_for_single_read_task 16Mi --filesystem_prefetch_step_marks 0 --filesystem_prefetch_step_bytes 100Mi --compile_aggregate_expressions 0 --compile_sort_description 1 --merge_tree_coarse_index_granularity 32 --optimize_distinct_in_order 0 --max_bytes_before_external_sort 10737418240 --max_bytes_before_external_group_by 10737418240 --max_bytes_before_remerge_sort 1374192967 --min_compress_block_size 2152247 --max_compress_block_size 1830907 --merge_tree_compact_parts_min_granules_to_multibuffer_read 79 --optimize_sorting_by_input_stream_properties 1 --http_response_buffer_size 106072 --http_wait_end_of_query True --enable_memory_bound_merging_of_aggregation_results 0 --min_count_to_compile_expression 0 --min_count_to_compile_aggregate_expression 3 --min_count_to_compile_sort_description 3 --session_timezone Africa/Khartoum --prefer_warmed_unmerged_parts_seconds 4 --use_page_cache_for_disks_without_file_cache False --page_cache_inject_eviction True --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.03 --ratio_of_defaults_for_sparse_serialization 0.9779014012142565 --prefer_fetch_merged_part_size_threshold 4254002758 --vertical_merge_algorithm_min_rows_to_activate 1 --vertical_merge_algorithm_min_columns_to_activate 1 --allow_vertical_merges_from_compact_to_wide_parts 1 --min_merge_bytes_to_use_direct_io 1 --index_granularity_bytes 4982992 --merge_max_block_size 16662 --index_granularity 22872 --min_bytes_for_wide_part 1073741824 --compress_marks 0 --compress_primary_key 0 --marks_compress_block_size 86328 --primary_key_compress_block_size 64101 --replace_long_file_name_to_hash 0 --max_file_name_length 81 --min_bytes_for_full_part_storage 536870912 --compact_parts_max_bytes_to_buffer 480908080 --compact_parts_max_granules_to_buffer 1 --compact_parts_merge_max_bytes_to_prefetch_part 4535313 --cache_populated_by_fetch 0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" function run() 
{ @@ -74,3 +74,4 @@ echo "MergeTree wide" $CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" run $CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference new file mode 100644 index 00000000000..18a181464e9 --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference @@ -0,0 +1,182 @@ +MergeTree compact +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +MergeTree wide +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N 
\N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N [] [] [] +1 1 \N \N \N \N \N [] [] [] +2 2 \N \N \N \N \N [] [] [] +3 3 3 \N 3 \N \N [] [] [] +4 4 4 \N 4 \N \N [] [] [] +5 5 5 \N 5 \N \N [] [] [] +6 6 str_6 str_6 \N \N \N [] [] [] +7 7 str_7 str_7 \N \N \N [] [] [] +8 8 str_8 str_8 \N \N \N [] [] [] +9 9 \N \N \N \N \N [] [] [] +10 10 \N \N \N \N \N [] [] [] +11 11 \N \N \N \N \N [] [] [] +12 12 12 \N 12 \N \N [] [] [] +13 13 str_13 str_13 \N \N \N [] [] [] +14 14 \N \N \N \N \N [] [] [] +15 15 [15] \N \N \N \N [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N \N [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N \N [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh new file mode 100755 index 00000000000..6491e64372f --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column" + $CH_CLIENT -q "alter table test add column d Dynamic settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column 1" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "alter rename column 1" + $CH_CLIENT -q "alter table test rename column d to d1 settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" + $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert nested dynamic" + $CH_CLIENT -q "insert into test select number, number, [number % 2 ? number : 'str_' || toString(number)]::Array(Dynamic) from numbers(15, 3)" + $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" + $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a, d1.\`Array(Dynamic)\`.UInt64, d1.\`Array(Dynamic)\`.String, d1.\`Array(Dynamic)\`.Date from test order by x" + + echo "alter rename column 2" + $CH_CLIENT -q "alter table test rename column d1 to d2 settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d2) from test group by dynamicType(d2) order by count(), dynamicType(d2)" + $CH_CLIENT -q "select x, y, d2, d2.String, d2.UInt64, d2.Date, d2.\`Tuple(a UInt64)\`.a, d2.\`Array(Dynamic)\`.UInt64, d2.\`Array(Dynamic)\`.String, d2.\`Array(Dynamic)\`.Date, from test order by x" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.reference b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference new file mode 100644 index 00000000000..b1ea186a917 --- /dev/null +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.reference @@ -0,0 +1,56 @@ +MergeTree compact +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column +4 String +4 UInt64 +7 
None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +check table +1 +MergeTree wide +initial insert +alter add column +3 None +0 0 \N \N \N \N +1 1 \N \N \N \N +2 2 \N \N \N \N +insert after alter add column +4 String +4 UInt64 +7 None +0 0 \N \N \N \N \N +1 1 \N \N \N \N \N +2 2 \N \N \N \N \N +3 3 3 \N 3 \N \N +4 4 4 \N 4 \N \N +5 5 5 \N 5 \N \N +6 6 str_6 str_6 \N \N \N +7 7 str_7 str_7 \N \N \N +8 8 str_8 str_8 \N \N \N +9 9 \N \N \N \N \N +10 10 \N \N \N \N \N +11 11 \N \N \N \N \N +12 12 12 \N 12 \N \N +13 13 str_13 str_13 \N \N \N +14 14 \N \N \N \N \N +check table +1 diff --git a/tests/queries/0_stateless/03041_dynamic_type_check_table.sh b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh new file mode 100755 index 00000000000..3d802485be3 --- /dev/null +++ b/tests/queries/0_stateless/03041_dynamic_type_check_table.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" + +function run() +{ + echo "initial insert" + $CH_CLIENT -q "insert into test select number, number from numbers(3)" + + echo "alter add column" + $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "insert after alter add column" + $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" + $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" + $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" + $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" + + echo "check table" + $CH_CLIENT -q "check table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" +run +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +run +$CH_CLIENT -q "drop table test;" + From 86406c9ac15d4438f257e0aa6b2ca75ea0750add Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 13 May 2024 14:43:32 +0000 Subject: [PATCH 122/392] Fix build --- src/DataTypes/Serializations/SerializationDynamic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationDynamic.h 
b/src/DataTypes/Serializations/SerializationDynamic.h index 4803bc25d18..7471ff54cf7 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.h +++ b/src/DataTypes/Serializations/SerializationDynamic.h @@ -105,7 +105,7 @@ private: { DynamicStructureSerializationVersion structure_version; DataTypePtr variant_type; - ColumnDynamic::Statistics statistics = {.source = ColumnDynamic::Statistics::Source::READ}; + ColumnDynamic::Statistics statistics = {.source = ColumnDynamic::Statistics::Source::READ, .data = {}}; explicit DeserializeBinaryBulkStateDynamicStructure(UInt64 structure_version_) : structure_version(structure_version_) {} }; From 904800afc8e77bc5567ba2096258aec4802d8cee Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 13 May 2024 17:44:14 +0200 Subject: [PATCH 123/392] Apply recent changes to storages3/hdfs/azure --- .../ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- .../ObjectStorage/StorageObjectStorage.cpp | 29 ++++++++++++------- .../ObjectStorage/StorageObjectStorage.h | 3 +- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 74707b61238..c24874d0a94 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -592,7 +592,7 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( ContextPtr context) { auto new_s3_settings = getSettings(config, config_prefix, context); - auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + auto new_client = getClient(config, config_prefix, context, *new_s3_settings, true); auto new_uri{uri}; new_uri.bucket = new_namespace; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index a187a8fc54d..01790760747 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -206,7 +206,7 @@ void StorageObjectStorage::read( size_t num_streams) { updateConfiguration(local_context); - if (partition_by && configuration->withWildcard()) + if (partition_by && configuration->withPartitionWildcard()) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Reading from a partitioned {} storage is not implemented yet", @@ -247,7 +247,14 @@ SinkToStoragePtr StorageObjectStorage::write( const auto sample_block = metadata_snapshot->getSampleBlock(); const auto & settings = configuration->getQuerySettings(local_context); - if (configuration->withWildcard()) + if (configuration->withGlobsIgnorePartitionWildcard()) + { + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "Path '{}' contains globs, so the table is in readonly mode", + configuration->getPath()); + } + + if (configuration->withPartitionWildcard()) { ASTPtr partition_by_ast = nullptr; if (auto insert_query = std::dynamic_pointer_cast(query)) @@ -265,14 +272,6 @@ SinkToStoragePtr StorageObjectStorage::write( } } - if (configuration->withGlobs()) - { - throw Exception( - ErrorCodes::DATABASE_ACCESS_DENIED, - "{} key '{}' contains globs, so the table is in readonly mode", - getName(), configuration->getPath()); - } - auto paths = configuration->getPaths(); if (auto new_key = checkAndGetNewFileOnInsertIfNeeded( *object_storage, *configuration, settings, paths.front(), paths.size())) @@ -428,13 +427,21 @@ StorageObjectStorage::Configuration::Configuration(const Configuration & other) structure = other.structure; } -bool StorageObjectStorage::Configuration::withWildcard() const +bool 
StorageObjectStorage::Configuration::withPartitionWildcard() const { static const String PARTITION_ID_WILDCARD = "{_partition_id}"; return getPath().find(PARTITION_ID_WILDCARD) != String::npos || getNamespace().find(PARTITION_ID_WILDCARD) != String::npos; } +bool StorageObjectStorage::Configuration::withGlobsIgnorePartitionWildcard() const +{ + if (!withPartitionWildcard()) + return withGlobs(); + else + return PartitionedSink::replaceWildcards(getPath(), "").find_first_of("*?{") != std::string::npos; +} + bool StorageObjectStorage::Configuration::isPathWithGlobs() const { return getPath().find_first_of("*?{") != std::string::npos; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 3f8ff79ad54..a396bad9d6e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -163,8 +163,9 @@ public: virtual void addStructureAndFormatToArgs( ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; - bool withWildcard() const; + bool withPartitionWildcard() const; bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } + bool withGlobsIgnorePartitionWildcard() const; bool isPathWithGlobs() const; bool isNamespaceWithGlobs() const; virtual std::string getPathWithoutGlobs() const; From 61f7b95e3d4ec7711df7fadb332eabf02913ba75 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 13 May 2024 16:04:20 +0000 Subject: [PATCH 124/392] Fix build --- src/DataTypes/Serializations/SerializationDynamic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index d0ecc3b80a2..cb9d4a2f7bc 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -31,7 +31,7 @@ struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryB ISerialization::SerializeBinaryBulkStatePtr variant_state; /// Variants statistics. Map (Variant name) -> (Variant size). 
- ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ }; + ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ, .data = {} }; SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} }; From f3b9a326fede69769811dc9309bfb5d00aefd874 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 13 May 2024 19:59:16 +0200 Subject: [PATCH 125/392] Fix build --- src/TableFunctions/TableFunctionObjectStorage.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index a997b34a75c..9f16a9a0b25 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -192,6 +192,15 @@ void registerTableFunctionObjectStorage(TableFunctionFactory & factory) #if USE_HDFS factory.registerFunction>( { + .documentation = + { + .description=R"(The table function can be used to read the data stored on HDFS virtual filesystem.)", + .examples{ + { + "hdfs", + "SELECT * FROM hdfs(url, format, compression, structure])", "" + }} + }, .allow_readonly = false }); #endif From 007c9be4db352567ef9a414a3aaecd1380d9de0d Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Tue, 14 May 2024 10:14:23 +0200 Subject: [PATCH 126/392] Restart CI From 0abb2be5eb55183e83c218cf352c88c7fb497939 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 14 May 2024 18:40:09 +0200 Subject: [PATCH 127/392] Review fixes --- docs/en/operations/settings/settings.md | 50 +++++++++++++++++++ .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 7 ++- .../ObjectStorages/HDFS/HDFSObjectStorage.h | 3 ++ src/Storages/Cache/SchemaCache.cpp | 1 - .../ObjectStorage/StorageObjectStorage.cpp | 6 +-- .../ObjectStorage/StorageObjectStorage.h | 6 ++- .../StorageObjectStorageSource.cpp | 2 +- .../StorageObjectStorageSource.h | 2 +- src/Storages/ObjectStorage/Utils.cpp | 6 +-- .../registerStorageObjectStorage.cpp | 6 +-- 10 files changed, 70 insertions(+), 19 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 91b544c6a82..72bd1ca8e2c 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3665,6 +3665,16 @@ Possible values: Default value: `0`. +## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist} + +Ignore ansense of file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + ## hdfs_truncate_on_insert {#hdfs_truncate_on_insert} Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. @@ -3697,6 +3707,46 @@ Possible values: Default value: `0`. +## hdfs_throw_on_zero_files_match {#hdfs_throw_on_zero_files_match} + +Throw an error if matched zero files according to glob expansion rules. + +Possible values: +- 1 — `SELECT` throws an exception. +- 0 — `SELECT` returns empty result. + +Default value: `0`. + +## hdfs_ignore_file_doesnt_exist {#hdfs_ignore_file_doesnt_exist} + +Ignore ansense of file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. 
+ +## azure_throw_on_zero_files_match {#azure_throw_on_zero_files_match} + +Throw an error if matched zero files according to glob expansion rules. + +Possible values: +- 1 — `SELECT` throws an exception. +- 0 — `SELECT` returns empty result. + +Default value: `0`. + +## azure_ignore_file_doesnt_exist {#azure_ignore_file_doesnt_exist} + +Ignore ansense of file if it does not exist when reading certain keys. + +Possible values: +- 1 — `SELECT` returns empty result. +- 0 — `SELECT` throws an exception. + +Default value: `0`. + ## engine_url_skip_empty_files {#engine_url_skip_empty_files} Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables. diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 6c2f310a7d1..1f3a4bdf6c7 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -186,7 +186,6 @@ ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) co void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const { initializeHDFSFS(); - auto * log = &Poco::Logger::get("HDFSObjectStorage"); LOG_TEST(log, "Trying to list files for {}", path); HDFSFileInfo ls; @@ -210,9 +209,6 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM for (int i = 0; i < ls.length; ++i) { const String file_path = fs::path(ls.file_info[i].mName).lexically_normal(); - const size_t last_slash = file_path.rfind('/'); - const String file_name = file_path.substr(last_slash); - const bool is_directory = ls.file_info[i].mKind == 'D'; if (is_directory) { @@ -227,6 +223,9 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM Poco::Timestamp::fromEpochTime(ls.file_info[i].mLastMod), {}})); } + + if (children.size() >= max_keys) + break; } } diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index e747b283400..8aae90d0721 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -39,6 +39,7 @@ public: bool lazy_initialize) : config(config_) , settings(std::move(settings_)) + , log(getLogger("HDFSObjectStorage(" + hdfs_root_path_ + ")")) { const size_t begin_of_path = hdfs_root_path_.find('/', hdfs_root_path_.find("//") + 2); url = hdfs_root_path_; @@ -134,6 +135,8 @@ private: std::string url; std::string url_without_path; std::string data_directory; + + LoggerPtr log; }; } diff --git a/src/Storages/Cache/SchemaCache.cpp b/src/Storages/Cache/SchemaCache.cpp index 5dc39f04ae0..299dd292772 100644 --- a/src/Storages/Cache/SchemaCache.cpp +++ b/src/Storages/Cache/SchemaCache.cpp @@ -1,6 +1,5 @@ #include #include -#include #include namespace ProfileEvents diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 01790760747..c5affb7989f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -61,10 +61,6 @@ StorageObjectStorage::StorageObjectStorage( metadata.setConstraints(constraints_); metadata.setComment(comment); - StoredObjects objects; - for (const auto & key : configuration->getPaths()) - objects.emplace_back(key); - setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns())); setInMemoryMetadata(metadata); } @@ 
-93,7 +89,7 @@ void StorageObjectStorage::updateConfiguration(ContextPtr context) { /// FIXME: we should be able to update everything apart from client if static_configuration == true. if (!configuration->isStaticConfiguration()) - object_storage->applyNewSettings(context->getConfigRef(), "s3.", context); + object_storage->applyNewSettings(context->getConfigRef(), configuration->getTypeName() + ".", context); } namespace diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index a396bad9d6e..928d49f9604 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -124,7 +124,6 @@ protected: ConfigurationPtr configuration; const ObjectStoragePtr object_storage; - const std::string engine_name; const std::optional format_settings; const ASTPtr partition_by; const bool distributed_processing; @@ -148,7 +147,9 @@ public: ContextPtr local_context, bool with_table_structure); + /// Storage type: s3, hdfs, azure. virtual std::string getTypeName() const = 0; + /// Engine name: S3, HDFS, Azure. virtual std::string getEngineName() const = 0; virtual Path getPath() const = 0; @@ -158,7 +159,10 @@ public: virtual void setPaths(const Paths & paths) = 0; virtual String getDataSourceDescription() = 0; + /// Sometimes object storages have something similar to chroot or namespace, for example + /// buckets in S3. If object storage doesn't have any namepaces return empty string. virtual String getNamespace() const = 0; + virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; virtual void addStructureAndFormatToArgs( ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index cb3f732ce83..e28924617e0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -321,7 +321,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const S const bool object_too_small = object_size <= 2 * getContext()->getSettings().max_download_buffer_size; const bool use_prefetch = object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; read_settings.remote_fs_method = use_prefetch ? RemoteFSReadMethod::threadpool : RemoteFSReadMethod::read; - /// User's S3 object may change, don't cache it. + /// User's object may change, don't cache it. read_settings.use_page_cache_for_disks_without_file_cache = false; // Create a read buffer that will prefetch the first ~1 MB of the file. 
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index a8df00bc0ac..08d545f9b85 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -62,7 +62,7 @@ protected: const std::optional format_settings; const UInt64 max_block_size; const bool need_only_count; - const ReadFromFormatInfo read_from_format_info; + const ReadFromFormatInfo & read_from_format_info; const std::shared_ptr create_reader_pool; ColumnsDescription columns_desc; diff --git a/src/Storages/ObjectStorage/Utils.cpp b/src/Storages/ObjectStorage/Utils.cpp index bde3cb7e1cb..e49e14d2a0c 100644 --- a/src/Storages/ObjectStorage/Utils.cpp +++ b/src/Storages/ObjectStorage/Utils.cpp @@ -38,9 +38,9 @@ std::optional checkAndGetNewFileOnInsertIfNeeded( throw Exception( ErrorCodes::BAD_ARGUMENTS, "Object in bucket {} with key {} already exists. " - "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - configuration.getNamespace(), key); + "If you want to overwrite it, enable setting {}_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting {}_create_new_file_on_insert", + configuration.getNamespace(), key, configuration.getTypeName(), configuration.getTypeName()); } void resolveSchemaAndFormat( diff --git a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp index 74c8aeaad7d..bf595b2f5d4 100644 --- a/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/registerStorageObjectStorage.cpp @@ -106,17 +106,17 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) void registerStorageS3(StorageFactory & factory) { - return registerStorageS3Impl("S3", factory); + registerStorageS3Impl("S3", factory); } void registerStorageCOS(StorageFactory & factory) { - return registerStorageS3Impl("COSN", factory); + registerStorageS3Impl("COSN", factory); } void registerStorageOSS(StorageFactory & factory) { - return registerStorageS3Impl("OSS", factory); + registerStorageS3Impl("OSS", factory); } #endif From 3778cee49e1d6ac1f0f4f470ba5d63458c33df3b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 14 May 2024 18:41:19 +0200 Subject: [PATCH 128/392] Update src/Core/Settings.h Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Core/Settings.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index aa20f68ac0d..066a551b37b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -117,9 +117,9 @@ class IColumn; M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ - M(Bool, s3_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageS3", 0) \ - M(Bool, hdfs_ignore_file_doesnt_exist, false, "Ignore if files does not exits and return 0 zeros for StorageHDFS", 0) \ - M(Bool, azure_ignore_file_doesnt_exist, false, "Ignore if files does not exits and 
return 0 zeros for StorageAzure", 0) \ + M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in S3 table engine", 0) \ + M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ + M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ From be693ceba7fa17e2c03c54197fb0d0f301640cc1 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 14 May 2024 18:46:35 +0200 Subject: [PATCH 129/392] Minor --- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index c5affb7989f..bc5b347d1e0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -402,7 +402,6 @@ void StorageObjectStorage::Configuration::initialize( else configuration.fromAST(engine_args, local_context, with_table_structure); - // FIXME: it should be - if (format == "auto" && get_format_from_file) if (configuration.format == "auto") configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); else From 65f404c153fb96602ec07c4f3919af14468b8d7d Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 14 May 2024 21:28:40 +0200 Subject: [PATCH 130/392] Review fixes --- docs/en/operations/settings/settings.md | 2 +- src/Core/Settings.h | 6 +++--- .../ObjectStorages/HDFS/HDFSObjectStorage.cpp | 2 +- src/Storages/ObjectStorage/S3/Configuration.h | 2 ++ .../ObjectStorage/StorageObjectStorage.h | 5 +++-- .../StorageObjectStorageSource.cpp | 19 ++++++++----------- .../StorageObjectStorageSource.h | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 72bd1ca8e2c..88e945a710c 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3667,7 +3667,7 @@ Default value: `0`. ## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist} -Ignore ansense of file if it does not exist when reading certain keys. +Ignore absense of file if it does not exist when reading certain keys. Possible values: - 1 — `SELECT` returns empty result. 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 066a551b37b..afadaa88f6d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -117,9 +117,9 @@ class IColumn; M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ - M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in S3 table engine", 0) \ - M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ - M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the reqested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ + M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \ + M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \ + M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \ M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \ M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. 
While additional checksums on S3 give defense in depth.", 0) \ M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index 1f3a4bdf6c7..dcb2af9d4d3 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -224,7 +224,7 @@ void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithM {}})); } - if (children.size() >= max_keys) + if (max_keys && children.size() >= max_keys) break; } } diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index b28b1c226a7..0bd7f1ab108 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -15,12 +15,14 @@ public: using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; static constexpr auto type_name = "s3"; + static constexpr auto namespace_name = "bucket"; StorageS3Configuration() = default; StorageS3Configuration(const StorageS3Configuration & other); std::string getTypeName() const override { return type_name; } std::string getEngineName() const override { return url.storage_name; } + std::string getNamespaceType() const override { return namespace_name; } Path getPath() const override { return url.key; } void setPath(const Path & path) override { url.key = path; } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 928d49f9604..26b153ca0db 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -151,6 +151,9 @@ public: virtual std::string getTypeName() const = 0; /// Engine name: S3, HDFS, Azure. virtual std::string getEngineName() const = 0; + /// Sometimes object storages have something similar to chroot or namespace, for example + /// buckets in S3. If object storage doesn't have any namepaces return empty string. + virtual std::string getNamespaceType() const { return "namespace"; } virtual Path getPath() const = 0; virtual void setPath(const Path & path) = 0; @@ -159,8 +162,6 @@ public: virtual void setPaths(const Paths & paths) = 0; virtual String getDataSourceDescription() = 0; - /// Sometimes object storages have something similar to chroot or namespace, for example - /// buckets in S3. If object storage doesn't have any namepaces return empty string. 
virtual String getNamespace() const = 0; virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index e28924617e0..737f733615f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -43,7 +43,7 @@ StorageObjectStorageSource::StorageObjectStorageSource( ObjectStoragePtr object_storage_, ConfigurationPtr configuration_, const ReadFromFormatInfo & info, - std::optional format_settings_, + const std::optional & format_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, @@ -95,7 +95,8 @@ std::shared_ptr StorageObjectStorageSourc local_context->getSettingsRef().max_threads); if (configuration->isNamespaceWithGlobs()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression can not have wildcards inside namespace name"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expression can not have wildcards inside {} name", configuration->getNamespaceType()); auto settings = configuration->getQuerySettings(local_context); @@ -425,15 +426,13 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne { std::lock_guard lock(next_mutex); auto object_info = nextImplUnlocked(processor); - if (object_info) + if (first_iteration && !object_info && throw_on_zero_files_match) { - if (first_iteration) - first_iteration = false; - } - else if (first_iteration && throw_on_zero_files_match) - { - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Can not match any files"); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, + "Can not match any files with path {}", + configuration->getPath()); } + first_iteration = false; return object_info; } @@ -456,8 +455,6 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne } new_batch = std::move(result.value()); - LOG_TEST(logger, "Batch size: {}", new_batch.size()); - for (auto it = new_batch.begin(); it != new_batch.end();) { if (!recursive && !re2::RE2::FullMatch((*it)->relative_path, *matcher)) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 08d545f9b85..9c67a125f5e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -31,7 +31,7 @@ public: ObjectStoragePtr object_storage_, ConfigurationPtr configuration, const ReadFromFormatInfo & info, - std::optional format_settings_, + const std::optional & format_settings_, ContextPtr context_, UInt64 max_block_size_, std::shared_ptr file_iterator_, From a7b135ea8b8962ec4db318305391881ec1ff4ff8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 15 May 2024 12:42:38 +0200 Subject: [PATCH 131/392] Fix style check --- docs/en/operations/settings/settings.md | 2 +- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 88e945a710c..131948eace9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3667,7 +3667,7 @@ Default value: `0`. ## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist} -Ignore absense of file if it does not exist when reading certain keys. +Ignore absence of file if it does not exist when reading certain keys. 
Possible values: - 1 — `SELECT` returns empty result. diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 996f7da234a..3c72ef0f737 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -253,6 +253,7 @@ DockerHub DoubleDelta Doxygen Durre +doesnt ECMA Ecto EdgeAngle From 4c8bdad0e709b64ed045aed6092a429767370395 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 15 May 2024 12:54:59 +0200 Subject: [PATCH 132/392] Simplify glob iterator --- .../ObjectStorage/StorageObjectStorageCluster.cpp | 8 +++----- .../ObjectStorage/StorageObjectStorageSource.cpp | 15 +++------------ 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 193894a1d44..a43d9da0fa3 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -82,11 +82,9 @@ void StorageObjectStorageCluster::updateQueryToSendIfNeeded( RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExtension( const ActionsDAG::Node * predicate, const ContextPtr & local_context) const { - const auto settings = configuration->getQuerySettings(local_context); - auto iterator = std::make_shared( - object_storage, configuration, predicate, virtual_columns, local_context, - nullptr, settings.list_object_keys_size, settings.throw_on_zero_files_match, - local_context->getFileProgressCallback()); + auto iterator = StorageObjectStorageSource::createFileIterator( + configuration, object_storage, /* distributed_processing */false, local_context, + predicate, virtual_columns, nullptr, local_context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 737f733615f..8d5df96ca6e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -407,18 +407,9 @@ StorageObjectStorageSource::GlobIterator::GlobIterator( } else { - const auto object_key = configuration_->getPath(); - auto object_metadata = object_storage->getObjectMetadata(object_key); - auto object_info = std::make_shared(object_key, object_metadata); - - object_infos.emplace_back(object_info); - if (read_keys) - read_keys->emplace_back(object_info); - - if (file_progress_callback) - file_progress_callback(FileProgress(0, object_metadata.size_bytes)); - - is_finished = true; + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Using glob iterator with path without globs is not allowed (used path: {})", + configuration->getPath()); } } From a09bb5f0b7e2134ec576c3f20b492515cf258432 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 15 May 2024 11:42:11 +0000 Subject: [PATCH 133/392] Fix tests --- .../SerializationDynamicElement.cpp | 2 +- ...3039_dynamic_all_merge_algorithms_1.reference | 16 ++++++++-------- .../03039_dynamic_all_merge_algorithms_1.sh | 8 ++++---- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationDynamicElement.cpp b/src/DataTypes/Serializations/SerializationDynamicElement.cpp index b0a4e63d0a5..dafd6d663b0 100644 --- a/src/DataTypes/Serializations/SerializationDynamicElement.cpp +++ 
b/src/DataTypes/Serializations/SerializationDynamicElement.cpp @@ -72,7 +72,7 @@ void SerializationDynamicElement::deserializeBinaryBulkStatePrefix( auto dynamic_element_state = std::make_shared(); dynamic_element_state->structure_state = std::move(structure_state); - const auto & variant_type = checkAndGetState(structure_state)->variant_type; + const auto & variant_type = checkAndGetState(dynamic_element_state->structure_state)->variant_type; /// Check if we actually have required element in the Variant. if (auto global_discr = assert_cast(*variant_type).tryGetVariantDiscriminator(dynamic_element_name)) { diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference index 4b4a1e2ab51..6c69b81c183 100644 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference @@ -10,16 +10,16 @@ SummingMergeTree 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 AggregatingMergeTree 100000 String 100000 UInt64 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 MergeTree wide + horizontal merge ReplacingMergeTree 100000 String @@ -32,16 +32,16 @@ SummingMergeTree 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 AggregatingMergeTree 100000 String 100000 UInt64 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 MergeTree compact + vertical merge ReplacingMergeTree 100000 String @@ -54,16 +54,16 @@ SummingMergeTree 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 AggregatingMergeTree 100000 String 100000 UInt64 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 MergeTree wide + vertical merge ReplacingMergeTree 100000 String @@ -76,13 +76,13 @@ SummingMergeTree 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 AggregatingMergeTree 100000 String 100000 UInt64 200000 1 50000 String 100000 UInt64 -50000 2 100000 1 +50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh index 9298fe28fec..198c6ca93ff 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh @@ -30,10 +30,10 @@ function test() $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from test group by sum" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" $CH_CLIENT -nm -q "system start merges test; optimize table test final" $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from test group by sum" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" $CH_CLIENT -q "drop table test" echo "AggregatingMergeTree" @@ -43,10 +43,10 @@ function test() $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" + 
$CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" $CH_CLIENT -nm -q "system start merges test; optimize table test final" $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" $CH_CLIENT -q "drop table test" } From 12e512c70ddfe32f81f78ee7d58ae47c38d34ee9 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 15 May 2024 14:15:45 +0200 Subject: [PATCH 134/392] Delete tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference Removed mispelled file --- .../0.2973_parse_crlf_with_tsv_files.reference | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference diff --git a/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference b/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference deleted file mode 100644 index 14cf3a564e4..00000000000 --- a/tests/queries/0_stateless/0.2973_parse_crlf_with_tsv_files.reference +++ /dev/null @@ -1,12 +0,0 @@ -/home/shaun/Desktop/ClickHouse/user_files/02973_parse_crlf_with_tsv_files_test_data_without_crlf.tsv -<-- Read UNIX endings --> - -Akiba_Hebrew_Academy 2017-08-01 241 -Aegithina_tiphia 2018-02-01 34 -1971-72_Utah_Stars_season 2016-10-01 1 - -<-- Read DOS endings with setting input_format_tsv_crlf_end_of_line=1 --> - -Akiba_Hebrew_Academy 2017-08-01 241 -Aegithina_tiphia 2018-02-01 34 -1971-72_Utah_Stars_season 2016-10-01 1 From 53f5b958036d4ef3f69c3a22be96cf4c2e1b8c4a Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 15 May 2024 13:25:44 +0200 Subject: [PATCH 135/392] Fix typo --- docs/en/operations/settings/settings.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 131948eace9..1772a3aa861 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3719,7 +3719,7 @@ Default value: `0`. ## hdfs_ignore_file_doesnt_exist {#hdfs_ignore_file_doesnt_exist} -Ignore ansense of file if it does not exist when reading certain keys. +Ignore absence of file if it does not exist when reading certain keys. Possible values: - 1 — `SELECT` returns empty result. @@ -3739,7 +3739,7 @@ Default value: `0`. ## azure_ignore_file_doesnt_exist {#azure_ignore_file_doesnt_exist} -Ignore ansense of file if it does not exist when reading certain keys. +Ignore absence of file if it does not exist when reading certain keys. Possible values: - 1 — `SELECT` returns empty result. From ae10e7ded1080d5bd72372dc611cdcb7b96137ef Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 15 May 2024 13:09:00 +0000 Subject: [PATCH 136/392] Remove data from all disks after DROP with Lazy database. 
--- src/Databases/DatabaseOnDisk.cpp | 28 +++--- .../test_lazy_database/__init__.py | 0 .../configs/storage_policy.xml | 12 +++ tests/integration/test_lazy_database/test.py | 88 +++++++++++++++++++ 4 files changed, 117 insertions(+), 11 deletions(-) create mode 100644 tests/integration/test_lazy_database/__init__.py create mode 100644 tests/integration/test_lazy_database/configs/storage_policy.xml create mode 100644 tests/integration/test_lazy_database/test.py diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 67b45c7d08d..72a9ba318b1 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -324,31 +325,36 @@ void DatabaseOnDisk::dropTable(ContextPtr local_context, const String & table_na StoragePtr table = detachTable(local_context, table_name); - /// This is possible for Lazy database. - if (!table) - return; - bool renamed = false; try { fs::rename(table_metadata_path, table_metadata_path_drop); renamed = true; - table->drop(); - table->is_dropped = true; - - fs::path table_data_dir(local_context->getPath() + table_data_path_relative); - if (fs::exists(table_data_dir)) - (void)fs::remove_all(table_data_dir); + // The table might be not loaded for Lazy database engine. + if (table) + { + table->drop(); + table->is_dropped = true; + } } catch (...) { LOG_WARNING(log, getCurrentExceptionMessageAndPattern(/* with_stacktrace */ true)); - attachTable(local_context, table_name, table, table_data_path_relative); + if (table) + attachTable(local_context, table_name, table, table_data_path_relative); if (renamed) fs::rename(table_metadata_path_drop, table_metadata_path); throw; } + for (const auto & [disk_name, disk] : getContext()->getDisksMap()) + { + if (disk->isReadOnly() || !disk->exists(table_data_path_relative)) + continue; + + LOG_INFO(log, "Removing data directory from disk {} with path {} for dropped table {} ", disk_name, table_data_path_relative, table_name); + disk->removeRecursive(table_data_path_relative); + } (void)fs::remove(table_metadata_path_drop); } diff --git a/tests/integration/test_lazy_database/__init__.py b/tests/integration/test_lazy_database/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_lazy_database/configs/storage_policy.xml b/tests/integration/test_lazy_database/configs/storage_policy.xml new file mode 100644 index 00000000000..58771d6b284 --- /dev/null +++ b/tests/integration/test_lazy_database/configs/storage_policy.xml @@ -0,0 +1,12 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + + + + diff --git a/tests/integration/test_lazy_database/test.py b/tests/integration/test_lazy_database/test.py new file mode 100644 index 00000000000..6890aa87374 --- /dev/null +++ b/tests/integration/test_lazy_database/test.py @@ -0,0 +1,88 @@ +import logging +import time +import pytest +import os +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=["configs/storage_policy.xml"], + with_minio=True, + ) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def assert_objects_count(cluster, objects_count, path="data/"): + minio = cluster.minio_client + s3_objects = list(minio.list_objects(cluster.minio_bucket, path, recursive=True)) + if 
objects_count != len(s3_objects): + for s3_object in s3_objects: + object_meta = minio.stat_object(cluster.minio_bucket, s3_object.object_name) + logging.info("Existing S3 object: %s", str(object_meta)) + assert objects_count == len(s3_objects) + + +def list_of_files_on_ch_disk(node, disk, path): + disk_path = node.query( + f"SELECT path FROM system.disks WHERE name='{disk}'" + ).splitlines()[0] + return node.exec_in_container( + ["bash", "-c", f"ls {os.path.join(disk_path, path)}"], user="root" + ) + + +@pytest.mark.parametrize( + "engine", + [ + pytest.param("Log"), + ], +) +@pytest.mark.parametrize( + "disk,check_s3", + [ + pytest.param("default", False), + pytest.param("s3", True), + ], +) +@pytest.mark.parametrize( + "delay", + [ + pytest.param(0), + pytest.param(4), + ], +) +def test_drop_table(cluster, engine, disk, check_s3, delay): + node = cluster.instances["node"] + + node.query("DROP DATABASE IF EXISTS lazy") + node.query("CREATE DATABASE lazy ENGINE=Lazy(2)") + node.query( + "CREATE TABLE lazy.table (id UInt64) ENGINE={} SETTINGS disk = '{}'".format( + engine, + disk, + ) + ) + + node.query("INSERT INTO lazy.table SELECT number FROM numbers(10)") + assert node.query("SELECT count(*) FROM lazy.table") == "10\n" + if delay: + time.sleep(delay) + node.query("DROP TABLE lazy.table SYNC") + + if check_s3: + # There mustn't be any orphaned data + assert_objects_count(cluster, 0) + + # Local data must be removed + assert list_of_files_on_ch_disk(node, disk, "data/lazy/") == "" From 47dfeaa487743d81c66bb280e8eeb8f31ef21507 Mon Sep 17 00:00:00 2001 From: copperybean Date: Sun, 12 May 2024 21:57:37 +0800 Subject: [PATCH 137/392] fix comments Change-Id: I2677dc20fc515bbbe91f54154fc4c081f164758e --- .../Formats/Impl/Parquet/ParquetDataBuffer.h | 9 +- .../Impl/Parquet/ParquetDataValuesReader.cpp | 18 +- .../Impl/Parquet/ParquetDataValuesReader.h | 13 +- .../Impl/Parquet/ParquetLeafColReader.cpp | 33 +- .../Impl/Parquet/ParquetRecordReader.cpp | 326 +++++++++++++----- .../Impl/Parquet/ParquetRecordReader.h | 6 +- .../Formats/Impl/ParquetBlockInputFormat.cpp | 2 + .../02998_native_parquet_reader.sh | 4 +- .../native_parquet_reader.parquet} | Bin 9 files changed, 296 insertions(+), 115 deletions(-) rename tests/queries/0_stateless/{02998_native_parquet_reader.parquet => data_parquet/native_parquet_reader.parquet} (100%) diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h index 5c37375fa0c..57df6f59f72 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataBuffer.h @@ -48,7 +48,7 @@ public: consume(bytes); } - void ALWAYS_INLINE readDateTime64(DateTime64 & dst) + void ALWAYS_INLINE readDateTime64FromInt96(DateTime64 & dst) { static const int max_scale_num = 9; static const UInt64 pow10[max_scale_num + 1] @@ -110,10 +110,7 @@ public: // refer to: RawBytesToDecimalBytes in reader_internal.cc, Decimal128::FromBigEndian in decimal.cc auto status = TArrowDecimal::FromBigEndian(getArrowData(), elem_bytes_num); - if (unlikely(!status.ok())) - { - throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Read parquet decimal failed: {}", status.status().ToString()); - } + assert(status.ok()); status.ValueUnsafe().ToBytes(reinterpret_cast(out)); consume(elem_bytes_num); } @@ -144,7 +141,7 @@ private: class LazyNullMap { public: - LazyNullMap(UInt64 size_) : size(size_), col_nullable(nullptr) {} + explicit LazyNullMap(UInt64 size_) : size(size_), col_nullable(nullptr) {} 
template requires std::is_integral_v diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 6743086e9e6..1f0c7105572 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -276,8 +276,7 @@ void ParquetPlainValuesReader::readBatch( auto idx = cursor; cursor += count; - // the type of offset_data is PaddedPODArray, which makes sure that the -1 index is available - for (auto val_offset = offset_data[idx - 1]; idx < cursor; idx++) + for (auto val_offset = chars_size_bak; idx < cursor; idx++) { offset_data[idx] = ++val_offset; } @@ -288,7 +287,7 @@ void ParquetPlainValuesReader::readBatch( template <> -void ParquetPlainValuesReader>::readBatch( +void ParquetPlainValuesReader, ParquetReaderTypes::TimestampInt96>::readBatch( MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) { auto cursor = col_ptr->size(); @@ -302,21 +301,21 @@ void ParquetPlainValuesReader>::readBatch( null_map, /* individual_visitor */ [&](size_t nest_cursor) { - plain_data_buffer.readDateTime64(column_data[nest_cursor]); + plain_data_buffer.readDateTime64FromInt96(column_data[nest_cursor]); }, /* repeated_visitor */ [&](size_t nest_cursor, UInt32 count) { auto * col_data_pos = column_data + nest_cursor; for (UInt32 i = 0; i < count; i++) { - plain_data_buffer.readDateTime64(col_data_pos[i]); + plain_data_buffer.readDateTime64FromInt96(col_data_pos[i]); } } ); } -template -void ParquetPlainValuesReader::readBatch( +template +void ParquetPlainValuesReader::readBatch( MutableColumnPtr & col_ptr, LazyNullMap & null_map, UInt32 num_values) { auto cursor = col_ptr->size(); @@ -542,11 +541,14 @@ void ParquetRleDictReader::readBatch( template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; template class ParquetPlainValuesReader; +template class ParquetPlainValuesReader; template class ParquetPlainValuesReader; template class ParquetPlainValuesReader; template class ParquetPlainValuesReader>; template class ParquetPlainValuesReader>; +template class ParquetPlainValuesReader>; template class ParquetPlainValuesReader; template class ParquetFixedLenPlainReader>; @@ -557,7 +559,9 @@ template class ParquetRleLCReader; template class ParquetRleLCReader; template class ParquetRleDictReader; +template class ParquetRleDictReader; template class ParquetRleDictReader; +template class ParquetRleDictReader; template class ParquetRleDictReader; template class ParquetRleDictReader; template class ParquetRleDictReader>; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 688de4f52eb..0f916ff862d 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -24,7 +24,7 @@ public: /** * @brief Used when the bit_width is 0, so all elements have same value. 
*/ - RleValuesReader(UInt32 total_size, Int32 val = 0) + explicit RleValuesReader(UInt32 total_size, Int32 val = 0) : bit_reader(nullptr), bit_width(0), cur_group_size(total_size), cur_value(val), cur_group_is_packed(false) {} @@ -72,7 +72,8 @@ public: * @tparam SteppedValidVisitor A callback with signature: * void(size_t cursor, const std::vector & valid_index_steps) * for n valid elements with null value interleaved in a BitPacked group, - * i-th item in valid_index_steps describes how many elements in column there are after (i-1)-th valid element. + * i-th item in valid_index_steps describes how many elements there are + * from i-th valid element (include) to (i+1)-th valid element (exclude). * * take following BitPacked group with 2 valid elements for example: * null valid null null valid null @@ -138,10 +139,16 @@ public: using ParquetDataValuesReaderPtr = std::unique_ptr; +enum class ParquetReaderTypes +{ + Normal, + TimestampInt96, +}; + /** * The definition level is RLE or BitPacked encoding, while data is read directly */ -template +template class ParquetPlainValuesReader : public ParquetDataValuesReader { public: diff --git a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp index 52dfad7606a..9e1cae9bb65 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetLeafColReader.cpp @@ -110,16 +110,24 @@ ColumnPtr readDictPage( template <> ColumnPtr readDictPage>( const parquet::DictionaryPage & page, - const parquet::ColumnDescriptor & /* col_des */, + const parquet::ColumnDescriptor & col_des, const DataTypePtr & data_type) { + const auto & datetime_type = assert_cast(*data_type); auto dict_col = ColumnDecimal::create(page.num_values(), datetime_type.getScale()); auto * col_data = dict_col->getData().data(); ParquetDataBuffer buffer(page.data(), page.size(), datetime_type.getScale()); - for (auto i = 0; i < page.num_values(); i++) + if (col_des.physical_type() == parquet::Type::INT64) { - buffer.readDateTime64(col_data[i]); + buffer.readBytes(dict_col->getData().data(), page.num_values() * sizeof(Int64)); + } + else + { + for (auto i = 0; i < page.num_values(); i++) + { + buffer.readDateTime64FromInt96(col_data[i]); + } } return dict_col; } @@ -190,8 +198,12 @@ std::unique_ptr createPlainReader( RleValuesReaderPtr def_level_reader, ParquetDataBuffer buffer) { - return std::make_unique>( - col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); + if (std::is_same_v> && col_des.physical_type() == parquet::Type::INT96) + return std::make_unique>( + col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); + else + return std::make_unique>( + col_des.max_definition_level(), std::move(def_level_reader), std::move(buffer)); } @@ -287,6 +299,7 @@ void ParquetLeafColReader::degradeDictionary() null_map = std::make_unique(reading_rows_num); auto col_existing = std::move(column); column = ColumnString::create(); + reserveColumnStrRows(column, reading_rows_num); ColumnString & col_dest = *static_cast(column.get()); const ColumnString & col_dict_str = *static_cast(dictionary.get()); @@ -294,8 +307,9 @@ void ParquetLeafColReader::degradeDictionary() visitColStrIndexType(dictionary->size(), [&](TColVec *) { const TColVec & col_src = *static_cast(col_existing.get()); - reserveColumnStrRows(column, reading_rows_num); + // It will be easier to create a ColumnLowCardinality and call convertToFullColumn() on it, + 
// while the performance loss is ignorable, the implementation can be updated next time. col_dest.getOffsets().resize(col_src.size()); for (size_t i = 0; i < col_src.size(); i++) { @@ -378,6 +392,11 @@ void ParquetLeafColReader::readPage() LOG_DEBUG(log, "{} values in dictionary page of column {}", dict_page.num_values(), col_descriptor.name()); dictionary = readDictPage(dict_page, col_descriptor, base_data_type); + if (unlikely(dictionary->size() < 2)) + { + // must not small than ColumnUnique::numSpecialValues() + dictionary->assumeMutable()->insertManyDefaults(2); + } if (std::is_same_v) { reading_low_cardinality = true; @@ -508,7 +527,9 @@ std::unique_ptr ParquetLeafColReader::createDi template class ParquetLeafColReader; +template class ParquetLeafColReader; template class ParquetLeafColReader; +template class ParquetLeafColReader; template class ParquetLeafColReader; template class ParquetLeafColReader; template class ParquetLeafColReader; diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 9cde433b983..fddd8059925 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -36,8 +36,7 @@ namespace ErrorCodes try { (s); } \ catch (const ::parquet::ParquetException & e) \ { \ - auto msg = PreformattedMessage::create("Excepted when reading parquet: {}", e.what()); \ - throw Exception(std::move(msg), ErrorCodes::PARQUET_EXCEPTION); \ + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "Parquet exception: {}", e.what()); \ } \ } while (false) @@ -45,102 +44,252 @@ namespace { std::unique_ptr createFileReader( - std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file) + std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, + std::shared_ptr metadata = nullptr) { std::unique_ptr res; - THROW_PARQUET_EXCEPTION(res = parquet::ParquetFileReader::Open(std::move(arrow_file))); + THROW_PARQUET_EXCEPTION(res = parquet::ParquetFileReader::Open( + std::move(arrow_file), + parquet::default_reader_properties(), + metadata)); return res; } -std::unique_ptr createColReader( - const parquet::ColumnDescriptor & col_descriptor, - DataTypePtr ch_type, - std::unique_ptr meta, - std::unique_ptr reader) +class ColReaderFactory { - if (col_descriptor.logical_type()->is_date() && parquet::Type::INT32 == col_descriptor.physical_type()) +public: + ColReaderFactory( + const parquet::ArrowReaderProperties & reader_properties_, + const parquet::ColumnDescriptor & col_descriptor_, + DataTypePtr ch_type_, + std::unique_ptr meta_, + std::unique_ptr page_reader_) + : reader_properties(reader_properties_) + , col_descriptor(col_descriptor_) + , ch_type(std::move(ch_type_)) + , meta(std::move(meta_)) + , page_reader(std::move(page_reader_)) {} + + std::unique_ptr makeReader(); + +private: + const parquet::ArrowReaderProperties & reader_properties; + const parquet::ColumnDescriptor & col_descriptor; + DataTypePtr ch_type; + std::unique_ptr meta; + std::unique_ptr page_reader; + + + UInt32 getScaleFromLogicalTimestamp(parquet::LogicalType::TimeUnit::unit tm_unit); + UInt32 getScaleFromArrowTimeUnit(arrow::TimeUnit::type tm_unit); + + std::unique_ptr fromInt32(); + std::unique_ptr fromInt64(); + std::unique_ptr fromByteArray(); + std::unique_ptr fromFLBA(); + + std::unique_ptr fromInt32INT(const parquet::IntLogicalType & int_type); + std::unique_ptr fromInt64INT(const parquet::IntLogicalType & int_type); + + template + auto makeLeafReader() { - return 
std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); + return std::make_unique>( + col_descriptor, std::make_shared(), std::move(meta), std::move(page_reader)); } - else if (col_descriptor.logical_type()->is_decimal()) + + template + auto makeDecimalLeafReader() { - switch (col_descriptor.physical_type()) + auto data_type = std::make_shared>( + col_descriptor.type_precision(), col_descriptor.type_scale()); + return std::make_unique>>( + col_descriptor, std::move(data_type), std::move(meta), std::move(page_reader)); + } + + std::unique_ptr throwUnsupported(std::string msg = "") + { + throw Exception( + ErrorCodes::PARQUET_EXCEPTION, + "Unsupported logical type: {} and physical type: {} for field =={}=={}", + col_descriptor.logical_type()->ToString(), col_descriptor.physical_type(), col_descriptor.name(), msg); + } +}; + +UInt32 ColReaderFactory::getScaleFromLogicalTimestamp(parquet::LogicalType::TimeUnit::unit tm_unit) +{ + switch (tm_unit) + { + case parquet::LogicalType::TimeUnit::MILLIS: + return 3; + case parquet::LogicalType::TimeUnit::MICROS: + return 6; + case parquet::LogicalType::TimeUnit::NANOS: + return 9; + default: + throwUnsupported(PreformattedMessage::create(", invalid timestamp unit: {}", tm_unit)); + return 0; + } +} + +UInt32 ColReaderFactory::getScaleFromArrowTimeUnit(arrow::TimeUnit::type tm_unit) +{ + switch (tm_unit) + { + case arrow::TimeUnit::MILLI: + return 3; + case arrow::TimeUnit::MICRO: + return 6; + case arrow::TimeUnit::NANO: + return 9; + default: + throwUnsupported(PreformattedMessage::create(", invalid arrow time unit: {}", tm_unit)); + return 0; + } +} + +std::unique_ptr ColReaderFactory::fromInt32() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::INT: + return fromInt32INT(dynamic_cast(*col_descriptor.logical_type())); + case parquet::LogicalType::Type::NONE: + return makeLeafReader(); + case parquet::LogicalType::Type::DATE: + return makeLeafReader(); + case parquet::LogicalType::Type::DECIMAL: + return makeDecimalLeafReader(); + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromInt64() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::INT: + return fromInt64INT(dynamic_cast(*col_descriptor.logical_type())); + case parquet::LogicalType::Type::NONE: + return makeLeafReader(); + case parquet::LogicalType::Type::TIMESTAMP: { - case parquet::Type::INT32: - { - auto data_type = std::make_shared( - col_descriptor.type_precision(), col_descriptor.type_scale()); - return std::make_unique>>( - col_descriptor, data_type, std::move(meta), std::move(reader)); - } - case parquet::Type::INT64: - { - auto data_type = std::make_shared( - col_descriptor.type_precision(), col_descriptor.type_scale()); - return std::make_unique>>( - col_descriptor, data_type, std::move(meta), std::move(reader)); - } - case parquet::Type::FIXED_LEN_BYTE_ARRAY: - { - if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) - { - auto data_type = std::make_shared( - col_descriptor.type_precision(), col_descriptor.type_scale()); - return std::make_unique>>( - col_descriptor, data_type, std::move(meta), std::move(reader)); - } - else - { - auto data_type = std::make_shared( - col_descriptor.type_precision(), col_descriptor.type_scale()); - return std::make_unique>>( - col_descriptor, data_type, std::move(meta), std::move(reader)); - } - } - default: - throw Exception( - ErrorCodes::PARQUET_EXCEPTION, - "Type not supported 
for decimal: {}", - col_descriptor.physical_type()); + const auto & tm_type = dynamic_cast(*col_descriptor.logical_type()); + auto read_type = std::make_shared(getScaleFromLogicalTimestamp(tm_type.time_unit())); + return std::make_unique>>( + col_descriptor, std::move(read_type), std::move(meta), std::move(page_reader)); } + case parquet::LogicalType::Type::DECIMAL: + return makeDecimalLeafReader(); + default: + return throwUnsupported(); } - else +} + +std::unique_ptr ColReaderFactory::fromByteArray() +{ + switch (col_descriptor.logical_type()->type()) { - switch (col_descriptor.physical_type()) - { - case parquet::Type::INT32: - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); - case parquet::Type::INT64: - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); - case parquet::Type::FLOAT: - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); - case parquet::Type::INT96: - { - DataTypePtr read_type = ch_type; - if (!isDateTime64(ch_type)) - { - read_type = std::make_shared(ParquetRecordReader::default_datetime64_scale); - } - return std::make_unique>>( - col_descriptor, read_type, std::move(meta), std::move(reader)); - } - case parquet::Type::DOUBLE: - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); - case parquet::Type::BYTE_ARRAY: - return std::make_unique>( - col_descriptor, std::make_shared(), std::move(meta), std::move(reader)); - default: - throw Exception( - ErrorCodes::PARQUET_EXCEPTION, "Type not supported: {}", col_descriptor.physical_type()); - } + case parquet::LogicalType::Type::STRING: + return makeLeafReader(); + default: + return throwUnsupported(); } } +std::unique_ptr ColReaderFactory::fromFLBA() +{ + switch (col_descriptor.logical_type()->type()) + { + case parquet::LogicalType::Type::DECIMAL: + { + if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) + return makeDecimalLeafReader(); + else if (col_descriptor.type_length() <= static_cast(sizeof(Decimal256))) + return makeDecimalLeafReader(); + + return throwUnsupported(PreformattedMessage::create( + ", invalid type length: {}", col_descriptor.type_length())); + } + default: + return throwUnsupported(); + } +} + +std::unique_ptr ColReaderFactory::fromInt32INT(const parquet::IntLogicalType & int_type) +{ + switch (int_type.bit_width()) + { + case sizeof(Int32): + { + if (int_type.is_signed()) + return makeLeafReader(); + else + return makeLeafReader(); + } + default: + return throwUnsupported(PreformattedMessage::create(", bit width: {}", int_type.bit_width())); + } +} + +std::unique_ptr ColReaderFactory::fromInt64INT(const parquet::IntLogicalType & int_type) +{ + switch (int_type.bit_width()) + { + case sizeof(Int64): + { + if (int_type.is_signed()) + return makeLeafReader(); + else + return makeLeafReader(); + } + default: + return throwUnsupported(PreformattedMessage::create(", bit width: {}", int_type.bit_width())); + } +} + +// refer: GetArrowType method in schema_internal.cc of arrow +std::unique_ptr ColReaderFactory::makeReader() +{ + // this method should to be called only once for each instance + SCOPE_EXIT({ page_reader = nullptr; }); + assert(page_reader); + + switch (col_descriptor.physical_type()) + { + case parquet::Type::BOOLEAN: + break; + case parquet::Type::INT32: + return fromInt32(); + case parquet::Type::INT64: + return fromInt64(); + case parquet::Type::INT96: + { + DataTypePtr 
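// A self-contained illustration (not part of this patch) of what the INT96 branch
// ultimately has to compute: turning a 12-byte Parquet INT96 timestamp into the
// nanosecond count stored by a DateTime64(9) column. The layout assumed here,
// 8 bytes of nanoseconds within the day followed by a 4-byte Julian day number,
// both little-endian, is the convention used by writers such as Spark and Impala;
// the helper name int96ToNanoseconds is invented for this sketch and assumes a
// little-endian host.
#include <cstdint>
#include <cstring>

inline int64_t int96ToNanoseconds(const uint8_t * data)
{
    uint64_t nanos_of_day;  // first 8 bytes: nanoseconds elapsed since midnight
    uint32_t julian_day;    // last 4 bytes: Julian day number
    std::memcpy(&nanos_of_day, data, sizeof(nanos_of_day));
    std::memcpy(&julian_day, data + 8, sizeof(julian_day));

    // 2440588 is the Julian day of the Unix epoch (1970-01-01).
    const int64_t days_since_epoch = static_cast<int64_t>(julian_day) - 2440588;
    return days_since_epoch * 86400LL * 1'000'000'000LL + static_cast<int64_t>(nanos_of_day);
}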
read_type = ch_type; + if (!isDateTime64(ch_type)) + { + auto scale = getScaleFromArrowTimeUnit(reader_properties.coerce_int96_timestamp_unit()); + read_type = std::make_shared(scale); + } + return std::make_unique>>( + col_descriptor, read_type, std::move(meta), std::move(page_reader)); + } + case parquet::Type::FLOAT: + return makeLeafReader(); + case parquet::Type::DOUBLE: + return makeLeafReader(); + case parquet::Type::BYTE_ARRAY: + return fromByteArray(); + case parquet::Type::FIXED_LEN_BYTE_ARRAY: + return fromFLBA(); + default: + break; + } + + return throwUnsupported(); +} + } // anonymous namespace ParquetRecordReader::ParquetRecordReader( @@ -148,8 +297,9 @@ ParquetRecordReader::ParquetRecordReader( parquet::ArrowReaderProperties reader_properties_, std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, const FormatSettings & format_settings, - std::vector row_groups_indices_) - : file_reader(createFileReader(std::move(arrow_file))) + std::vector row_groups_indices_, + std::shared_ptr metadata) + : file_reader(createFileReader(std::move(arrow_file), std::move(metadata))) , reader_properties(reader_properties_) , header(std::move(header_)) , max_block_size(format_settings.parquet.max_block_size) @@ -210,15 +360,17 @@ void ParquetRecordReader::loadNextRowGroup() column_readers.clear(); for (size_t i = 0; i < parquet_col_indice.size(); i++) { - column_readers.emplace_back(createColReader( + ColReaderFactory factory( + reader_properties, *file_reader->metadata()->schema()->Column(parquet_col_indice[i]), header.getByPosition(i).type, cur_row_group_reader->metadata()->ColumnChunk(parquet_col_indice[i]), - cur_row_group_reader->GetColumnPageReader(parquet_col_indice[i]))); + cur_row_group_reader->GetColumnPageReader(parquet_col_indice[i])); + column_readers.emplace_back(factory.makeReader()); } auto duration = watch.elapsedNanoseconds() / 1e6; - LOG_DEBUG(log, "reading row group {} consumed {} ms", row_groups_indices[next_row_group_idx], duration); + LOG_DEBUG(log, "begin to read row group {} consumed {} ms", row_groups_indices[next_row_group_idx], duration); ++next_row_group_idx; cur_row_group_left_rows = cur_row_group_reader->metadata()->num_rows(); diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h index 4789be59ec8..2f728a586a0 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.h @@ -22,13 +22,11 @@ public: parquet::ArrowReaderProperties reader_properties_, std::shared_ptr<::arrow::io::RandomAccessFile> arrow_file, const FormatSettings & format_settings, - std::vector row_groups_indices_); + std::vector row_groups_indices_, + std::shared_ptr metadata = nullptr); Chunk readChunk(); - // follow the scale generated by spark - static constexpr UInt8 default_datetime64_scale = 9; - private: std::unique_ptr file_reader; parquet::ArrowReaderProperties reader_properties; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 2e849f09fda..7fc7b9c3cab 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -3,6 +3,7 @@ #if USE_PARQUET +#include #include #include #include @@ -623,6 +624,7 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un return; } + // TODO support defaults_for_omitted_fields feature when supporting nested columns auto 
num_rows = chunk.getNumRows(); res = get_pending_chunk(num_rows, std::move(chunk)); } diff --git a/tests/queries/0_stateless/02998_native_parquet_reader.sh b/tests/queries/0_stateless/02998_native_parquet_reader.sh index 4e5169c4bf0..d6369c4921b 100755 --- a/tests/queries/0_stateless/02998_native_parquet_reader.sh +++ b/tests/queries/0_stateless/02998_native_parquet_reader.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -PAR_PATH="$CURDIR"/02998_native_parquet_reader.parquet +PAR_PATH="$CURDIR"/data_parquet/native_parquet_reader.parquet # the content of parquet file can be generated by following codes # < Date: Wed, 15 May 2024 19:27:15 +0200 Subject: [PATCH 138/392] Fix special build --- src/Columns/ColumnDynamic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index c6626433877..40e8e350733 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -137,7 +137,7 @@ public: void insertData(const char * pos, size_t length) override { - return variant_column->insertData(pos, length); + variant_column->insertData(pos, length); } void insert(const Field & x) override; From 4cfe2665de328a7a7921e0b0a76ddf9b4e2d5486 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 15 May 2024 20:28:17 +0200 Subject: [PATCH 139/392] Update src/Formats/FormatSettings.h --- src/Formats/FormatSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 004b16b6061..bf3269bd42d 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -363,7 +363,7 @@ struct FormatSettings bool skip_trailing_empty_lines = false; bool allow_variable_number_of_columns = false; bool crlf_end_of_line_input = false; - } tsv; + } tsv{}; struct { From 04fb84d4ade10df2a4fc9f6cb6f94ac4993d1ffd Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 15 May 2024 21:57:15 +0200 Subject: [PATCH 140/392] Update src/Core/SettingsChangesHistory.h Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Core/SettingsChangesHistory.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 6edfcc129f8..e004e83355b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -91,13 +91,13 @@ static std::map sett {"cross_join_min_rows_to_compress", 0, 10000000, "A new setting."}, {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"hdfs_throw_on_zero_files_match", false, false, "Throw an error, when ListObjects request cannot match any files"}, - {"azure_throw_on_zero_files_match", false, false, "Throw an error, when ListObjects request cannot match any files"}, - {"s3_validate_request_settings", true, true, "Validate S3 request settings"}, + {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, + {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in 
AzureBlobStorage engine instead of empty query result"}, + {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, - {"hdfs_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageHDFS"}, - {"azure_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageAzureBlob"}, - {"s3_ignore_file_doesnt_exist", false, false, "Ignore if files does not exits and return 0 zeros for StorageS3"}, + {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, + {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, + {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, }}, From a63e846724f503607fe38b34fda37345ee8111c5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 15 May 2024 22:13:48 +0200 Subject: [PATCH 141/392] Review fixes --- docs/en/operations/settings/settings.md | 20 +++++++++++++++++++ .../StorageObjectStorageSink.cpp | 2 +- .../ObjectStorage/StorageObjectStorageSink.h | 2 +- src/Storages/S3Queue/S3QueueSource.cpp | 4 ++-- .../TableFunctionObjectStorage.cpp | 5 ++--- .../TableFunctionObjectStorage.h | 10 ++++++++-- .../TableFunctionObjectStorageCluster.h | 19 ++++++++++-------- 7 files changed, 45 insertions(+), 17 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 1772a3aa861..3a79eb64c67 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3675,6 +3675,16 @@ Possible values: Default value: `0`. +## s3_validate_request_settings {#s3_validate_request_settings} + +Enables s3 request settings validation. + +Possible values: +- 1 — validate settings. +- 0 — do not validate settings. + +Default value: `1`. + ## hdfs_truncate_on_insert {#hdfs_truncate_on_insert} Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. @@ -3747,6 +3757,16 @@ Possible values: Default value: `0`. +## azure_skip_empty_files {#azure_skip_empty_files} + +Enables or disables skipping empty files in S3 engine. + +Possible values: +- 0 — `SELECT` throws an exception if empty file is not compatible with requested format. +- 1 — `SELECT` returns empty result for empty file. + +Default value: `0`. + ## engine_url_skip_empty_files {#engine_url_skip_empty_files} Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables. 
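Below is a minimal sketch of the behaviour the skip-empty-files settings above toggle, assuming a plain local file listing; the helper name filterInputs and the use of std::filesystem are illustrative assumptions and not ClickHouse code. When the flag is enabled an empty file simply contributes zero rows, otherwise it is handed to the format parser, which may then throw.

#include <filesystem>
#include <string>
#include <vector>

std::vector<std::string> filterInputs(const std::vector<std::string> & paths, bool skip_empty_files)
{
    std::vector<std::string> kept;
    for (const auto & path : paths)
    {
        // With the setting enabled, a zero-byte file is dropped before parsing starts.
        if (skip_empty_files && std::filesystem::file_size(path) == 0)
            continue;
        kept.push_back(path);
    }
    return kept;
}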
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index 81bdeaa43a3..0a3cf19a590 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -15,7 +15,7 @@ namespace ErrorCodes StorageObjectStorageSink::StorageObjectStorageSink( ObjectStoragePtr object_storage, ConfigurationPtr configuration, - std::optional format_settings_, + const std::optional & format_settings_, const Block & sample_block_, ContextPtr context, const std::string & blob_path) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index a3c8ef68cf0..45cf83d606f 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -13,7 +13,7 @@ public: StorageObjectStorageSink( ObjectStoragePtr object_storage, ConfigurationPtr configuration, - std::optional format_settings_, + const std::optional & format_settings_, const Block & sample_block_, ContextPtr context, const std::string & blob_path = ""); diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 0cee94769c4..458f681d7b5 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -235,7 +235,7 @@ Chunk StorageS3QueueSource::generate() catch (...) { LOG_ERROR(log, "Failed to set file {} as failed: {}", - key_with_info->key, getCurrentExceptionMessage(true)); + key_with_info->relative_path, getCurrentExceptionMessage(true)); } appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); @@ -262,7 +262,7 @@ Chunk StorageS3QueueSource::generate() catch (...) 
{ LOG_ERROR(log, "Failed to set file {} as failed: {}", - key_with_info->key, getCurrentExceptionMessage(true)); + key_with_info->relative_path, getCurrentExceptionMessage(true)); } appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); diff --git a/src/TableFunctions/TableFunctionObjectStorage.cpp b/src/TableFunctions/TableFunctionObjectStorage.cpp index 9f16a9a0b25..550d9cc799b 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.cpp +++ b/src/TableFunctions/TableFunctionObjectStorage.cpp @@ -116,9 +116,8 @@ StoragePtr TableFunctionObjectStorage::executeImpl( columns, ConstraintsDescription{}, String{}, - /// No format_settings for table function Azure - std::nullopt, - /* distributed_processing */ false, + /* format_settings */std::nullopt, + /* distributed_processing */false, nullptr); storage->startup(); diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index bbc40cc6191..86b8f0d5e14 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -32,6 +32,7 @@ struct AzureDefinition " - storage_account_url, container_name, blobpath, account_name, account_key, format\n" " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression\n" " - storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure\n"; + static constexpr auto max_number_of_arguments = 8; }; struct S3Definition @@ -51,6 +52,7 @@ struct S3Definition " - url, access_key_id, secret_access_key, format, structure, compression_method\n" " - url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + static constexpr auto max_number_of_arguments = 8; }; struct GCSDefinition @@ -58,6 +60,7 @@ struct GCSDefinition static constexpr auto name = "gcs"; static constexpr auto storage_type_name = "GCS"; static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; }; struct COSNDefinition @@ -65,6 +68,7 @@ struct COSNDefinition static constexpr auto name = "cosn"; static constexpr auto storage_type_name = "COSN"; static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; }; struct OSSDefinition @@ -72,6 +76,7 @@ struct OSSDefinition static constexpr auto name = "oss"; static constexpr auto storage_type_name = "OSS"; static constexpr auto signature = S3Definition::signature; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments; }; struct HDFSDefinition @@ -82,6 +87,7 @@ struct HDFSDefinition " - uri, format\n" " - uri, format, structure\n" " - uri, format, structure, compression_method\n"; + static constexpr auto max_number_of_arguments = 4; }; template @@ -91,7 +97,7 @@ public: static constexpr auto name = Definition::name; static constexpr auto signature = Definition::signature; - static size_t getMaxNumberOfArguments() { return 8; } + static size_t getMaxNumberOfArguments() { return Definition::max_number_of_arguments; } String getName() const override { return name; } @@ -105,7 +111,7 @@ public: bool supportsReadingSubsetOfColumns(const ContextPtr & context) override { - return 
FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); + return configuration->format != "auto" && FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(configuration->format, context); } std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override diff --git a/src/TableFunctions/TableFunctionObjectStorageCluster.h b/src/TableFunctions/TableFunctionObjectStorageCluster.h index 76786fafe99..296791b8bda 100644 --- a/src/TableFunctions/TableFunctionObjectStorageCluster.h +++ b/src/TableFunctions/TableFunctionObjectStorageCluster.h @@ -17,17 +17,10 @@ class StorageAzureConfiguration; struct AzureClusterDefinition { - /** - * azureBlobStorageCluster(cluster_name, source, [access_key_id, secret_access_key,] format, compression_method, structure) - * A table function, which allows to process many files from Azure Blob Storage on a specific cluster - * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks - * in Azure Blob Storage file path and dispatch each file dynamically. - * On worker node it asks initiator about next task to process, processes it. - * This is repeated until the tasks are finished. - */ static constexpr auto name = "azureBlobStorageCluster"; static constexpr auto storage_type_name = "AzureBlobStorageCluster"; static constexpr auto signature = " - cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]"; + static constexpr auto max_number_of_arguments = AzureDefinition::max_number_of_arguments + 1; }; struct S3ClusterDefinition @@ -44,6 +37,7 @@ struct S3ClusterDefinition " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" " - cluster, url, access_key_id, secret_access_key, session_token, format, structure, compression_method\n" "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + static constexpr auto max_number_of_arguments = S3Definition::max_number_of_arguments + 1; }; struct HDFSClusterDefinition @@ -54,8 +48,17 @@ struct HDFSClusterDefinition " - cluster_name, uri, format\n" " - cluster_name, uri, format, structure\n" " - cluster_name, uri, format, structure, compression_method\n"; + static constexpr auto max_number_of_arguments = HDFSDefinition::max_number_of_arguments + 1; }; +/** +* Class implementing s3/hdfs/azureBlobStorage)Cluster(...) table functions, +* which allow to process many files from S3/HDFS/Azure blob storage on a specific cluster. +* On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks +* in file path and dispatch each file dynamically. +* On worker node it asks initiator about next task to process, processes it. +* This is repeated until the tasks are finished. 
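// A runnable, minimal sketch of the dispatch model described above; every name in it
// (TaskQueue, nextTask, worker) is invented for illustration and is not the actual
// implementation. The initiator expands the path pattern into concrete file names once,
// and each worker keeps asking for the next file until the shared queue is exhausted,
// which is the point at which processing finishes.
#include <iostream>
#include <mutex>
#include <optional>
#include <queue>
#include <string>
#include <thread>
#include <vector>

class TaskQueue
{
public:
    explicit TaskQueue(std::vector<std::string> files)
    {
        for (auto & file : files)
            tasks.push(std::move(file));
    }

    std::optional<std::string> nextTask()
    {
        std::lock_guard lock(mutex);
        if (tasks.empty())
            return std::nullopt;  // tells the worker there is nothing left to process
        auto task = std::move(tasks.front());
        tasks.pop();
        return task;
    }

private:
    std::mutex mutex;
    std::queue<std::string> tasks;
};

int main()
{
    // The "initiator" side: the glob has already been expanded into concrete files.
    TaskQueue initiator({"data/part-0.parquet", "data/part-1.parquet", "data/part-2.parquet"});

    // The "worker" side: ask for the next task until the initiator has nothing left.
    auto worker = [&](int id)
    {
        while (auto task = initiator.nextTask())
            std::cout << "worker " << id << " processes " << *task << '\n';
    };

    std::thread w1(worker, 1);
    std::thread w2(worker, 2);
    w1.join();
    w2.join();
    return 0;
}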
+*/ template class TableFunctionObjectStorageCluster : public ITableFunctionCluster> { From f19615788bf05be3440cddf552d0bf51e33cbc5c Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 15 May 2024 22:37:33 +0000 Subject: [PATCH 142/392] Fix special build --- src/Columns/ColumnDynamic.cpp | 6 +++--- src/DataTypes/DataTypeDynamic.cpp | 2 +- src/DataTypes/Serializations/SerializationDynamic.h | 2 +- src/Parsers/ParserDataType.cpp | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index 0f247638d92..d63a03dbafd 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -227,7 +227,7 @@ void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n) auto & variant_col = assert_cast(*variant_column); /// If variants are different, we need to extend our variant with new variants. - if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) { variant_col.insertFrom(*dynamic_src.variant_column, n, *global_discriminators_mapping); return; @@ -281,7 +281,7 @@ void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size auto & variant_col = assert_cast(*variant_column); /// If variants are different, we need to extend our variant with new variants. - if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) { variant_col.insertRangeFrom(*dynamic_src.variant_column, start, length, *global_discriminators_mapping); return; @@ -443,7 +443,7 @@ void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, si auto & variant_col = assert_cast(*variant_column); /// If variants are different, we need to extend our variant with new variants. - if (auto global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) + if (auto * global_discriminators_mapping = combineVariants(dynamic_src.variant_info)) { variant_col.insertManyFrom(*dynamic_src.variant_column, position, length, *global_discriminators_mapping); return; diff --git a/src/DataTypes/DataTypeDynamic.cpp b/src/DataTypes/DataTypeDynamic.cpp index 2c6b3eba906..c920e69c13b 100644 --- a/src/DataTypes/DataTypeDynamic.cpp +++ b/src/DataTypes/DataTypeDynamic.cpp @@ -67,7 +67,7 @@ static DataTypePtr create(const ASTPtr & arguments) if (identifier_name != "max_types") throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected identifier: {}. 
Dynamic data type argument should be in a form 'max_types=N'", identifier_name); - auto literal = argument->arguments->children[1]->as(); + auto * literal = argument->arguments->children[1]->as(); if (!literal || literal->value.getType() != Field::Types::UInt64 || literal->value.get() == 0 || literal->value.get() > 255) throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "'max_types' argument for Dynamic type should be a positive integer between 1 and 255"); diff --git a/src/DataTypes/Serializations/SerializationDynamic.h b/src/DataTypes/Serializations/SerializationDynamic.h index 7471ff54cf7..001a3cf87ce 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.h +++ b/src/DataTypes/Serializations/SerializationDynamic.h @@ -11,7 +11,7 @@ class SerializationDynamicElement; class SerializationDynamic : public ISerialization { public: - SerializationDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) + explicit SerializationDynamic(size_t max_dynamic_types_) : max_dynamic_types(max_dynamic_types_) { } diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index c88b5e0e3a2..78d62456fcf 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -55,7 +55,7 @@ private: class ParserDataTypeArgument : public IParserBase { public: - ParserDataTypeArgument(std::string_view type_name_) : type_name(type_name_) + explicit ParserDataTypeArgument(std::string_view type_name_) : type_name(type_name_) { } From d7f95ddfcf1c6b0f25c273615caf4be42986778c Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 16 May 2024 13:16:01 +0200 Subject: [PATCH 143/392] CI: Enable Arm integration tests job in CI --- .github/PULL_REQUEST_TEMPLATE.md | 9 +- tests/ci/ci_config.py | 139 +++++++++++++++++-------------- 2 files changed, 81 insertions(+), 67 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3e0131a388a..64dc9049bc2 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -42,25 +42,25 @@ At a minimum, the following information should be added (but add more as needed) > Information about CI checks: https://clickhouse.com/docs/en/development/continuous-integration/
- Modify your CI run + CI Settings **NOTE:** If your merge the PR with modified CI you **MUST KNOW** what you are doing **NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step -#### Include tests (required builds will be added automatically): -- [ ] Fast test +#### Run these jobs only (required builds will be added automatically): - [ ] Integration Tests - [ ] Stateless tests - [ ] Stateful tests - [ ] Unit tests - [ ] Performance tests +- [ ] All with aarch64 - [ ] All with ASAN - [ ] All with TSAN - [ ] All with Analyzer - [ ] All with Azure - [ ] Add your option here -#### Exclude tests: +#### Deny these jobs: - [ ] Fast test - [ ] Integration Tests - [ ] Stateless tests @@ -72,7 +72,6 @@ At a minimum, the following information should be added (but add more as needed) - [ ] All with UBSAN - [ ] All with Coverage - [ ] All with Aarch64 -- [ ] Add your option here #### Extra options: - [ ] do not test (only style check) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 588f4934125..84041b8782f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -448,9 +448,9 @@ bugfix_validate_check = DigestConfig( ], exclude_files=[".md"], docker=IMAGES.copy() - + [ - "clickhouse/stateless-test", - ], + + [ + "clickhouse/stateless-test", + ], ) # common test params docker_server_job_config = JobConfig( @@ -570,7 +570,7 @@ class CIConfig: if self.is_build_job(job_name): stage_type = CIStages.BUILDS_1 if job_name in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK_SPECIAL + JobNames.BUILD_CHECK_SPECIAL ): # special builds go to Build_2 stage to not delay Builds_1/Test_1 stage_type = CIStages.BUILDS_2 @@ -584,7 +584,7 @@ class CIConfig: required_build = CI_CONFIG.test_configs[job_name].required_build assert required_build if required_build in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK + JobNames.BUILD_CHECK ): stage_type = CIStages.TESTS_1 else: @@ -597,10 +597,10 @@ class CIConfig: def get_job_config(self, check_name: str) -> JobConfig: res = None for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, + self.other_jobs_configs, ): if check_name in config: # type: ignore res = config[check_name].job_config # type: ignore @@ -612,47 +612,47 @@ class CIConfig: if self.is_build_job(check_name) or check_name == JobNames.FAST_TEST: result = Runners.BUILDER elif any( - words in check_name.lower() - for words in [ - "install packages", - "compatibility check", - "docker", - "build check", - "jepsen", - "style check", - ] + words in check_name.lower() + for words in [ + "install packages", + "compatibility check", + "docker", + "build check", + "jepsen", + "style check", + ] ): result = Runners.STYLE_CHECKER elif check_name == JobNames.DOCS_CHECK: # docs job is demanding result = Runners.FUNC_TESTER_ARM elif any( - words in check_name.lower() - for words in [ - "stateless", - "stateful", - "clickbench", - "sqllogic test", - "libfuzzer", - "bugfix validation", - ] + words in check_name.lower() + for words in [ + "stateless", + "stateful", + "clickbench", + "sqllogic test", + "libfuzzer", + "bugfix validation", + ] ): result = Runners.FUNC_TESTER elif any( - words in check_name.lower() - for words in ["stress", "upgrade", "integration", "performance comparison"] + words in check_name.lower() + for words in ["stress", "upgrade", "integration", "performance comparison"] ): result = Runners.STRESS_TESTER elif 
any( - words in check_name.lower() - for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] + words in check_name.lower() + for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] ): result = Runners.FUZZER_UNIT_TESTER assert result, f"BUG, no runner for [{check_name}]" if ( - "aarch" in check_name.lower() or "arm64" in check_name.lower() + "aarch" in check_name.lower() or "arm64" in check_name.lower() ) and "aarch" not in result: if result == Runners.STRESS_TESTER: # FIXME: no arm stress tester group atm @@ -683,10 +683,10 @@ class CIConfig: check_name = self.normalize_string(check_name) for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, + self.other_jobs_configs, ): for job_name in config: # type: ignore if check_name == self.normalize_string(job_name): @@ -714,10 +714,10 @@ class CIConfig: def get_digest_config(self, check_name: str) -> DigestConfig: res = None for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, + self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, ): if check_name in config: # type: ignore res = config[check_name].job_config.digest # type: ignore @@ -732,15 +732,15 @@ class CIConfig: """ assert branch for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, + self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, ): yield from config # type: ignore def get_builds_for_report( - self, report_name: str, release: bool = False, backport: bool = False + self, report_name: str, release: bool = False, backport: bool = False ) -> List[str]: # hack to modify build list for release and bp wf assert not (release and backport), "Invalid input" @@ -1155,16 +1155,20 @@ CI_CONFIG = CIConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_ASAN, + job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_MSAN, + job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, + job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_TSAN, + job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), # End stateful tests for parallel replicas JobNames.STATELESS_TEST_ASAN: TestConfig( @@ -1207,7 +1211,8 @@ CI_CONFIG = CIConfig( ), JobNames.STATELESS_TEST_AZURE_ASAN: TestConfig( Build.PACKAGE_ASAN, - 
job_config=JobConfig(num_batches=4, **statless_test_common_params, release_only=True, run_by_ci_option=True), # type: ignore + job_config=JobConfig(num_batches=4, **statless_test_common_params, release_only=True, + run_by_ci_option=True), # type: ignore ), JobNames.STATELESS_TEST_S3_TSAN: TestConfig( Build.PACKAGE_TSAN, @@ -1220,28 +1225,39 @@ CI_CONFIG = CIConfig( Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) + # type: ignore ), JobNames.STRESS_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, + job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) + # type: ignore ), JobNames.UPGRADE_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore + Build.PACKAGE_ASAN, + job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) + # type: ignore ), JobNames.STRESS_TEST_AZURE_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore + Build.PACKAGE_TSAN, + job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore ), JobNames.STRESS_TEST_AZURE_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore + Build.PACKAGE_MSAN, + job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore ), JobNames.UPGRADE_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore + Build.PACKAGE_TSAN, + job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) + # type: ignore ), JobNames.UPGRADE_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore + Build.PACKAGE_MSAN, + job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) + # type: ignore ), JobNames.UPGRADE_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore @@ -1260,8 +1276,7 @@ CI_CONFIG = CIConfig( ), JobNames.INTEGRATION_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, - # add [run_by_label="test arm"] to not run in regular pr workflow by default - job_config=JobConfig(num_batches=6, **integration_test_common_params, run_by_label="test arm"), # type: ignore + job_config=JobConfig(num_batches=5, **integration_test_common_params), # type: ignore ), JobNames.INTEGRATION_TEST: TestConfig( 
Build.PACKAGE_RELEASE, @@ -1335,7 +1350,8 @@ CI_CONFIG = CIConfig( ), JobNames.PERFORMANCE_TEST_ARM64: TestConfig( Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), # type: ignore + job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), + # type: ignore ), JobNames.SQLANCER: TestConfig( Build.PACKAGE_RELEASE, job_config=sqllancer_test_common_params @@ -1365,7 +1381,6 @@ CI_CONFIG = CIConfig( ) CI_CONFIG.validate() - # checks required by Mergeable Check REQUIRED_CHECKS = [ "PR Check", From 22573361de3c4cdbd105e47856f00d1411d081e8 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 16 May 2024 13:58:19 +0200 Subject: [PATCH 144/392] fixing typos and var names --- tests/ci/ci.py | 34 +++---- tests/ci/ci_config.py | 195 +++++++++++++++++------------------- tests/ci/test_ci_options.py | 6 +- 3 files changed, 110 insertions(+), 125 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 08048564383..3ed584f5d93 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -71,12 +71,12 @@ class PendingState: class CiCache: """ CI cache is a bunch of records. Record is a file stored under special location on s3. - The file name has following format + The file name has the following format _[]--___.ci RECORD_TYPE: - SUCCESSFUL - for successfuly finished jobs + SUCCESSFUL - for successfully finished jobs PENDING - for pending jobs ATTRIBUTES: @@ -508,7 +508,7 @@ class CiCache: self, job: str, batch: int, num_batches: int, release_branch: bool ) -> bool: """ - checks if a given job have already been done successfuly + checks if a given job have already been done successfully """ return self.exist( self.RecordType.SUCCESSFUL, job, batch, num_batches, release_branch @@ -749,7 +749,7 @@ class CiOptions: # list of specified jobs to run ci_jobs: Optional[List[str]] = None - # btaches to run for all multi-batch jobs + # batches to run for all multi-batch jobs job_batches: Optional[List[int]] = None do_not_test: bool = False @@ -903,7 +903,7 @@ class CiOptions: if self.ci_sets: for tag in self.ci_sets: label_config = CI_CONFIG.get_label_config(tag) - assert label_config, f"Unknonwn tag [{tag}]" + assert label_config, f"Unknown tag [{tag}]" print( f"NOTE: CI Set's tag: [{tag}], add jobs: [{label_config.run_jobs}]" ) @@ -953,7 +953,7 @@ class CiOptions: jobs_params[job] = { "batches": list(range(num_batches)), "num_batches": num_batches, - "run_if_ci_option_include_set": job_config.run_by_ci_option + "run_by_ci_option": job_config.run_by_ci_option and pr_info.is_pr, } @@ -969,7 +969,7 @@ class CiOptions: for job in jobs_to_do[:]: job_param = jobs_params[job] if ( - job_param["run_if_ci_option_include_set"] + job_param["run_by_ci_option"] and job not in jobs_to_do_requested ): print( @@ -1010,7 +1010,7 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: parser.add_argument( "--pre", action="store_true", - help="Action that executes prerequesetes for the job provided in --job-name", + help="Action that executes prerequisites for the job provided in --job-name", ) parser.add_argument( "--run", @@ -1080,7 +1080,7 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: "--skip-jobs", action="store_true", default=False, - help="skip fetching data about job runs, used in --configure action (for debugging and nigthly ci)", + help="skip fetching data about job runs, used in --configure action (for debugging and nightly ci)", ) parser.add_argument( "--force", @@ -1298,7 
+1298,7 @@ def _configure_docker_jobs(docker_digest_or_latest: bool) -> Dict: missing_amd64 = [] missing_aarch64 = [] if not docker_digest_or_latest: - # look for missing arm and amd images only among missing multiarch manifests @missing_multi_dict + # look for missing arm and amd images only among missing multi-arch manifests @missing_multi_dict # to avoid extra dockerhub api calls missing_amd64 = list( check_missing_images_on_dockerhub(missing_multi_dict, "amd64") @@ -1396,7 +1396,7 @@ def _configure_jobs( ): continue - # fill job randomization buckets (for jobs with configured @random_bucket property)) + # fill job randomization buckets (for jobs with configured @random_bucket property) if job_config.random_bucket: if not job_config.random_bucket in randomization_buckets: randomization_buckets[job_config.random_bucket] = set() @@ -1445,7 +1445,7 @@ def _configure_jobs( jobs_params[job] = { "batches": batches_to_do, "num_batches": num_batches, - "run_if_ci_option_include_set": job_config.run_by_ci_option + "run_by_ci_option": job_config.run_by_ci_option and pr_info.is_pr, } elif add_to_skip: @@ -1490,8 +1490,8 @@ def _configure_jobs( def _generate_ci_stage_config(jobs_data: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: """ populates GH Actions' workflow with real jobs - "Builds_1": [{"job_name": NAME, "runner_type": RUNER_TYPE}] - "Tests_1": [{"job_name": NAME, "runner_type": RUNER_TYPE}] + "Builds_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] + "Tests_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] ... """ result = {} # type: Dict[str, Any] @@ -1582,7 +1582,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: for match in matches if match in CILabels or match.startswith("job_") or match.startswith("batch_") ] - print(f"CI modifyers from commit message: [{res}]") + print(f"CI modifiers from commit message: [{res}]") res_2 = [] if pr_info.is_pr: matches = [match[-1] for match in re.findall(pattern, pr_info.body)] @@ -1593,7 +1593,7 @@ def _fetch_commit_tokens(message: str, pr_info: PRInfo) -> List[str]: or match.startswith("job_") or match.startswith("batch_") ] - print(f"CI modifyers from PR body: [{res_2}]") + print(f"CI modifiers from PR body: [{res_2}]") return list(set(res + res_2)) @@ -1659,7 +1659,7 @@ def _upload_build_artifacts( report_url = ci_cache.upload_build_report(build_result) print(f"Report file has been uploaded to [{report_url}]") - # Upload head master binaries + # Upload master head's binaries static_bin_name = CI_CONFIG.build_config[build_name].static_binary_name if pr_info.is_master and static_bin_name: # Full binary with debug info: diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 84041b8782f..dc67e05455c 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -50,9 +50,9 @@ class CILabels(metaclass=WithIter): CI_SET_ARM = "ci_set_arm" CI_SET_INTEGRATION = "ci_set_integration" CI_SET_OLD_ANALYZER = "ci_set_old_analyzer" - CI_SET_STATLESS = "ci_set_stateless" + CI_SET_STATELESS = "ci_set_stateless" CI_SET_STATEFUL = "ci_set_stateful" - CI_SET_STATLESS_ASAN = "ci_set_stateless_asan" + CI_SET_STATELESS_ASAN = "ci_set_stateless_asan" CI_SET_STATEFUL_ASAN = "ci_set_stateful_asan" libFuzzer = "libFuzzer" @@ -203,7 +203,7 @@ class DigestConfig: include_paths: List[Union[str, Path]] = field(default_factory=list) # file suffixes to exclude from digest exclude_files: List[str] = field(default_factory=list) - # directories to exlude from digest + # directories to exclude from digest exclude_dirs: List[Union[str, 
Path]] = field(default_factory=list) # docker names to include into digest docker: List[str] = field(default_factory=list) @@ -214,7 +214,7 @@ class DigestConfig: @dataclass class LabelConfig: """ - configures different CI scenarious per GH label + configures different CI scenarios per GH label """ run_jobs: Iterable[str] = frozenset() @@ -228,7 +228,7 @@ class JobConfig: # configures digest calculation for the job digest: DigestConfig = field(default_factory=DigestConfig) - # will be triggered for the job if omited in CI workflow yml + # will be triggered for the job if omitted in CI workflow yml run_command: str = "" # job timeout, seconds timeout: Optional[int] = None @@ -239,7 +239,7 @@ class JobConfig: # to run always regardless of the job digest or/and label run_always: bool = False # if the job needs to be run on the release branch, including master (e.g. building packages, docker server). - # NOTE: Subsequent runs on the same branch with the similar digest are still considered skippable. + # NOTE: Subsequent runs on the same branch with the similar digest are still considered skip-able. required_on_release_branch: bool = False # job is for pr workflow only pr_only: bool = False @@ -448,9 +448,9 @@ bugfix_validate_check = DigestConfig( ], exclude_files=[".md"], docker=IMAGES.copy() - + [ - "clickhouse/stateless-test", - ], + + [ + "clickhouse/stateless-test", + ], ) # common test params docker_server_job_config = JobConfig( @@ -467,7 +467,7 @@ compatibility_test_common_params = { "digest": compatibility_check_digest, "run_command": "compatibility_check.py", } -statless_test_common_params = { +stateless_test_common_params = { "digest": stateless_check_digest, "run_command": 'functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT', "timeout": 10800, @@ -570,7 +570,7 @@ class CIConfig: if self.is_build_job(job_name): stage_type = CIStages.BUILDS_1 if job_name in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK_SPECIAL + JobNames.BUILD_CHECK_SPECIAL ): # special builds go to Build_2 stage to not delay Builds_1/Test_1 stage_type = CIStages.BUILDS_2 @@ -584,7 +584,7 @@ class CIConfig: required_build = CI_CONFIG.test_configs[job_name].required_build assert required_build if required_build in CI_CONFIG.get_builds_for_report( - JobNames.BUILD_CHECK + JobNames.BUILD_CHECK ): stage_type = CIStages.TESTS_1 else: @@ -597,10 +597,10 @@ class CIConfig: def get_job_config(self, check_name: str) -> JobConfig: res = None for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, + self.other_jobs_configs, ): if check_name in config: # type: ignore res = config[check_name].job_config # type: ignore @@ -612,47 +612,47 @@ class CIConfig: if self.is_build_job(check_name) or check_name == JobNames.FAST_TEST: result = Runners.BUILDER elif any( - words in check_name.lower() - for words in [ - "install packages", - "compatibility check", - "docker", - "build check", - "jepsen", - "style check", - ] + words in check_name.lower() + for words in [ + "install packages", + "compatibility check", + "docker", + "build check", + "jepsen", + "style check", + ] ): result = Runners.STYLE_CHECKER elif check_name == JobNames.DOCS_CHECK: # docs job is demanding result = Runners.FUNC_TESTER_ARM elif any( - words in check_name.lower() - for words in [ - "stateless", - "stateful", - "clickbench", - "sqllogic test", - "libfuzzer", - "bugfix validation", - ] + words in check_name.lower() + for 
words in [ + "stateless", + "stateful", + "clickbench", + "sqllogic test", + "libfuzzer", + "bugfix validation", + ] ): result = Runners.FUNC_TESTER elif any( - words in check_name.lower() - for words in ["stress", "upgrade", "integration", "performance comparison"] + words in check_name.lower() + for words in ["stress", "upgrade", "integration", "performance comparison"] ): result = Runners.STRESS_TESTER elif any( - words in check_name.lower() - for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] + words in check_name.lower() + for words in ["ast fuzzer", "unit tests", "sqlancer", "sqltest"] ): result = Runners.FUZZER_UNIT_TESTER assert result, f"BUG, no runner for [{check_name}]" if ( - "aarch" in check_name.lower() or "arm64" in check_name.lower() + "aarch" in check_name.lower() or "arm64" in check_name.lower() ) and "aarch" not in result: if result == Runners.STRESS_TESTER: # FIXME: no arm stress tester group atm @@ -661,7 +661,7 @@ class CIConfig: # crosscompile - no arm required pass else: - # switch to aarch64 runnner + # switch to aarch64 runner result += "-aarch64" return result @@ -683,10 +683,10 @@ class CIConfig: check_name = self.normalize_string(check_name) for config in ( - self.build_config, - self.builds_report_config, - self.test_configs, - self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, + self.other_jobs_configs, ): for job_name in config: # type: ignore if check_name == self.normalize_string(job_name): @@ -708,16 +708,16 @@ class CIConfig: break assert ( res - ), f"Error: Experimantal feature... Invlid request or not supported job [{check_name}]" + ), f"Error: Experimental feature... Invalid request or not supported job [{check_name}]" return res def get_digest_config(self, check_name: str) -> DigestConfig: res = None for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, + self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, ): if check_name in config: # type: ignore res = config[check_name].job_config.digest # type: ignore @@ -732,15 +732,15 @@ class CIConfig: """ assert branch for config in ( - self.other_jobs_configs, - self.build_config, - self.builds_report_config, - self.test_configs, + self.other_jobs_configs, + self.build_config, + self.builds_report_config, + self.test_configs, ): yield from config # type: ignore def get_builds_for_report( - self, report_name: str, release: bool = False, backport: bool = False + self, report_name: str, release: bool = False, backport: bool = False ) -> List[str]: # hack to modify build list for release and bp wf assert not (release and backport), "Invalid input" @@ -811,16 +811,16 @@ class CIConfig: f"The following names of the build report '{build_report_name}' " f"are missed in build_config: {missed_names}", ) - # And finally, all of tests' requirements must be in the builds + # And finally, all tests' requirements must be in the builds for test_name, test_config in self.test_configs.items(): if test_config.required_build not in self.build_config.keys(): logging.error( - "The requierment '%s' for '%s' is not found in builds", + "The requirement '%s' for '%s' is not found in builds", test_config, test_name, ) errors.append( - f"The requierment '{test_config}' for " + f"The requirement '{test_config}' for " f"'{test_name}' is not found in builds" ) @@ -861,7 +861,7 @@ CI_CONFIG = CIConfig( JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER, ] ), - CILabels.CI_SET_STATLESS: 
LabelConfig( + CILabels.CI_SET_STATELESS: LabelConfig( run_jobs=[ JobNames.STYLE_CHECK, JobNames.FAST_TEST, @@ -869,7 +869,7 @@ CI_CONFIG = CIConfig( JobNames.STATELESS_TEST_RELEASE, ] ), - CILabels.CI_SET_STATLESS_ASAN: LabelConfig( + CILabels.CI_SET_STATELESS_ASAN: LabelConfig( run_jobs=[ JobNames.STYLE_CHECK, JobNames.FAST_TEST, @@ -1155,68 +1155,63 @@ CI_CONFIG = CIConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: TestConfig( - Build.PACKAGE_MSAN, - job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, - job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore + Build.PACKAGE_TSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), # End stateful tests for parallel replicas JobNames.STATELESS_TEST_ASAN: TestConfig( Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=4, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_TSAN: TestConfig( Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_MSAN: TestConfig( Build.PACKAGE_MSAN, - job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_UBSAN: TestConfig( Build.PACKAGE_UBSAN, - job_config=JobConfig(num_batches=2, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=2, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, - job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_RELEASE: TestConfig( - Build.PACKAGE_RELEASE, job_config=JobConfig(**statless_test_common_params) # type: ignore + Build.PACKAGE_RELEASE, job_config=JobConfig(**stateless_test_common_params) # type: ignore ), JobNames.STATELESS_TEST_RELEASE_COVERAGE: TestConfig( Build.PACKAGE_RELEASE_COVERAGE, - job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_AARCH64: TestConfig( - Build.PACKAGE_AARCH64, 
job_config=JobConfig(**statless_test_common_params) # type: ignore + Build.PACKAGE_AARCH64, job_config=JobConfig(**stateless_test_common_params) # type: ignore ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: TestConfig( Build.PACKAGE_RELEASE, - job_config=JobConfig(num_batches=4, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=4, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_S3_DEBUG: TestConfig( Build.PACKAGE_DEBUG, - job_config=JobConfig(num_batches=6, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **stateless_test_common_params), # type: ignore ), JobNames.STATELESS_TEST_AZURE_ASAN: TestConfig( Build.PACKAGE_ASAN, - job_config=JobConfig(num_batches=4, **statless_test_common_params, release_only=True, - run_by_ci_option=True), # type: ignore + job_config=JobConfig(num_batches=4, **stateless_test_common_params, release_only=True, run_by_ci_option=True), # type: ignore ), JobNames.STATELESS_TEST_S3_TSAN: TestConfig( Build.PACKAGE_TSAN, - job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore + job_config=JobConfig(num_batches=5, **stateless_test_common_params), # type: ignore ), JobNames.STRESS_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore @@ -1225,39 +1220,28 @@ CI_CONFIG = CIConfig( Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) - # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, - job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) - # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, - job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) - # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.STRESS_TEST_AZURE_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore + Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore ), JobNames.STRESS_TEST_AZURE_MSAN: TestConfig( - Build.PACKAGE_MSAN, - job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(**stress_test_common_params, release_only=True, run_by_ci_option=True) # type: ignore ), JobNames.UPGRADE_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, - job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) - # type: ignore + Build.PACKAGE_TSAN, 
job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, - job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) - # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore @@ -1276,7 +1260,8 @@ CI_CONFIG = CIConfig( ), JobNames.INTEGRATION_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=5, **integration_test_common_params), # type: ignore + # add [run_by_label="test arm"] to not run in regular pr workflow by default + job_config=JobConfig(num_batches=6, **integration_test_common_params, run_by_label="test arm"), # type: ignore ), JobNames.INTEGRATION_TEST: TestConfig( Build.PACKAGE_RELEASE, @@ -1330,7 +1315,7 @@ CI_CONFIG = CIConfig( JobNames.STATELESS_TEST_FLAKY_ASAN: TestConfig( # replace to non-default Build.PACKAGE_ASAN, - job_config=JobConfig(pr_only=True, **{**statless_test_common_params, "timeout": 3600}), # type: ignore + job_config=JobConfig(pr_only=True, **{**stateless_test_common_params, "timeout": 3600}), # type: ignore ), JobNames.JEPSEN_KEEPER: TestConfig( Build.BINARY_RELEASE, @@ -1350,8 +1335,7 @@ CI_CONFIG = CIConfig( ), JobNames.PERFORMANCE_TEST_ARM64: TestConfig( Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), - # type: ignore + job_config=JobConfig(num_batches=4, run_by_label="pr-performance", **perf_test_common_params), # type: ignore ), JobNames.SQLANCER: TestConfig( Build.PACKAGE_RELEASE, job_config=sqllancer_test_common_params @@ -1381,6 +1365,7 @@ CI_CONFIG = CIConfig( ) CI_CONFIG.validate() + # checks required by Mergeable Check REQUIRED_CHECKS = [ "PR Check", @@ -1479,7 +1464,7 @@ CHECK_DESCRIPTIONS = [ "Checks if new added or modified tests are flaky by running them repeatedly, " "in parallel, with more randomization. Functional tests are run 100 times " "with address sanitizer, and additional randomization of thread scheduling. " - "Integrational tests are run up to 10 times. If at least once a new test has " + "Integration tests are run up to 10 times. If at least once a new test has " "failed, or was too long, this check will be red. 
We don't allow flaky tests, " 'read the doc', @@ -1569,7 +1554,7 @@ CHECK_DESCRIPTIONS = [ lambda x: x.startswith("ClickBench"), ), CheckDescription( - "Falback for unknown", + "Fallback for unknown", "There's no description for the check yet, please add it to " "tests/ci/ci_config.py:CHECK_DESCRIPTIONS", lambda x: True, diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index 0f10f7d4f85..c07c094d439 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -161,7 +161,7 @@ class TestCIOptions(unittest.TestCase): "Stateless tests (azure, asan)": { "batches": list(range(3)), "num_batches": 3, - "run_if_ci_option_include_set": True, + "run_by_ci_option": True, } } jobs_to_do, jobs_to_skip, job_params = ci_options.apply( @@ -226,10 +226,10 @@ class TestCIOptions(unittest.TestCase): job_params[job] = { "batches": list(range(3)), "num_batches": 3, - "run_if_ci_option_include_set": "azure" in job, + "run_by_ci_option": "azure" in job, } else: - job_params[job] = {"run_if_ci_option_include_set": False} + job_params[job] = {"run_by_ci_option": False} jobs_to_do, jobs_to_skip, job_params = ci_options.apply( jobs_to_do, jobs_to_skip, job_params, PRInfo() From 2fe684da0917dfca12bce6fa215bd566370d9db5 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 16 May 2024 14:51:04 +0200 Subject: [PATCH 145/392] Add dynamic tests --- .../03150_dynamic_type_mv_insert.reference | 35 ++ .../03150_dynamic_type_mv_insert.sql | 34 ++ ...151_dynamic_type_scale_max_types.reference | 26 ++ .../03151_dynamic_type_scale_max_types.sql | 23 ++ .../03152_dynamic_type_simple.reference | 25 ++ .../0_stateless/03152_dynamic_type_simple.sql | 29 ++ .../03153_dynamic_type_empty.reference | 15 + .../0_stateless/03153_dynamic_type_empty.sql | 5 + ..._dynamic_type_concurrent_inserts.reference | 7 + .../03156_dynamic_type_concurrent_inserts.sh | 21 ++ .../03157_dynamic_type_json.reference | 5 + .../0_stateless/03157_dynamic_type_json.sql | 13 + .../03158_dynamic_type_from_variant.reference | 17 + .../03158_dynamic_type_from_variant.sql | 15 + .../03159_dynamic_type_all_types.reference | 300 ++++++++++++++++++ .../03159_dynamic_type_all_types.sql | 99 ++++++ .../03160_dynamic_type_agg.reference | 1 + .../0_stateless/03160_dynamic_type_agg.sql | 10 + .../03162_dynamic_type_nested.reference | 4 + .../0_stateless/03162_dynamic_type_nested.sql | 16 + 20 files changed, 700 insertions(+) create mode 100644 tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference create mode 100644 tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql create mode 100644 tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference create mode 100644 tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql create mode 100644 tests/queries/0_stateless/03152_dynamic_type_simple.reference create mode 100644 tests/queries/0_stateless/03152_dynamic_type_simple.sql create mode 100644 tests/queries/0_stateless/03153_dynamic_type_empty.reference create mode 100644 tests/queries/0_stateless/03153_dynamic_type_empty.sql create mode 100644 tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference create mode 100755 tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh create mode 100644 tests/queries/0_stateless/03157_dynamic_type_json.reference create mode 100644 tests/queries/0_stateless/03157_dynamic_type_json.sql create mode 100644 tests/queries/0_stateless/03158_dynamic_type_from_variant.reference create mode 100644 
tests/queries/0_stateless/03158_dynamic_type_from_variant.sql create mode 100644 tests/queries/0_stateless/03159_dynamic_type_all_types.reference create mode 100644 tests/queries/0_stateless/03159_dynamic_type_all_types.sql create mode 100644 tests/queries/0_stateless/03160_dynamic_type_agg.reference create mode 100644 tests/queries/0_stateless/03160_dynamic_type_agg.sql create mode 100644 tests/queries/0_stateless/03162_dynamic_type_nested.reference create mode 100644 tests/queries/0_stateless/03162_dynamic_type_nested.sql diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference new file mode 100644 index 00000000000..0b76d30953e --- /dev/null +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.reference @@ -0,0 +1,35 @@ +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +3 1 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +2 1704056400 Decimal(18, 3) +3 1 String +3 1 String +4 2 String +4 2 String + +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +4 2 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 Decimal(18, 3) +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +3 1 String +4 2 String +4 2 String +4 2 String diff --git a/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql new file mode 100644 index 00000000000..ad5ea9512c6 --- /dev/null +++ b/tests/queries/0_stateless/03150_dynamic_type_mv_insert.sql @@ -0,0 +1,34 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE null_table +( + n1 UInt8, + n2 Dynamic(max_types=3) +) +ENGINE = Null; + +CREATE MATERIALIZED VIEW dummy_rmv TO to_table +AS SELECT * FROM null_table; + +CREATE TABLE to_table +( + n1 UInt8, + n2 Dynamic(max_types=4) +) +ENGINE = MergeTree ORDER BY n1; + +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=1); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=10); +INSERT INTO null_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference new file mode 100644 index 00000000000..d96fbf658d8 --- /dev/null +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.reference @@ -0,0 +1,26 @@ +1 2024-01-01 Date +2 1704056400 String +3 1 String +4 2 String + +1 2024-01-01 Date +1 2024-01-01 Date +2 1704056400 Decimal(18, 3) +2 1704056400 String +3 1 Float32 +3 1 String +4 2 Float64 +4 2 String + +1 2024-01-01 String +1 2024-01-01 String +1 2024-01-01 String +2 1704056400 String +2 1704056400 String +2 1704056400 String +3 1 String +3 1 String +3 1 String +4 2 String +4 2 String +4 2 String diff --git 
a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql new file mode 100644 index 00000000000..04322fc4f0c --- /dev/null +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql @@ -0,0 +1,23 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE to_table +( + n1 UInt8, + n2 Dynamic(max_types=2) +) +ENGINE = MergeTree ORDER BY n1; + +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=5); +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +select ''; +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=1); +INSERT INTO to_table ( n1, n2 ) VALUES (1, '2024-01-01'), (2, toDateTime64('2024-01-01', 3, 'Asia/Istanbul')), (3, toFloat32(1)), (4, toFloat64(2)); +SELECT *, dynamicType(n2) FROM to_table ORDER BY ALL; + +ALTER TABLE to_table MODIFY COLUMN n2 Dynamic(max_types=500); -- { serverError UNEXPECTED_AST_STRUCTURE } diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.reference b/tests/queries/0_stateless/03152_dynamic_type_simple.reference new file mode 100644 index 00000000000..5f243209ff3 --- /dev/null +++ b/tests/queries/0_stateless/03152_dynamic_type_simple.reference @@ -0,0 +1,25 @@ +string1 String +42 Int64 +3.14 Float64 +[1,2] Array(Int64) +2021-01-01 Date +string2 String + +\N None 42 Int64 +42 Int64 string String +string String [1, 2] String +[1,2] Array(Int64) \N None + ┌─d────────────────────────┬─dynamicType(d)─┬─d.Int64─┬─d.String─┬─────d.Date─┬─d.Float64─┬──────────d.DateTime─┬─d.Array(Int64)─┬─d.Array(String)──────────┐ + 1. │ 42 │ Int64 │ 42 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 2. │ string1 │ String │ ᴺᵁᴸᴸ │ string1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 3. │ 2021-01-01 │ Date │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2021-01-01 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 4. │ [1,2,3] │ Array(Int64) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [1,2,3] │ [] │ + 5. │ 3.14 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 3.14 │ ᴺᵁᴸᴸ │ [] │ [] │ + 6. │ string2 │ String │ ᴺᵁᴸᴸ │ string2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ + 7. │ 2021-01-01 12:00:00 │ DateTime │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2021-01-01 12:00:00 │ [] │ [] │ + 8. │ ['array','of','strings'] │ Array(String) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ ['array','of','strings'] │ + 9. │ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ [] │ [] │ +10. 
│ 42.42 │ Float64 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 42.42 │ ᴺᵁᴸᴸ │ [] │ [] │ + └──────────────────────────┴────────────────┴─────────┴──────────┴────────────┴───────────┴─────────────────────┴────────────────┴──────────────────────────┘ + +49995000 diff --git a/tests/queries/0_stateless/03152_dynamic_type_simple.sql b/tests/queries/0_stateless/03152_dynamic_type_simple.sql new file mode 100644 index 00000000000..fd5328faf15 --- /dev/null +++ b/tests/queries/0_stateless/03152_dynamic_type_simple.sql @@ -0,0 +1,29 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE test_max_types (d Dynamic(max_types=5)) ENGINE = Memory; +INSERT INTO test_max_types VALUES ('string1'), (42), (3.14), ([1, 2]), (toDate('2021-01-01')), ('string2'); +SELECT d, dynamicType(d) FROM test_max_types; + +SELECT ''; +CREATE TABLE test_nested_dynamic (d1 Dynamic, d2 Dynamic(max_types=2)) ENGINE = Memory; +INSERT INTO test_nested_dynamic VALUES (NULL, 42), (42, 'string'), ('string', [1, 2]), ([1, 2], NULL); +SELECT d1, dynamicType(d1), d2, dynamicType(d2) FROM test_nested_dynamic; + +CREATE TABLE test_rapid_schema (d Dynamic) ENGINE = Memory; +INSERT INTO test_rapid_schema VALUES (42), ('string1'), (toDate('2021-01-01')), ([1, 2, 3]), (3.14), ('string2'), (toDateTime('2021-01-01 12:00:00')), (['array', 'of', 'strings']), (NULL), (toFloat64(42.42)); + +SELECT d, dynamicType(d), d.Int64, d.String, d.Date, d.Float64, d.DateTime, d.`Array(Int64)`, d.`Array(String)` +FROM test_rapid_schema FORMAT PrettyCompactMonoBlock; + + +SELECT ''; +SELECT finalizeAggregation(CAST(dynamic_state, 'AggregateFunction(sum, UInt64)')) +FROM +( + SELECT CAST(state, 'Dynamic') AS dynamic_state + FROM + ( + SELECT sumState(number) AS state + FROM numbers(10000) + ) +); diff --git a/tests/queries/0_stateless/03153_dynamic_type_empty.reference b/tests/queries/0_stateless/03153_dynamic_type_empty.reference new file mode 100644 index 00000000000..f7c047dcd19 --- /dev/null +++ b/tests/queries/0_stateless/03153_dynamic_type_empty.reference @@ -0,0 +1,15 @@ +[] String +[1] Array(Int64) +[] Array(Int64) +['1'] Array(String) +[] Array(Int64) +() String +(1) Tuple(Int64) +(0) Tuple(Int64) +('1') Tuple(String) +(0) Tuple(Int64) +{} String +{1:2} Map(Int64, Int64) +{} Map(Int64, Int64) +{'1':'2'} Map(String, String) +{} Map(Int64, Int64) diff --git a/tests/queries/0_stateless/03153_dynamic_type_empty.sql b/tests/queries/0_stateless/03153_dynamic_type_empty.sql new file mode 100644 index 00000000000..8e942fe6f6e --- /dev/null +++ b/tests/queries/0_stateless/03153_dynamic_type_empty.sql @@ -0,0 +1,5 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE test_null_empty (d Dynamic) ENGINE = Memory; +INSERT INTO test_null_empty VALUES ([]), ([1]), ([]), (['1']), ([]), (()),((1)), (()), (('1')), (()), ({}), ({1:2}), ({}), ({'1':'2'}), ({}); +SELECT d, dynamicType(d) FROM test_null_empty; diff --git a/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference new file mode 100644 index 00000000000..e1c7b69b136 --- /dev/null +++ b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.reference @@ -0,0 +1,7 @@ +Array(UInt64) 12000 10000 +Date 12000 10001 +Float64 12000 10000 +Int64 10000 10000 +Map(UInt64, String) 10000 10000 +String 10000 10000 +UInt64 4000 4000 diff --git a/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh new file mode 100755 index 00000000000..d7709b722c9 --- 
/dev/null +++ b/tests/queries/0_stateless/03156_dynamic_type_concurrent_inserts.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "CREATE TABLE test_cc (d Dynamic) ENGINE = Memory" + + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT number::Int64 AS d FROM numbers(10000) SETTINGS max_threads=1,max_insert_threads=1" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toString(number) AS d FROM numbers(10000) SETTINGS max_threads=2,max_insert_threads=2" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toDate(number % 10000) AS d FROM numbers(10000) SETTINGS max_threads=3,max_insert_threads=3" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT [number, number + 1] AS d FROM numbers(10000) SETTINGS max_threads=4,max_insert_threads=4" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT toFloat64(number) AS d FROM numbers(10000) SETTINGS max_threads=5,max_insert_threads=5" & +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "INSERT INTO test_cc SELECT map(number, toString(number)) AS d FROM numbers(10000) SETTINGS max_threads=6,max_insert_threads=6" & + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --use_variant_as_common_type=1 --allow_experimental_variant_type=1 -q "INSERT INTO test_cc SELECT CAST(multiIf(number % 5 = 0, toString(number), number % 5 = 1, number, number % 5 = 2, toFloat64(number), number % 5 = 3, toDate('2020-01-01'), [number, number + 1]), 'Dynamic') FROM numbers(10000) SETTINGS max_threads=6,max_insert_threads=6" & + +wait + +$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 -q "SELECT dynamicType(d) t, count(), uniqExact(d) FROM test_cc GROUP BY t ORDER BY t" diff --git a/tests/queries/0_stateless/03157_dynamic_type_json.reference b/tests/queries/0_stateless/03157_dynamic_type_json.reference new file mode 100644 index 00000000000..38bca12bb95 --- /dev/null +++ b/tests/queries/0_stateless/03157_dynamic_type_json.reference @@ -0,0 +1,5 @@ +1 (((((((((('deep_value')))))))))) +2 (((((((((('deep_array_value')))))))))) + +(((((((((('deep_value')))))))))) Tuple(level1 Tuple(level2 Tuple(level3 Tuple(level4 Tuple(level5 Tuple(level6 Tuple(level7 Tuple(level8 Tuple(level9 Tuple(level10 String)))))))))) +(((((((((('deep_array_value')))))))))) Tuple(level1 Tuple(level2 Tuple(level3 Tuple(level4 Tuple(level5 Tuple(level6 Tuple(level7 Tuple(level8 Tuple(level9 Tuple(level10 String)))))))))) diff --git a/tests/queries/0_stateless/03157_dynamic_type_json.sql b/tests/queries/0_stateless/03157_dynamic_type_json.sql new file mode 100644 index 00000000000..cb1a5987104 --- /dev/null +++ b/tests/queries/0_stateless/03157_dynamic_type_json.sql @@ -0,0 +1,13 @@ +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; + +CREATE TABLE test_deep_nested_json (i UInt16, d JSON) ENGINE = Memory; + +INSERT INTO test_deep_nested_json VALUES (1, '{"level1": {"level2": {"level3": {"level4": {"level5": {"level6": {"level7": {"level8": {"level9": {"level10": "deep_value"}}}}}}}}}}'); +INSERT INTO test_deep_nested_json VALUES (2, '{"level1": {"level2": {"level3": {"level4": {"level5": {"level6": {"level7": {"level8": {"level9": {"level10": 
"deep_array_value"}}}}}}}}}}'); + +SELECT * FROM test_deep_nested_json ORDER BY i; + +SELECT ''; +SELECT d::Dynamic d1, dynamicType(d1) FROM test_deep_nested_json ORDER BY i; diff --git a/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference b/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference new file mode 100644 index 00000000000..2ede006cedc --- /dev/null +++ b/tests/queries/0_stateless/03158_dynamic_type_from_variant.reference @@ -0,0 +1,17 @@ +false Variant(Bool, DateTime64(3), IPv6, String, UInt32) +false Variant(Bool, DateTime64(3), IPv6, String, UInt32) +true Variant(Bool, DateTime64(3), IPv6, String, UInt32) +2001-01-01 01:01:01.111 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +s Variant(Bool, DateTime64(3), IPv6, String, UInt32) +0 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +1 Variant(Bool, DateTime64(3), IPv6, String, UInt32) +\N Variant(Bool, DateTime64(3), IPv6, String, UInt32) + +false Bool +false Bool +true Bool +2001-01-01 01:01:01.111 DateTime64(3) +s String +0 UInt32 +1 UInt32 +\N None diff --git a/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql b/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql new file mode 100644 index 00000000000..20a9e17a148 --- /dev/null +++ b/tests/queries/0_stateless/03158_dynamic_type_from_variant.sql @@ -0,0 +1,15 @@ +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; + +CREATE TABLE test_variable (v Variant(String, UInt32, IPv6, Bool, DateTime64)) ENGINE = Memory; +CREATE TABLE test_dynamic (d Dynamic) ENGINE = Memory; + +INSERT INTO test_variable VALUES (1), ('s'), (0), ('0'), ('true'), ('false'), ('2001-01-01 01:01:01.111'), (NULL); + +SELECT v, toTypeName(v) FROM test_variable ORDER BY v; + +INSERT INTO test_dynamic SELECT * FROM test_variable; + +SELECT ''; +SELECT d, dynamicType(d) FROM test_dynamic ORDER BY d; diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference new file mode 100644 index 00000000000..a162ec4f857 --- /dev/null +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference @@ -0,0 +1,300 @@ +Array(Dynamic) [] +Array(Array(Dynamic)) [[]] +Array(Array(Array(Dynamic))) [[[]]] +Bool false +Bool true +Date 2022-01-01 +Date32 2022-01-01 +DateTime 2022-01-01 01:01:01 +DateTime64(3) 2022-01-01 01:01:01.011 +Decimal(9, 1) -99999999.9 +Decimal(18, 2) -999999999.99 +Decimal(38, 3) -999999999.999 +Decimal(76, 4) -999999999.9999 +Float32 -inf +Float32 -inf +Float32 -inf +Float32 -3.4028233e38 +Float32 -1.1754942e-38 +Float32 -1e-45 +Float32 1e-45 +Float32 1.1754942e-38 +Float32 3.4028233e38 +Float32 inf +Float32 inf +Float32 inf +Float32 nan +Float32 nan +Float32 nan +Float64 -inf +Float64 -inf +Float64 -inf +Float64 -1.7976931348623157e308 +Float64 -3.40282347e38 +Float64 -1.1754943499999998e-38 +Float64 -1.3999999999999999e-45 +Float64 -2.2250738585072014e-308 +Float64 2.2250738585072014e-308 +Float64 1.3999999999999999e-45 +Float64 1.1754943499999998e-38 +Float64 3.40282347e38 +Float64 1.7976931348623157e308 +Float64 inf +Float64 inf +Float64 inf +Float64 nan +Float64 nan +Float64 nan +FixedString(1) 1 +FixedString(2) 1\0 +FixedString(10) 1\0\0\0\0\0\0\0\0\0 +IPv4 192.168.0.1 +IPv6 ::1 +Int8 -128 +Int8 -128 +Int8 -127 +Int8 -127 +Int8 -1 +Int8 -1 +Int8 0 +Int8 0 +Int8 1 +Int8 1 +Int8 126 +Int8 126 +Int8 127 +Int8 127 +Int16 -32768 +Int16 -32767 +Int16 -1 +Int16 0 +Int16 1 +Int16 32766 +Int16 32767 
+Int32 -2147483648 +Int32 -2147483647 +Int32 -1 +Int32 0 +Int32 1 +Int32 2147483646 +Int32 2147483647 +Int64 -9223372036854775808 +Int64 -9223372036854775807 +Int64 -1 +Int64 0 +Int64 1 +Int64 9223372036854775806 +Int64 9223372036854775807 +Int128 -170141183460469231731687303715884105728 +Int128 -170141183460469231731687303715884105727 +Int128 -1 +Int128 0 +Int128 1 +Int128 170141183460469231731687303715884105726 +Int128 170141183460469231731687303715884105727 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819968 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819967 +Int256 -1 +Int256 0 +Int256 1 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819966 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819967 +IntervalDay 1 +IntervalYear 3 +IntervalMonth 2 +LowCardinality(String) 1 +LowCardinality(String) 1 +LowCardinality(UInt16) 0 +MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] +Map(Dynamic, Dynamic) {'11':'v1','22':'1'} +Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] +Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] +Object(\'json\') {"1":"2"} +Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":null,"k1":1,"k2":2} +Object(Nullable(\'json\')) {"1":2,"2":3,"2020-10-10":null,"k1":null,"k2":null} +Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":"foo","k1":null,"k2":null} +Point (1.23,4.5600000000000005) +Ring [(1.23,4.5600000000000005),(2.34,5.67)] +String string +SimpleAggregateFunction(anyLast, Array(Int16)) [1,2] +Tuple(Dynamic) ('') +Tuple(Tuple(Dynamic)) (('')) +Tuple(Tuple(Tuple(Dynamic))) (((''))) +UUID 00000000-0000-0000-0000-000000000000 +UUID dededdb6-7835-4ce4-8d11-b5de6f2820e9 +UInt8 0 +UInt8 1 +UInt8 254 +UInt8 255 +UInt16 0 +UInt16 1 +UInt16 65534 +UInt16 65535 +UInt32 0 +UInt32 1 +UInt32 4294967294 +UInt32 4294967295 +UInt64 0 +UInt64 1 +UInt64 18446744073709551614 +UInt64 18446744073709551615 +UInt128 0 +UInt128 1 +UInt128 340282366920938463463374607431768211454 +UInt128 340282366920938463463374607431768211455 +UInt256 0 +UInt256 1 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639934 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639935 + +Array(Dynamic) [] +Array(Array(Dynamic)) [[]] +Array(Array(Array(Dynamic))) [[[]]] +Bool false +Bool true +Date 2022-01-01 +Date32 2022-01-01 +DateTime 2022-01-01 01:01:01 +DateTime64(3) 2022-01-01 01:01:01.011 +Decimal(9, 1) -99999999.9 +Decimal(18, 2) -999999999.99 +Decimal(38, 3) -999999999.999 +Decimal(76, 4) -999999999.9999 +Float32 -inf +Float32 -inf +Float32 -inf +Float32 -3.4028233e38 +Float32 -1.1754942e-38 +Float32 -1e-45 +Float32 1e-45 +Float32 1.1754942e-38 +Float32 3.4028233e38 +Float32 inf +Float32 inf +Float32 inf +Float32 nan +Float32 nan +Float32 nan +Float64 -inf +Float64 -inf +Float64 -inf +Float64 -1.7976931348623157e308 +Float64 -3.40282347e38 +Float64 -1.1754943499999998e-38 +Float64 -1.3999999999999999e-45 +Float64 -2.2250738585072014e-308 +Float64 2.2250738585072014e-308 +Float64 1.3999999999999999e-45 +Float64 1.1754943499999998e-38 +Float64 3.40282347e38 +Float64 1.7976931348623157e308 +Float64 inf +Float64 inf +Float64 inf +Float64 nan +Float64 nan +Float64 nan +FixedString(1) 1 +FixedString(2) 1\0 +FixedString(10) 
1\0\0\0\0\0\0\0\0\0 +IPv4 192.168.0.1 +IPv6 ::1 +Int8 -128 +Int8 -128 +Int8 -127 +Int8 -127 +Int8 -1 +Int8 -1 +Int8 0 +Int8 0 +Int8 1 +Int8 1 +Int8 126 +Int8 126 +Int8 127 +Int8 127 +Int16 -32768 +Int16 -32767 +Int16 -1 +Int16 0 +Int16 1 +Int16 32766 +Int16 32767 +Int32 -2147483648 +Int32 -2147483647 +Int32 -1 +Int32 0 +Int32 1 +Int32 2147483646 +Int32 2147483647 +Int64 -9223372036854775808 +Int64 -9223372036854775807 +Int64 -1 +Int64 0 +Int64 1 +Int64 9223372036854775806 +Int64 9223372036854775807 +Int128 -170141183460469231731687303715884105728 +Int128 -170141183460469231731687303715884105727 +Int128 -1 +Int128 0 +Int128 1 +Int128 170141183460469231731687303715884105726 +Int128 170141183460469231731687303715884105727 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819968 +Int256 -57896044618658097711785492504343953926634992332820282019728792003956564819967 +Int256 -1 +Int256 0 +Int256 1 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819966 +Int256 57896044618658097711785492504343953926634992332820282019728792003956564819967 +IntervalDay 1 +IntervalYear 3 +IntervalMonth 2 +LowCardinality(String) 1 +LowCardinality(String) 1 +LowCardinality(UInt16) 0 +MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] +Map(Dynamic, Dynamic) {'11':'v1','22':'1'} +Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] +Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] +Object(\'json\') {"1":"2"} +Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":null,"k1":1,"k2":2} +Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":"foo","k1":null,"k2":null} +Object(Nullable(\'json\')) {"1":2,"2":3,"2020-10-10":null,"k1":null,"k2":null} +Point (1.23,4.5600000000000005) +Ring [(1.23,4.5600000000000005),(2.34,5.67)] +String string +SimpleAggregateFunction(anyLast, Array(Int16)) [1,2] +Tuple(Dynamic) ('') +Tuple(Tuple(Dynamic)) (('')) +Tuple(Tuple(Tuple(Dynamic))) (((''))) +UUID 00000000-0000-0000-0000-000000000000 +UUID dededdb6-7835-4ce4-8d11-b5de6f2820e9 +UInt8 0 +UInt8 1 +UInt8 254 +UInt8 255 +UInt16 0 +UInt16 1 +UInt16 65534 +UInt16 65535 +UInt32 0 +UInt32 1 +UInt32 4294967294 +UInt32 4294967295 +UInt64 0 +UInt64 1 +UInt64 18446744073709551614 +UInt64 18446744073709551615 +UInt128 0 +UInt128 1 +UInt128 340282366920938463463374607431768211454 +UInt128 340282366920938463463374607431768211455 +UInt256 0 +UInt256 1 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639934 +UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639935 + +50 +50 diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql new file mode 100644 index 00000000000..38d70dee64e --- /dev/null +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql @@ -0,0 +1,99 @@ +-- Tags: no-random-settings + +SET allow_experimental_dynamic_type=1; +SET allow_experimental_object_type=1; +SET allow_experimental_variant_type=1; +SET allow_suspicious_low_cardinality_types=1; + + +CREATE TABLE t (d Dynamic(max_types=255)) ENGINE = Memory; +-- Integer types: signed and unsigned integers (UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256) +INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); 
+INSERT INTO t VALUES (-128::Int8), (-127::Int8), (-1::Int8), (0::Int8), (1::Int8), (126::Int8), (127::Int8); +INSERT INTO t VALUES (-32768::Int16), (-32767::Int16), (-1::Int16), (0::Int16), (1::Int16), (32766::Int16), (32767::Int16); +INSERT INTO t VALUES (-2147483648::Int32), (-2147483647::Int32), (-1::Int32), (0::Int32), (1::Int32), (2147483646::Int32), (2147483647::Int32); +INSERT INTO t VALUES (-9223372036854775808::Int64), (-9223372036854775807::Int64), (-1::Int64), (0::Int64), (1::Int64), (9223372036854775806::Int64), (9223372036854775807::Int64); +INSERT INTO t VALUES (-170141183460469231731687303715884105728::Int128), (-170141183460469231731687303715884105727::Int128), (-1::Int128), (0::Int128), (1::Int128), (170141183460469231731687303715884105726::Int128), (170141183460469231731687303715884105727::Int128); +INSERT INTO t VALUES (-57896044618658097711785492504343953926634992332820282019728792003956564819968::Int256), (-57896044618658097711785492504343953926634992332820282019728792003956564819967::Int256), (-1::Int256), (0::Int256), (1::Int256), (57896044618658097711785492504343953926634992332820282019728792003956564819966::Int256), (57896044618658097711785492504343953926634992332820282019728792003956564819967::Int256); + +INSERT INTO t VALUES (0::UInt8), (1::UInt8), (254::UInt8), (255::UInt8); +INSERT INTO t VALUES (0::UInt16), (1::UInt16), (65534::UInt16), (65535::UInt16); +INSERT INTO t VALUES (0::UInt32), (1::UInt32), (4294967294::UInt32), (4294967295::UInt32); +INSERT INTO t VALUES (0::UInt64), (1::UInt64), (18446744073709551614::UInt64), (18446744073709551615::UInt64); +INSERT INTO t VALUES (0::UInt128), (1::UInt128), (340282366920938463463374607431768211454::UInt128), (340282366920938463463374607431768211455::UInt128); +INSERT INTO t VALUES (0::UInt256), (1::UInt256), (115792089237316195423570985008687907853269984665640564039457584007913129639934::UInt256), (115792089237316195423570985008687907853269984665640564039457584007913129639935::UInt256); + +-- Floating-point numbers: floats(Float32 and Float64) and Decimal values +INSERT INTO t VALUES (1.17549435e-38::Float32), (3.40282347e+38::Float32), (-3.40282347e+38::Float32), (-1.17549435e-38::Float32), (1.4e-45::Float32), (-1.4e-45::Float32); +INSERT INTO t VALUES (inf::Float32), (-inf::Float32), (nan::Float32); +INSERT INTO t VALUES (inf::FLOAT(12)), (-inf::FLOAT(12)), (nan::FLOAT(12)); +INSERT INTO t VALUES (inf::FLOAT(15,22)), (-inf::FLOAT(15,22)), (nan::FLOAT(15,22)); + +INSERT INTO t VALUES (1.17549435e-38::Float64), (3.40282347e+38::Float64), (-3.40282347e+38::Float64), (-1.17549435e-38::Float64), (1.4e-45::Float64), (-1.4e-45::Float64); +INSERT INTO t VALUES (2.2250738585072014e-308::Float64), (1.7976931348623157e+308::Float64), (-1.7976931348623157e+308::Float64), (-2.2250738585072014e-308::Float64); +INSERT INTO t VALUES (inf::Float64), (-inf::Float64), (nan::Float64); +INSERT INTO t VALUES (inf::DOUBLE(12)), (-inf::DOUBLE(12)), (nan::DOUBLE(12)); +INSERT INTO t VALUES (inf::DOUBLE(15,22)), (-inf::DOUBLE(15,22)), (nan::DOUBLE(15,22)); + +INSERT INTO t VALUES (-99999999.9::Decimal32(1)); +INSERT INTO t VALUES (-999999999.99::Decimal64(2)); +INSERT INTO t VALUES (-999999999.999::Decimal128(3)); +INSERT INTO t VALUES (-999999999.9999::Decimal256(4)); + +-- Strings: String and FixedString +INSERT INTO t VALUES ('string'::String), ('1'::FixedString(1)), ('1'::FixedString(2)), ('1'::FixedString(10)); --(''::String), + +-- Boolean +INSERT INTO t VALUES ('1'::Bool), (0::Bool); + +-- Dates: use Date and Date32 for days, and 
DateTime and DateTime64 for instances in time +INSERT INTO t VALUES ('2022-01-01'::Date), ('2022-01-01'::Date32), ('2022-01-01 01:01:01'::DateTime), ('2022-01-01 01:01:01.011'::DateTime64); + +-- JSON +INSERT INTO t VALUES ('{"1":"2"}'::JSON); +INSERT INTO t FORMAT JSONEachRow {"d" : {"k1" : 1, "k2" : 2}} {"d" : {"1" : 2, "2" : 3}} {"d" : {"2020-10-10" : "foo"}}; + +-- UUID +INSERT INTO t VALUES ('dededdb6-7835-4ce4-8d11-b5de6f2820e9'::UUID); +INSERT INTO t VALUES ('00000000-0000-0000-0000-000000000000'::UUID); + +-- LowCardinality +INSERT INTO t VALUES ('1'::LowCardinality(String)), ('1'::LowCardinality(String)), (0::LowCardinality(UInt16)); + +-- Arrays +INSERT INTO t VALUES ([]::Array(Dynamic)), ([[]]::Array(Array(Dynamic))), ([[[]]]::Array(Array(Array(Dynamic)))); + +-- Tuple +INSERT INTO t VALUES (()::Tuple(Dynamic)), ((())::Tuple(Tuple(Dynamic))), (((()))::Tuple(Tuple(Tuple(Dynamic)))); + +-- Map. +INSERT INTO t VALUES (map(11::Dynamic, 'v1'::Dynamic, '22'::Dynamic, 1::Dynamic)); + +-- SimpleAggregateFunction +INSERT INTO t VALUES ([1,2]::SimpleAggregateFunction(anyLast, Array(Int16))); + +-- IPs +INSERT INTO t VALUES (toIPv4('192.168.0.1')), (toIPv6('::1')); + +-- Geo +INSERT INTO t VALUES ((1.23, 4.56)::Point), (([(1.23, 4.56)::Point, (2.34, 5.67)::Point])::Ring); +INSERT INTO t VALUES ([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]::MultiPolygon); + +-- Interval +INSERT INTO t VALUES (interval '1' day), (interval '2' month), (interval '3' year); + +-- Nested +INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y String)); +INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, ['ee', 'ff']), [(7, 'gg'), (8, 'hh')])]::Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String))); + +SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d ; + +CREATE TABLE t2 (d Dynamic(max_types=255)) ENGINE = Memory; +INSERT INTO t2 SELECT * FROM t; + +SELECT ''; +SELECT dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; + +SELECT ''; +SELECT uniqExact(dynamicType(d)) t_ FROM t; +SELECT uniqExact(dynamicType(d)) t_ FROM t2; diff --git a/tests/queries/0_stateless/03160_dynamic_type_agg.reference b/tests/queries/0_stateless/03160_dynamic_type_agg.reference new file mode 100644 index 00000000000..54f6e428839 --- /dev/null +++ b/tests/queries/0_stateless/03160_dynamic_type_agg.reference @@ -0,0 +1 @@ +4950 4950 diff --git a/tests/queries/0_stateless/03160_dynamic_type_agg.sql b/tests/queries/0_stateless/03160_dynamic_type_agg.sql new file mode 100644 index 00000000000..f99232031a8 --- /dev/null +++ b/tests/queries/0_stateless/03160_dynamic_type_agg.sql @@ -0,0 +1,10 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE t (d Dynamic) ENGINE = Memory; + +INSERT INTO t SELECT sumState(number) AS d FROM numbers(100); + +SELECT finalizeAggregation(d.`AggregateFunction(sum, UInt64)`), + sumMerge(d.`AggregateFunction(sum, UInt64)`) +FROM t GROUP BY d.`AggregateFunction(sum, UInt64)`; + diff --git a/tests/queries/0_stateless/03162_dynamic_type_nested.reference b/tests/queries/0_stateless/03162_dynamic_type_nested.reference new file mode 100644 index 00000000000..8d5bcb5f85a --- /dev/null +++ b/tests/queries/0_stateless/03162_dynamic_type_nested.reference @@ -0,0 +1,4 @@ + ┌─dynamicType(d)──────────────┬─d─────────────────────────────────────────┬─d.Nested(x UInt32, y Dynamic).x─┬─d.Nested(x 
UInt32, y Dynamic).y───┬─dynamicType(arrayElement(d.Nested(x UInt32, y Dynamic).y, 1))─┬─d.Nested(x UInt32, y Dynamic).y.String─┬─d.Nested(x UInt32, y Dynamic).y.Tuple(Int64, Array(String))─┐ +1. │ Nested(x UInt32, y Dynamic) │ [(1,'aa'),(2,'bb')] │ [1,2] │ ['aa','bb'] │ String │ ['aa','bb'] │ [(0,[]),(0,[])] │ +2. │ Nested(x UInt32, y Dynamic) │ [(1,(2,['aa','bb'])),(5,(6,['ee','ff']))] │ [1,5] │ [(2,['aa','bb']),(6,['ee','ff'])] │ Tuple(Int64, Array(String)) │ [NULL,NULL] │ [(2,['aa','bb']),(6,['ee','ff'])] │ + └─────────────────────────────┴───────────────────────────────────────────┴─────────────────────────────────┴───────────────────────────────────┴───────────────────────────────────────────────────────────────┴────────────────────────────────────────┴─────────────────────────────────────────────────────────────┘ diff --git a/tests/queries/0_stateless/03162_dynamic_type_nested.sql b/tests/queries/0_stateless/03162_dynamic_type_nested.sql new file mode 100644 index 00000000000..94007459a9e --- /dev/null +++ b/tests/queries/0_stateless/03162_dynamic_type_nested.sql @@ -0,0 +1,16 @@ +SET allow_experimental_dynamic_type=1; + +CREATE TABLE t (d Dynamic) ENGINE = Memory; + +INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y Dynamic)) ; +INSERT INTO t VALUES ([(1, (2, ['aa', 'bb'])), (5, (6, ['ee', 'ff']))]::Nested(x UInt32, y Dynamic)); + +SELECT dynamicType(d), + d, + d.`Nested(x UInt32, y Dynamic)`.x, + d.`Nested(x UInt32, y Dynamic)`.y, + dynamicType(d.`Nested(x UInt32, y Dynamic)`.y[1]), + d.`Nested(x UInt32, y Dynamic)`.y.`String`, + d.`Nested(x UInt32, y Dynamic)`.y.`Tuple(Int64, Array(String))` +FROM t ORDER BY d +FORMAT PrettyCompactMonoBlock; From 4829db4d9e80a02eca4b08779bd645bcd3ed5ba7 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 16 May 2024 14:51:22 +0200 Subject: [PATCH 146/392] Add Dynamic type in fuzzer tests --- tests/fuzz/dictionaries/datatypes.dict | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/fuzz/dictionaries/datatypes.dict b/tests/fuzz/dictionaries/datatypes.dict index 232e89db0c0..a01a94fd3e3 100644 --- a/tests/fuzz/dictionaries/datatypes.dict +++ b/tests/fuzz/dictionaries/datatypes.dict @@ -132,3 +132,4 @@ "YEAR" "bool" "boolean" +"Dynamic" From 73504a048bdc8076b079fcbe93578229348ef761 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 16 May 2024 14:51:57 +0200 Subject: [PATCH 147/392] Fix doc --- docs/en/sql-reference/data-types/dynamic.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index a2c8ba532ce..eabf032c52f 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -355,7 +355,7 @@ SELECT * FROM test WHERE d2 == [1,2,3]::Array(UInt32)::Dynamic; - Compare `Dynamic` subcolumn with required type: ```sql -SELECT * FROM test WHERE d2.`Array(Int65)` == [1,2,3] -- or using variantElement(d2, 'Array(UInt32)') +SELECT * FROM test WHERE d2.`Array(Int64)` == [1,2,3] -- or using variantElement(d2, 'Array(UInt32)') ``` ```text From bb130f429e09b20d74f4df550fc096bd68262a14 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 16 May 2024 12:40:44 +0000 Subject: [PATCH 148/392] fix reading of columns of type Tuple(Map(LowCardinality(...))) --- .../SerializationLowCardinality.cpp | 9 ++++- .../03156_tuple_map_low_cardinality.reference | 6 ++++ .../03156_tuple_map_low_cardinality.sql | 33 +++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) create mode 
100644 tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference create mode 100644 tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index 2d2be195098..18d6e48623b 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -515,8 +515,14 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( size_t limit, DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, - SubstreamsCache * /* cache */) const + SubstreamsCache * cache) const { + if (auto cached_column = getFromSubstreamsCache(cache, settings.path)) + { + column = cached_column; + return; + } + auto mutable_column = column->assumeMutable(); ColumnLowCardinality & low_cardinality_column = typeid_cast(*mutable_column); @@ -670,6 +676,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams( } column = std::move(mutable_column); + addToSubstreamsCache(cache, settings.path, column); } void SerializationLowCardinality::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference new file mode 100644 index 00000000000..5b2a36927ee --- /dev/null +++ b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.reference @@ -0,0 +1,6 @@ +100000 +100000 +100000 +100000 +100000 +100000 diff --git a/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql new file mode 100644 index 00000000000..836b426a9a9 --- /dev/null +++ b/tests/queries/0_stateless/03156_tuple_map_low_cardinality.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS t_map_lc; + +CREATE TABLE t_map_lc +( + id UInt64, + t Tuple(m Map(LowCardinality(String), LowCardinality(String))) +) +ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO t_map_lc SELECT * FROM generateRandom('id UInt64, t Tuple(m Map(LowCardinality(String), LowCardinality(String)))') LIMIT 100000; + +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, mapKeys(t.m)); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.keys); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.values); +SELECT * FROM t_map_lc WHERE mapContains(t.m, 'not_existing_key_1337'); + +DROP TABLE t_map_lc; + +CREATE TABLE t_map_lc +( + id UInt64, + t Tuple(m Map(LowCardinality(String), LowCardinality(String))) +) +ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = '10G'; + +INSERT INTO t_map_lc SELECT * FROM generateRandom('id UInt64, t Tuple(m Map(LowCardinality(String), LowCardinality(String)))') LIMIT 100000; + +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, mapKeys(t.m)); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.keys); +SELECT count(), FROM t_map_lc WHERE NOT ignore(*, t.m.values); +SELECT * FROM t_map_lc WHERE mapContains(t.m, 'not_existing_key_1337'); + +DROP TABLE t_map_lc; From d10bf725f030d9a2fd18b0dd87be409c22461eb5 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 16 May 2024 14:57:22 +0200 Subject: [PATCH 149/392] ci config update to enable job --- tests/ci/ci_config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index dc67e05455c..60ad6933afc 
100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1260,8 +1260,7 @@ CI_CONFIG = CIConfig( ), JobNames.INTEGRATION_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, - # add [run_by_label="test arm"] to not run in regular pr workflow by default - job_config=JobConfig(num_batches=6, **integration_test_common_params, run_by_label="test arm"), # type: ignore + job_config=JobConfig(num_batches=5, **integration_test_common_params), # type: ignore ), JobNames.INTEGRATION_TEST: TestConfig( Build.PACKAGE_RELEASE, From 20b0a208bfdddd68f04c18ff74b3e2d4c99e2e2d Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 16 May 2024 15:04:13 +0200 Subject: [PATCH 150/392] Add proportionsZTest to docs --- .../functions/other-functions.md | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 2b0215115cb..64f823d0656 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -903,6 +903,52 @@ SELECT parseTimeDelta('1yr2mo') └──────────────────────────┘ ``` +## proportionsZTest + +Returns test statistics for the two proportion Z-test - a statistical test for comparing the proportions from two populations `x` and `y`. + +**Syntax** + +```sql +proportionsZTest(successes_x, successes_y, trials_x, trials_y, conf_level, pool_type) +``` + +**Arguments** + +- `successes_x`: Number of successes in population `x`. [UInt64](../data-types/int-uint.md). +- `successes_y`: Number of successes in population `y`. [UInt64](../data-types/int-uint.md). +- `trials_x`: Number of trials in population `x`. [UInt64](../data-types/int-uint.md). +- `trials_y`: NUmber of trials in population `y`. [UInt64](../data-types/int-uint.md). +- `conf_level`: Confidence level for the test. [Float64](../data-types/float.md). +- `pool_type`: Selection of pooling (way in which the standard error is estimated). can be either `unpooled` or `pooled`. [String](../data-types/string.md). + +:::note +For argument `pool_type`: In the pooled version, the two proportions are averaged, and only one proportion is used to estimate the standard error. In the unpooled version, the two proportions are used separately. +::: + +**Returned value** + +- `z_stat`: Z statistic. [Float64](../data-types/float.md). +- `p_val`: P value. [Float64](../data-types/float.md). +- `ci_low`: The lower confidence interval. [Float64](../data-types/float.md). +- `ci_high`: The upper confidence interval. [Float64](../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled'); +``` + +Result: + +```response +┌─proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled')───────────────────────────────┐ +│ (-0.20656724435948853,0.8363478437079654,-0.09345975390115283,0.07563797172293502) │ +└────────────────────────────────────────────────────────────────────────────────────┘ +``` + ## least(a, b) Returns the smaller value of a and b. 
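The documented example output above can be sanity-checked independently of ClickHouse. The sketch below is a minimal standalone reproduction of the `unpooled` two-proportion Z-test that the new documentation describes; the Python function name and structure are illustrative assumptions (not part of this patch or of the ClickHouse codebase), and only the standard library is used.

```python
from statistics import NormalDist

def proportions_ztest_unpooled(successes_x, successes_y, trials_x, trials_y, conf_level=0.95):
    """Two-proportion Z-test with an unpooled standard error (normal approximation)."""
    nd = NormalDist()
    p_x = successes_x / trials_x
    p_y = successes_y / trials_y
    diff = p_x - p_y
    # 'unpooled': each sample contributes its own variance estimate to the standard error.
    se = (p_x * (1 - p_x) / trials_x + p_y * (1 - p_y) / trials_y) ** 0.5
    z = diff / se
    p_val = 2 * (1 - nd.cdf(abs(z)))            # two-sided p-value
    z_crit = nd.inv_cdf(0.5 + conf_level / 2)   # ~1.96 for conf_level = 0.95
    return z, p_val, diff - z_crit * se, diff + z_crit * se

# Matches the documented example proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled'):
# prints approximately (-0.20657, 0.83635, -0.09346, 0.07564).
print(proportions_ztest_unpooled(10, 11, 100, 101))
```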
From 3ff2ec0a7d8d3006ccf90250cb95b6ac7c1e872e Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 16 May 2024 15:58:27 +0200 Subject: [PATCH 151/392] Fix segfault --- src/Storages/ObjectStorage/StorageObjectStorageSource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 9c67a125f5e..abaf51edc4e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -62,7 +62,7 @@ protected: const std::optional format_settings; const UInt64 max_block_size; const bool need_only_count; - const ReadFromFormatInfo & read_from_format_info; + const ReadFromFormatInfo read_from_format_info; const std::shared_ptr create_reader_pool; ColumnsDescription columns_desc; From 17aa7991016875df603bec8495e17d3c1dbb7d3a Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 16 May 2024 16:43:54 +0200 Subject: [PATCH 152/392] CI: aarh64: disable kerberos tests --- tests/ci/ci_config.py | 2 +- tests/integration/test_storage_kerberized_kafka/test.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 60ad6933afc..4761b5b450f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1260,7 +1260,7 @@ CI_CONFIG = CIConfig( ), JobNames.INTEGRATION_TEST_ARM: TestConfig( Build.PACKAGE_AARCH64, - job_config=JobConfig(num_batches=5, **integration_test_common_params), # type: ignore + job_config=JobConfig(num_batches=6, **integration_test_common_params), # type: ignore ), JobNames.INTEGRATION_TEST: TestConfig( Build.PACKAGE_RELEASE, diff --git a/tests/integration/test_storage_kerberized_kafka/test.py b/tests/integration/test_storage_kerberized_kafka/test.py index 451e1ab2ccf..24d10d7ff83 100644 --- a/tests/integration/test_storage_kerberized_kafka/test.py +++ b/tests/integration/test_storage_kerberized_kafka/test.py @@ -5,7 +5,7 @@ import time import pytest import logging -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, is_arm from helpers.test_tools import TSV from helpers.client import QueryRuntimeException @@ -18,6 +18,10 @@ from kafka.protocol.admin import DescribeGroupsResponse_v1, DescribeGroupsReques from kafka.protocol.group import MemberAssignment import socket +if is_arm(): + # skip due to no arm support for clickhouse/kerberos-kdc docker image + pytestmark = pytest.mark.skip + cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( "instance", From 93601066ea74a11da2dffedf6289e442997afaf9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 16 May 2024 14:54:21 +0000 Subject: [PATCH 153/392] Automatic style fix --- tests/ci/ci.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 3ed584f5d93..9c2ded20cff 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -968,10 +968,7 @@ class CiOptions: for job in jobs_to_do[:]: job_param = jobs_params[job] - if ( - job_param["run_by_ci_option"] - and job not in jobs_to_do_requested - ): + if job_param["run_by_ci_option"] and job not in jobs_to_do_requested: print( f"Erasing job '{job}' from list because it's not in included set, but will run only by include" ) @@ -1445,8 +1442,7 @@ def _configure_jobs( jobs_params[job] = { "batches": batches_to_do, "num_batches": num_batches, - "run_by_ci_option": job_config.run_by_ci_option - and pr_info.is_pr, + "run_by_ci_option": 
job_config.run_by_ci_option and pr_info.is_pr, } elif add_to_skip: # treat job as being skipped only if it's controlled by digest From d8941873ec0fca6b4a2f6f27e2b095d46ac75753 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 16 May 2024 17:38:15 +0200 Subject: [PATCH 154/392] Fix typo --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 64f823d0656..288432167bb 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -918,7 +918,7 @@ proportionsZTest(successes_x, successes_y, trials_x, trials_y, conf_level, pool_ - `successes_x`: Number of successes in population `x`. [UInt64](../data-types/int-uint.md). - `successes_y`: Number of successes in population `y`. [UInt64](../data-types/int-uint.md). - `trials_x`: Number of trials in population `x`. [UInt64](../data-types/int-uint.md). -- `trials_y`: NUmber of trials in population `y`. [UInt64](../data-types/int-uint.md). +- `trials_y`: Number of trials in population `y`. [UInt64](../data-types/int-uint.md). - `conf_level`: Confidence level for the test. [Float64](../data-types/float.md). - `pool_type`: Selection of pooling (way in which the standard error is estimated). can be either `unpooled` or `pooled`. [String](../data-types/string.md). From 9f70cb7cbfea827dcd2458beb5545608d14a5f02 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 16 May 2024 17:39:18 +0200 Subject: [PATCH 155/392] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index a69ca0fb644..bea838c1269 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -759,6 +759,7 @@ Promtail Protobuf ProtobufSingle ProxySQL +proportionsZTest Punycode PyArrow PyCharm @@ -2753,6 +2754,7 @@ unixODBC unixodbc unoptimized unparsed +unpooled unrealiable unreplicated unresolvable From e4eaf256b1746420ef359deef1af788eab02f0d8 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 16 May 2024 16:33:41 +0200 Subject: [PATCH 156/392] Analyzer: Fix COLUMNS resolve --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 30 +++++++++++++++++-- .../03152_analyzer_columns_list.reference | 1 + .../03152_analyzer_columns_list.sql | 1 + 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03152_analyzer_columns_list.reference create mode 100644 tests/queries/0_stateless/03152_analyzer_columns_list.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f0a3a2c74b6..dad1b41c7af 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -4608,6 +4608,34 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher( std::unordered_set table_expression_column_names_to_skip; + QueryTreeNodesWithNames result; + + if (matcher_node_typed.getMatcherType() == MatcherNodeType::COLUMNS_LIST) + { + auto identifiers = matcher_node_typed.getColumnsIdentifiers(); + result.reserve(identifiers.size()); + + for (const auto & identifier : identifiers) + { + auto resolve_result = 
tryResolveIdentifier(IdentifierLookup{identifier, IdentifierLookupContext::EXPRESSION}, scope); + if (!resolve_result.isResolved()) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Unknown identifier '{}' inside COLUMNS matcher. In scope {}", + identifier.getFullName(), scope.dump()); + + // TODO: Introduce IdentifierLookupContext::COLUMN and get read of this check + auto * resolved_column = resolve_result.resolved_identifier->as(); + if (!resolved_column) + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Identifier '{}' inside COLUMNS matcher must resolve into a column, but got {}. In scope {}", + identifier.getFullName(), resolve_result.resolved_identifier->getNodeTypeName(), scope.dump()); + result.emplace_back(resolve_result.resolved_identifier, resolved_column->getColumnName()); + } + return result; + } + + result.resize(matcher_node_typed.getColumnsIdentifiers().size()); + for (auto & table_expression : table_expressions_stack) { bool table_expression_in_resolve_process = nearest_query_scope->table_expressions_in_resolve_process.contains(table_expression.get()); @@ -4775,8 +4803,6 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher( table_expressions_column_nodes_with_names_stack.push_back(std::move(matched_column_nodes_with_names)); } - QueryTreeNodesWithNames result; - for (auto & table_expression_column_nodes_with_names : table_expressions_column_nodes_with_names_stack) { for (auto && table_expression_column_node_with_name : table_expression_column_nodes_with_names) diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.reference b/tests/queries/0_stateless/03152_analyzer_columns_list.reference new file mode 100644 index 00000000000..eefa8ebd513 --- /dev/null +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.reference @@ -0,0 +1 @@ +11323 8 diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.sql b/tests/queries/0_stateless/03152_analyzer_columns_list.sql new file mode 100644 index 00000000000..5a7e3e9696e --- /dev/null +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.sql @@ -0,0 +1 @@ +SELECT COLUMNS(license_text, library_name) APPLY (length) FROM system.licenses ORDER BY library_name LIMIT 1; From d5b690339309ba5082e20af294dcabf5ec306a7c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 16 May 2024 16:49:28 +0200 Subject: [PATCH 157/392] Cleanup and add test --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 6 ++++-- tests/queries/0_stateless/03152_analyzer_columns_list.sql | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index dad1b41c7af..dfc5ebb3532 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -4623,12 +4623,14 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher( "Unknown identifier '{}' inside COLUMNS matcher. In scope {}", identifier.getFullName(), scope.dump()); - // TODO: Introduce IdentifierLookupContext::COLUMN and get read of this check + // TODO: Introduce IdentifierLookupContext::COLUMN and get rid of this check auto * resolved_column = resolve_result.resolved_identifier->as(); if (!resolved_column) throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Identifier '{}' inside COLUMNS matcher must resolve into a column, but got {}. 
In scope {}", - identifier.getFullName(), resolve_result.resolved_identifier->getNodeTypeName(), scope.dump()); + identifier.getFullName(), + resolve_result.resolved_identifier->getNodeTypeName(), + scope.scope_node->formatASTForErrorMessage()); result.emplace_back(resolve_result.resolved_identifier, resolved_column->getColumnName()); } return result; diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.sql b/tests/queries/0_stateless/03152_analyzer_columns_list.sql index 5a7e3e9696e..2b19cdf37a2 100644 --- a/tests/queries/0_stateless/03152_analyzer_columns_list.sql +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.sql @@ -1 +1,3 @@ SELECT COLUMNS(license_text, library_name) APPLY (length) FROM system.licenses ORDER BY library_name LIMIT 1; + +SELECT COLUMNS(license_text, library_name, xyz) APPLY (length) FROM system.licenses; -- { serverError UNKNOWN_IDENTIFIER } From b82eeeee88b521f5a4beb4a20006a452f0c0bb35 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 16 May 2024 17:43:59 +0000 Subject: [PATCH 158/392] Check what would be broken if do not add all the identifiers to functions map. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 52efee03ae4..d83b1b847bf 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1039,10 +1039,6 @@ private: auto [_, inserted] = scope.alias_name_to_expression_node.insert(std::make_pair(alias, node)); if (!inserted) scope.nodes_with_duplicated_aliases.insert(node); - - /// If node is identifier put it also in scope alias name to lambda node map - if (node->getNodeType() == QueryTreeNodeType::IDENTIFIER) - scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); } IdentifierResolveScope & scope; From 3fe9255d74d3b274e530208b7f2a76927f6b5728 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 16 May 2024 19:19:51 +0000 Subject: [PATCH 159/392] Fix tests flakiness --- .../Serializations/SerializationDynamic.cpp | 2 +- .../03037_dynamic_merges_1.reference | 120 ------------------ ...3037_dynamic_merges_1_horizontal.reference | 60 +++++++++ .../03037_dynamic_merges_1_horizontal.sh | 52 ++++++++ .../03037_dynamic_merges_1_vertical.reference | 60 +++++++++ ....sh => 03037_dynamic_merges_1_vertical.sh} | 17 +-- .../03039_dynamic_all_merge_algorithms_1.sh | 6 +- .../03040_dynamic_type_alters_1.sh | 2 +- 8 files changed, 180 insertions(+), 139 deletions(-) delete mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1.reference create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference create mode 100755 tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference rename tests/queries/0_stateless/{03037_dynamic_merges_1.sh => 03037_dynamic_merges_1_vertical.sh} (79%) diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index cb9d4a2f7bc..6351ff0ca0b 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -33,7 +33,7 @@ struct SerializeBinaryBulkStateDynamic : public ISerialization::SerializeBinaryB /// Variants statistics. Map (Variant name) -> (Variant size). 
ColumnDynamic::Statistics statistics = { .source = ColumnDynamic::Statistics::Source::READ, .data = {} }; - SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} + explicit SerializeBinaryBulkStateDynamic(UInt64 structure_version_) : structure_version(structure_version_) {} }; struct DeserializeBinaryBulkStateDynamic : public ISerialization::DeserializeBinaryBulkState diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.reference b/tests/queries/0_stateless/03037_dynamic_merges_1.reference deleted file mode 100644 index 0a647b41c4b..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_1.reference +++ /dev/null @@ -1,120 +0,0 @@ -MergeTree compact + horizontal merge -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String -MergeTree wide + horizontal merge -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String -MergeTree compact + vertical merge -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String -MergeTree wide + vertical merge -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference new file mode 100644 index 00000000000..59297e46330 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference @@ -0,0 +1,60 @@ +MergeTree compact +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 
Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh new file mode 100755 index 00000000000..0d3cd45666a --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 " + +function test() +{ + echo "test" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" + $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" + $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" + $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" + $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final;" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;" +test +$CH_CLIENT -q "drop table test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, 
vertical_merge_algorithm_min_columns_to_activate=10;" +test +$CH_CLIENT -q "drop table test;" + diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference new file mode 100644 index 00000000000..59297e46330 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference @@ -0,0 +1,60 @@ +MergeTree compact +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String +MergeTree wide +test +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh similarity index 79% rename from tests/queries/0_stateless/03037_dynamic_merges_1.sh rename to tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh index 056f6702727..b2c40668228 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_1.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh @@ -7,8 +7,8 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 " function test() { @@ -40,23 +40,12 @@ function test() $CH_CLIENT -q "drop table if exists test;" -echo "MergeTree compact + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10;" -test -$CH_CLIENT -q "drop table test;" - - -echo "MergeTree compact + vertical merge" +echo "MergeTree compact" $CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" test $CH_CLIENT -q "drop table test;" -echo "MergeTree wide + vertical merge" +echo "MergeTree wide" $CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" test $CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh index 198c6ca93ff..0941f2da369 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --optimize_aggregation_in_order 0" function test() @@ -53,10 +53,10 @@ function test() $CH_CLIENT -q "drop table if exists test;" echo "MergeTree compact + horizontal merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" echo "MergeTree wide + horizontal merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" echo "MergeTree compact + vertical merge" test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh index 1f2a6a31ad7..7a73be20a4d 100755 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_analyzer=1" function run() { From 9dbc9f038b6e316b4227a54b4a70e1e0eb8f7361 Mon Sep 17 00:00:00 2001 From: copperybean Date: Fri, 17 May 2024 11:11:53 +0800 Subject: [PATCH 160/392] fix comments second time Change-Id: I4b75367233f99ef432cdff78f724195673755a83 --- src/Core/SettingsChangesHistory.h | 2 +- .../Formats/Impl/Parquet/ParquetDataValuesReader.cpp | 3 +++ .../Formats/Impl/Parquet/ParquetRecordReader.cpp | 12 ++++++++---- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 6fb8fb9358c..96ab7490c1f 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -91,6 +91,7 @@ static std::map sett {"cross_join_min_rows_to_compress", 0, 10000000, "A new setting."}, {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, + {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, }}, @@ -176,7 +177,6 @@ static std::map sett {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, 
{"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, }}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 1f0c7105572..65f569ec264 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -29,6 +29,9 @@ void RleValuesReader::nextGroup() { cur_group_size *= 8; cur_packed_bit_values.resize(cur_group_size); + + // try to suppress clang tidy warnings by assertion + assert(bit_width < 64); bit_reader->GetBatch(bit_width, cur_packed_bit_values.data(), cur_group_size); } else diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index fddd8059925..0b797dd66ad 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -192,6 +192,7 @@ std::unique_ptr ColReaderFactory::fromByteArray() switch (col_descriptor.logical_type()->type()) { case parquet::LogicalType::Type::STRING: + case parquet::LogicalType::Type::NONE: return makeLeafReader(); default: return throwUnsupported(); @@ -204,10 +205,13 @@ std::unique_ptr ColReaderFactory::fromFLBA() { case parquet::LogicalType::Type::DECIMAL: { - if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) - return makeDecimalLeafReader(); - else if (col_descriptor.type_length() <= static_cast(sizeof(Decimal256))) - return makeDecimalLeafReader(); + if (col_descriptor.type_length() > 0) + { + if (col_descriptor.type_length() <= static_cast(sizeof(Decimal128))) + return makeDecimalLeafReader(); + else if (col_descriptor.type_length() <= static_cast(sizeof(Decimal256))) + return makeDecimalLeafReader(); + } return throwUnsupported(PreformattedMessage::create( ", invalid type length: {}", col_descriptor.type_length())); From cc583185bdfe7f336af795d95cd97ce65cbef10b Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 17 May 2024 08:33:08 +0200 Subject: [PATCH 161/392] Add revision and make some formatting changes to other-functions page --- .../functions/other-functions.md | 437 ++++++++++++------ src/Functions/array/arrayUnion.cpp | 0 .../03155_function_array_clamp.sql | 11 + 3 files changed, 313 insertions(+), 135 deletions(-) create mode 100644 src/Functions/array/arrayUnion.cpp create mode 100755 tests/queries/0_stateless/03155_function_array_clamp.sql diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 11ee471d709..5b77f16027b 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -6,11 +6,21 @@ sidebar_label: Other # Other Functions -## hostName() +## hostName Returns the name of the host on which this function was executed. If the function executes on a remote server (distributed processing), the remote server name is returned. 
If the function executes in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +**Syntax** + +```sql +hostName() +``` + +**Returned value** + +- Host name. [String](../data-types/string.md). + ## getMacro {#getMacro} Returns a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration. @@ -27,9 +37,7 @@ getMacro(name); **Returned value** -- Value of the specified macro. - -Type: [String](../../sql-reference/data-types/string.md). +- Value of the specified macro.[String](../../sql-reference/data-types/string.md). **Example** @@ -82,9 +90,7 @@ This function is case-insensitive. **Returned value** -- String with the fully qualified domain name. - -Type: `String`. +- String with the fully qualified domain name. [String](../data-types/string.md). **Example** @@ -163,34 +169,58 @@ Result: └────────────────┴────────────────────────────┘ ``` -## visibleWidth(x) +## visibleWidth Calculates the approximate width when outputting values to the console in text format (tab-separated). -This function is used by the system to implement Pretty formats. +This function is used by the system to implement [Pretty formats](../formats.mdx). `NULL` is represented as a string corresponding to `NULL` in `Pretty` formats. +**Syntax** + +```sql +visibleWidth(x) +``` + +**Example** + +Query: + ```sql SELECT visibleWidth(NULL) ``` +Result: + ```text ┌─visibleWidth(NULL)─┐ │ 4 │ └────────────────────┘ ``` -## toTypeName(x) +## toTypeName Returns the type name of the passed argument. If `NULL` is passed, then the function returns type `Nullable(Nothing)`, which corresponds to ClickHouse's internal `NULL` representation. -## blockSize() {#blockSize} +**Syntax** + +```sql +toTypeName(x) +``` + +## blockSize {#blockSize} In ClickHouse, queries are processed in blocks (chunks). This function returns the size (row count) of the block the function is called on. +**Syntax** + +```sql +blockSize() +``` + ## byteSize Returns an estimation of uncompressed byte size of its arguments in memory. @@ -207,9 +237,7 @@ byteSize(argument [, ...]) **Returned value** -- Estimation of byte size of the arguments in memory. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Estimation of byte size of the arguments in memory. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -288,16 +316,28 @@ Result: └────────────────────────────┘ ``` -## materialize(x) +## materialize Turns a constant into a full column containing a single value. Full columns and constants are represented differently in memory. Functions usually execute different code for normal and constant arguments, although the result should typically be the same. This function can be used to debug this behavior. -## ignore(…) +**Syntax** + +```sql +materialize(x) +``` + +## ignore Accepts any arguments, including `NULL` and does nothing. Always returns 0. The argument is internally still evaluated. Useful e.g. for benchmarks. +**Syntax** + +```sql +ignore(…) +``` + ## sleep Used to introduce a delay or pause in the execution of a query. It is primarily used for testing and debugging purposes. 
@@ -392,27 +432,33 @@ The `sleepEachRow()` function is primarily used for testing and debugging purpos Like the [`sleep()` function](#sleep), it's important to use `sleepEachRow()` judiciously and only when necessary, as it can significantly impact the overall performance and responsiveness of your ClickHouse system, especially when dealing with large result sets. -## currentDatabase() +## currentDatabase Returns the name of the current database. Useful in table engine parameters of `CREATE TABLE` queries where you need to specify the database. -## currentUser() {#currentUser} +**Syntax** + +```sql +currentDatabase() +``` + +## currentUser {#currentUser} Returns the name of the current user. In case of a distributed query, the name of the user who initiated the query is returned. +**Syntax** + ```sql -SELECT currentUser(); +currentUser() ``` Aliases: `user()`, `USER()`, `current_user()`. Aliases are case insensitive. **Returned values** -- The name of the current user. -- In distributed queries, the login of the user who initiated the query. - -Type: `String`. +- The name of the current user. [String](../data-types/string.md). +- In distributed queries, the login of the user who initiated the query. [String](../data-types/string.md). **Example** @@ -448,10 +494,8 @@ isConstant(x) **Returned values** -- `1` if `x` is constant. -- `0` if `x` is non-constant. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `x` is constant. [UInt8](../../sql-reference/data-types/int-uint.md). +- `0` if `x` is non-constant. [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** @@ -497,14 +541,26 @@ Result: └────────────────────┘ ``` -## isFinite(x) +## isFinite Returns 1 if the Float32 or Float64 argument not infinite and not a NaN, otherwise this function returns 0. -## isInfinite(x) +**Syntax** + +```sql +isFinite(x) +``` + +## isInfinite Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. Note that 0 is returned for a NaN. +**Syntax** + +```sql +isInfinite(x) +``` + ## ifNotFinite Checks whether a floating point value is finite. @@ -517,8 +573,8 @@ ifNotFinite(x,y) **Arguments** -- `x` — Value to check for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). +- `x` — Value to check for infinity. [Float\*](../../sql-reference/data-types/float.md). +- `y` — Fallback value. [Float\*](../../sql-reference/data-types/float.md). **Returned value** @@ -539,10 +595,16 @@ Result: You can get similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. -## isNaN(x) +## isNaN Returns 1 if the Float32 and Float64 argument is NaN, otherwise this function 0. +**Syntax** + +```sql +isNaN(x) +``` + ## hasColumnInTable Given the database name, the table name, and the column name as constant strings, returns 1 if the given column exists, otherwise 0. @@ -733,11 +795,19 @@ LIMIT 10 └────────────────┴─────────┘ ``` -## formatReadableDecimalSize(x) +## formatReadableDecimalSize Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string. 
-Example: +**Syntax** + +```sql +formatReadableDecimalSize(x) +``` + +**Example** + +Query: ```sql SELECT @@ -745,6 +815,8 @@ SELECT formatReadableDecimalSize(filesize_bytes) AS filesize ``` +Result: + ```text ┌─filesize_bytes─┬─filesize───┐ │ 1 │ 1.00 B │ @@ -754,11 +826,20 @@ SELECT └────────────────┴────────────┘ ``` -## formatReadableSize(x) +## formatReadableSize Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string. -Example: +**Syntax** + +```sql +formatReadableSize(x) +``` +Alias: `FORMAT_BYTES`. + +**Example** + +Query: ```sql SELECT @@ -766,7 +847,7 @@ SELECT formatReadableSize(filesize_bytes) AS filesize ``` -Alias: `FORMAT_BYTES`. +Result: ```text ┌─filesize_bytes─┬─filesize───┐ @@ -777,11 +858,19 @@ Alias: `FORMAT_BYTES`. └────────────────┴────────────┘ ``` -## formatReadableQuantity(x) +## formatReadableQuantity Given a number, this function returns a rounded number with suffix (thousand, million, billion, etc.) as string. -Example: +**Syntax** + +```sql +formatReadableQuantity(x) +``` + +**Example** + +Query: ```sql SELECT @@ -789,6 +878,8 @@ SELECT formatReadableQuantity(number) AS number_for_humans ``` +Result: + ```text ┌─────────number─┬─number_for_humans─┐ │ 1024 │ 1.02 thousand │ @@ -903,15 +994,27 @@ SELECT parseTimeDelta('1yr2mo') └──────────────────────────┘ ``` -## least(a, b) +## least Returns the smaller value of a and b. -## greatest(a, b) +**Syntax** + +```sql +least(a, b) +``` + +## greatest Returns the larger value of a and b. -## uptime() +**Syntax** + +```sql +greatest(a, b) +``` + +## uptime Returns the server’s uptime in seconds. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. @@ -924,9 +1027,7 @@ uptime() **Returned value** -- Time value of seconds. - -Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Time value of seconds. [UInt32](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -944,7 +1045,7 @@ Result: └────────┘ ``` -## version() +## version Returns the current version of ClickHouse as a string in the form of: @@ -971,7 +1072,7 @@ None. **Returned value** -Type: [String](../data-types/string) +- Current version of ClickHouse. [String](../data-types/string) **Implementation details** @@ -993,23 +1094,47 @@ SELECT version() └───────────┘ ``` -## buildId() +## buildId Returns the build ID generated by a compiler for the running ClickHouse server binary. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. -## blockNumber() +**Syntax** + +```sql +buildId() +``` + +## blockNumber Returns the sequence number of the data block where the row is located. -## rowNumberInBlock() {#rowNumberInBlock} +**Syntax** + +```sql +blockNumber() +``` + +## rowNumberInBlock {#rowNumberInBlock} Returns the ordinal number of the row in the data block. Different data blocks are always recalculated. -## rowNumberInAllBlocks() +**Syntax** + +```sql +rowNumberInBlock() +``` + +## rowNumberInAllBlocks Returns the ordinal number of the row in the data block. This function only considers the affected data blocks. +**Syntax** + +```sql +rowNumberInAllBlocks() +``` + ## neighbor The window function that provides access to a row at a specified offset before or after the current row of a given column. 
@@ -1128,7 +1253,7 @@ Result: └────────────┴───────┴───────────┴────────────────┘ ``` -## runningDifference(x) {#runningDifference} +## runningDifference {#runningDifference} Calculates the difference between two consecutive row values in the data block. Returns 0 for the first row, and for subsequent rows the difference to the previous row. @@ -1143,7 +1268,15 @@ The result of the function depends on the affected data blocks and the order of The order of rows during calculation of `runningDifference()` can differ from the order of rows returned to the user. To prevent that you can create a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery. -Example: +**Syntax** + +```sql +runningDifference(x) +``` + +**Example** + +Query: ```sql SELECT @@ -1162,6 +1295,8 @@ FROM ) ``` +Result: + ```text ┌─EventID─┬───────────EventTime─┬─delta─┐ │ 1106 │ 2016-11-24 00:00:04 │ 0 │ @@ -1174,6 +1309,8 @@ FROM Please note that the block size affects the result. The internal state of `runningDifference` state is reset for each new block. +Query: + ```sql SELECT number, @@ -1182,6 +1319,8 @@ FROM numbers(100000) WHERE diff != 1 ``` +Result: + ```text ┌─number─┬─diff─┐ │ 0 │ 0 │ @@ -1191,6 +1330,8 @@ WHERE diff != 1 └────────┴──────┘ ``` +Query: + ```sql set max_block_size=100000 -- default value is 65536! @@ -1201,6 +1342,8 @@ FROM numbers(100000) WHERE diff != 1 ``` +Result: + ```text ┌─number─┬─diff─┐ │ 0 │ 0 │ @@ -1238,9 +1381,7 @@ runningConcurrency(start, end) **Returned values** -- The number of concurrent events at each event start time. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md) +- The number of concurrent events at each event start time. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -1272,23 +1413,43 @@ Result: └────────────┴────────────────────────────────┘ ``` -## MACNumToString(num) +## MACNumToString Interprets a UInt64 number as a MAC address in big endian format. Returns the corresponding MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form) as string. -## MACStringToNum(s) +**Syntax** + +```sql +MACNumToString(num) +``` + +## MACStringToNum The inverse function of MACNumToString. If the MAC address has an invalid format, it returns 0. -## MACStringToOUI(s) +**Syntax** + +```sql +MACStringToNum(s) +``` + +## MACStringToOUI Given a MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form), returns the first three octets as a UInt64 number. If the MAC address has an invalid format, it returns 0. +**Syntax** + +```sql +MACStringToOUI(s) +``` + ## getSizeOfEnumType Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). An exception is thrown if the type is not `Enum`. +**Syntax** + ```sql getSizeOfEnumType(value) ``` @@ -1349,6 +1510,8 @@ Result: Returns the internal name of the data type that represents the value. +**Syntax** + ```sql toColumnTypeName(value) ``` @@ -1427,6 +1590,8 @@ Returns the default value for the given data type. Does not include default values for custom columns set by the user. +**Syntax** + ```sql defaultValueOfArgumentType(expression) ``` @@ -1625,29 +1790,31 @@ Result: Creates an array with a single value. -Used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin). +:::note +This function is used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin). 
+::: + +**Syntax** ```sql -SELECT replicate(x, arr); +replicate(x, arr) ``` -**Arguments:** +**Arguments** -- `arr` — An array. - `x` — The value to fill the result array with. +- `arr` — An array. [Array](../data-types/array.md). **Returned value** -An array of the lame length as `arr` filled with value `x`. - -Type: `Array`. +An array of the lame length as `arr` filled with value `x`. [Array](../data-types/array.md). **Example** Query: ```sql -SELECT replicate(1, ['a', 'b', 'c']) +SELECT replicate(1, ['a', 'b', 'c']); ``` Result: @@ -1658,6 +1825,36 @@ Result: └───────────────────────────────┘ ``` +## revision + +Returns the current ClickHouse [server revision](../../operations/system-tables/metrics#revision). + +**Syntax** + +```sql +revision() +``` + +**Returned value** + +- The current ClickHouse server revision. [UInt32](../data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT revision(); +``` + +Result: + +```response +┌─revision()─┐ +│ 54485 │ +└────────────┘ +``` + ## filesystemAvailable Returns the amount of free space in the filesystem hosting the database persistence. The returned value is always smaller than total free space ([filesystemFree](#filesystemfree)) because some space is reserved for the operating system. @@ -1670,9 +1867,7 @@ filesystemAvailable() **Returned value** -- The amount of remaining space available in bytes. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The amount of remaining space available in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). **Example** @@ -1702,9 +1897,7 @@ filesystemFree() **Returned value** -- The amount of free space in bytes. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The amount of free space in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). **Example** @@ -1734,9 +1927,7 @@ filesystemCapacity() **Returned value** -- Capacity of the filesystem in bytes. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Capacity of the filesystem in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). **Example** @@ -2100,7 +2291,7 @@ Result: └──────────────────────────────────────────────────┘ ``` -## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n) +## catboostEvaluate :::note This function is not available in ClickHouse Cloud. @@ -2109,6 +2300,14 @@ This function is not available in ClickHouse Cloud. Evaluate an external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning. Accepts a path to a catboost model and model arguments (features). Returns Float64. +**Syntax** + +```sql +catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n) +``` + +**Example** + ```sql SELECT feat1, ..., feat_n, catboostEvaluate('/path/to/model.bin', feat_1, ..., feat_n) AS prediction FROM data_table @@ -2145,10 +2344,16 @@ communicate using a HTTP interface. By default, port `9012` is used. A different See [Training and applying models](https://catboost.ai/docs/features/training.html#training) for how to train catboost models from a training data set. -## throwIf(x\[, message\[, error_code\]\]) +## throwIf Throw an exception if argument `x` is true. +**Syntax** + +```sql +throwIf(x\[, message\[, error_code\]\]) +``` + **Arguments** - `x` - the condition to check. @@ -2284,9 +2489,7 @@ countDigits(x) **Returned value** -Number of digits. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Number of digits. 
[UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). :::note For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). @@ -2310,9 +2513,7 @@ Result: ## errorCodeToName -Returns the textual name of an error code. - -Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). +- Returns the textual name of an error code. [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). **Syntax** @@ -2343,9 +2544,7 @@ tcpPort() **Returned value** -- The TCP port number. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The TCP port number. [UInt16](../../sql-reference/data-types/int-uint.md). **Example** @@ -2381,9 +2580,7 @@ currentProfiles() **Returned value** -- List of the current user settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the current user settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## enabledProfiles @@ -2397,9 +2594,7 @@ enabledProfiles() **Returned value** -- List of the enabled settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## defaultProfiles @@ -2413,9 +2608,7 @@ defaultProfiles() **Returned value** -- List of the default settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## currentRoles @@ -2429,9 +2622,7 @@ currentRoles() **Returned value** -- A list of the current roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- A list of the current roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## enabledRoles @@ -2445,9 +2636,7 @@ enabledRoles() **Returned value** -- List of the enabled roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## defaultRoles @@ -2461,9 +2650,7 @@ defaultRoles() **Returned value** -- List of the default roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## getServerPort @@ -2492,9 +2679,7 @@ getServerPort(port_name) **Returned value** -- The number of the server port. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The number of the server port. [UInt16](../../sql-reference/data-types/int-uint.md). 
**Example** @@ -2526,9 +2711,7 @@ queryID() **Returned value** -- The ID of the current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the current query. [String](../../sql-reference/data-types/string.md). **Example** @@ -2562,9 +2745,7 @@ initialQueryID() **Returned value** -- The ID of the initial current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the initial current query. [String](../../sql-reference/data-types/string.md). **Example** @@ -2597,9 +2778,7 @@ shardNum() **Returned value** -- Shard index or constant `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Shard index or constant `0`. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -2639,9 +2818,7 @@ shardCount() **Returned value** -- Total number of shards or `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Total number of shards or `0`. [UInt32](../../sql-reference/data-types/int-uint.md). **See Also** @@ -2663,9 +2840,7 @@ getOSKernelVersion() **Returned value** -- The current OS kernel version. - -Type: [String](../../sql-reference/data-types/string.md). +- The current OS kernel version. [String](../../sql-reference/data-types/string.md). **Example** @@ -2699,9 +2874,7 @@ zookeeperSessionUptime() **Returned value** -- Uptime of the current ZooKeeper session in seconds. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Uptime of the current ZooKeeper session in seconds. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -2738,9 +2911,7 @@ All arguments must be constant. **Returned value** -- Randomly generated table structure. - -Type: [String](../../sql-reference/data-types/string.md). +- Randomly generated table structure. [String](../../sql-reference/data-types/string.md). **Examples** @@ -2807,9 +2978,7 @@ structureToCapnProtoSchema(structure) **Returned value** -- CapnProto schema - -Type: [String](../../sql-reference/data-types/string.md). +- CapnProto schema. [String](../../sql-reference/data-types/string.md). **Examples** @@ -2908,9 +3077,7 @@ structureToProtobufSchema(structure) **Returned value** -- Protobuf schema - -Type: [String](../../sql-reference/data-types/string.md). +- Protobuf schema. [String](../../sql-reference/data-types/string.md). **Examples** diff --git a/src/Functions/array/arrayUnion.cpp b/src/Functions/array/arrayUnion.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03155_function_array_clamp.sql b/tests/queries/0_stateless/03155_function_array_clamp.sql new file mode 100755 index 00000000000..4794dafda4b --- /dev/null +++ b/tests/queries/0_stateless/03155_function_array_clamp.sql @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-ordinary-database, long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# In previous versions this command took longer than ten minutes. 
Now it takes less than a second in release mode: + +python3 -c 'import sys; import struct; sys.stdout.buffer.write(b"".join(struct.pack(" Date: Fri, 17 May 2024 08:44:25 +0200 Subject: [PATCH 162/392] Remove files which shouldn't be on this branch --- src/Functions/array/arrayUnion.cpp | 0 .../0_stateless/03155_function_array_clamp.sql | 11 ----------- 2 files changed, 11 deletions(-) delete mode 100644 src/Functions/array/arrayUnion.cpp delete mode 100755 tests/queries/0_stateless/03155_function_array_clamp.sql diff --git a/src/Functions/array/arrayUnion.cpp b/src/Functions/array/arrayUnion.cpp deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03155_function_array_clamp.sql b/tests/queries/0_stateless/03155_function_array_clamp.sql deleted file mode 100755 index 4794dafda4b..00000000000 --- a/tests/queries/0_stateless/03155_function_array_clamp.sql +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-ordinary-database, long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -# In previous versions this command took longer than ten minutes. Now it takes less than a second in release mode: - -python3 -c 'import sys; import struct; sys.stdout.buffer.write(b"".join(struct.pack(" Date: Thu, 16 May 2024 18:17:46 +0200 Subject: [PATCH 163/392] Support for archives (unfinished) --- src/IO/S3/URI.h | 1 + .../ObjectStorage/ReadBufferIterator.cpp | 34 ++-- .../ObjectStorage/S3/Configuration.cpp | 8 + src/Storages/ObjectStorage/S3/Configuration.h | 3 + .../ObjectStorage/StorageObjectStorage.cpp | 10 ++ .../ObjectStorage/StorageObjectStorage.h | 4 + .../StorageObjectStorageSource.cpp | 146 +++++++++++++++++- .../StorageObjectStorageSource.h | 70 ++++++++- 8 files changed, 255 insertions(+), 21 deletions(-) diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index c52e6bc1441..363f98c46f5 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -29,6 +29,7 @@ struct URI std::string key; std::string version_id; std::string storage_name; + /// Path (or path pattern) in archive if uri is an archive. 
std::optional archive_pattern; std::string uri_str; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 3705725ffe1..61575b0115a 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -244,22 +245,35 @@ ReadBufferIterator::Data ReadBufferIterator::next() } } - std::unique_ptr read_buffer = object_storage->readObject( - StoredObject(current_object_info->relative_path), - getContext()->getReadSettings(), - {}, - current_object_info->metadata->size_bytes); + std::unique_ptr read_buf; + CompressionMethod compression_method; + using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; + if (auto object_info_in_archive = dynamic_cast(current_object_info.get())) + { + compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method); + auto & archive_reader = object_info_in_archive->archive_reader; + read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); + } + else + { + compression_method = chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method); + read_buf = object_storage->readObject( + StoredObject(current_object_info->relative_path), + getContext()->getReadSettings(), + {}, + current_object_info->metadata->size_bytes); + } - if (!query_settings.skip_empty_files || !read_buffer->eof()) + if (!query_settings.skip_empty_files || !read_buf->eof()) { first = false; - read_buffer = wrapReadBufferWithCompressionMethod( - std::move(read_buffer), - chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method), + read_buf = wrapReadBufferWithCompressionMethod( + std::move(read_buf), + compression_method, static_cast(getContext()->getSettingsRef().zstd_window_log_max)); - return {std::move(read_buffer), std::nullopt, format}; + return {std::move(read_buf), std::nullopt, format}; } } } diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 9fcbc6a6816..00d569fea9f 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -55,6 +55,14 @@ String StorageS3Configuration::getDataSourceDescription() return std::filesystem::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; } +std::string StorageS3Configuration::getPathInArchive() const +{ + if (url.archive_pattern.has_value()) + return url.archive_pattern.value(); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not an archive", getPath()); +} + void StorageS3Configuration::check(ContextPtr context) const { validateNamespace(url.bucket); diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index 0bd7f1ab108..de6c02d5020 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -34,6 +34,9 @@ public: String getDataSourceDescription() override; StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; + bool isArchive() const override { return url.archive_pattern.has_value(); } + std::string getPathInArchive() const override; + void check(ContextPtr context) const override; void validateNamespace(const String & name) const override; ConfigurationPtr clone() override { return 
std::make_shared(*this); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index bc5b347d1e0..73e3d861cff 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -452,6 +452,16 @@ std::string StorageObjectStorage::Configuration::getPathWithoutGlobs() const return getPath().substr(0, getPath().find_first_of("*?{")); } +bool StorageObjectStorage::Configuration::isPathInArchiveWithGlobs() const +{ + return getPathInArchive().find_first_of("*?{") != std::string::npos; +} + +std::string StorageObjectStorage::Configuration::getPathInArchive() const +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not archive", getPath()); +} + void StorageObjectStorage::Configuration::assertInitialized() const { if (!initialized) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 26b153ca0db..7b118cb7e6b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -175,6 +175,10 @@ public: bool isNamespaceWithGlobs() const; virtual std::string getPathWithoutGlobs() const; + virtual bool isArchive() const { return false; } + bool isPathInArchiveWithGlobs() const; + virtual std::string getPathInArchive() const; + virtual void check(ContextPtr context) const; virtual void validateNamespace(const String & /* name */) const {} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 8d5df96ca6e..56905e6c29b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -100,10 +101,11 @@ std::shared_ptr StorageObjectStorageSourc auto settings = configuration->getQuerySettings(local_context); + std::unique_ptr iterator; if (configuration->isPathWithGlobs()) { /// Iterate through disclosed globs and make a source for each file - return std::make_shared( + iterator = std::make_unique( object_storage, configuration, predicate, virtual_columns, local_context, read_keys, settings.list_object_keys_size, settings.throw_on_zero_files_match, file_progress_callback); @@ -123,10 +125,17 @@ std::shared_ptr StorageObjectStorageSourc copy_configuration->setPaths(keys); } - return std::make_shared( + iterator = std::make_unique( object_storage, copy_configuration, virtual_columns, read_keys, settings.ignore_non_existent_file, file_progress_callback); } + + if (configuration->isArchive()) + { + return std::make_shared(object_storage, configuration, std::move(iterator), local_context, read_keys); + } + + return iterator; } void StorageObjectStorageSource::lazyInitialize(size_t processor) @@ -262,9 +271,20 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade } else { - const auto compression_method = chooseCompressionMethod(object_info->relative_path, configuration->compression_method); + CompressionMethod compression_method; const auto max_parsing_threads = need_only_count ? 
std::optional(1) : std::nullopt; - read_buf = createReadBuffer(object_info->relative_path, object_info->metadata->size_bytes); + + if (auto object_info_in_archive = dynamic_cast(object_info.get())) + { + compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method); + auto & archive_reader = object_info_in_archive->archive_reader; + read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); + } + else + { + compression_method = chooseCompressionMethod(object_info->relative_path, configuration->compression_method); + read_buf = createReadBuffer(*object_info); + } auto input_format = FormatFactory::instance().getInput( configuration->format, *read_buf, read_from_format_info.format_header, @@ -312,8 +332,10 @@ std::future StorageObjectStorageSource return create_reader_scheduler([=, this] { return createReader(processor); }, Priority{}); } -std::unique_ptr StorageObjectStorageSource::createReadBuffer(const String & key, size_t object_size) +std::unique_ptr StorageObjectStorageSource::createReadBuffer(const ObjectInfo & object_info) { + const auto & object_size = object_info.metadata->size_bytes; + auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); read_settings.enable_filesystem_cache = false; /// FIXME: Changing this setting to default value breaks something around parquet reading @@ -333,7 +355,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const S LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); auto async_reader = object_storage->readObjects( - StoredObjects{StoredObject{key, /* local_path */ "", object_size}}, read_settings); + StoredObjects{StoredObject{object_info.relative_path, /* local_path */ "", object_size}}, read_settings); async_reader->setReadUntilEnd(); if (read_settings.remote_fs_prefetch) @@ -344,7 +366,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const S else { /// FIXME: this is inconsistent that readObject always reads synchronously ignoring read_method setting. 
- return object_storage->readObject(StoredObject(key), read_settings); + return object_storage->readObject(StoredObject(object_info.relative_path, "", object_size), read_settings); } } @@ -609,4 +631,114 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::ReadTaskIterator return buffer[current_index]; } +static IArchiveReader::NameFilter createArchivePathFilter(const std::string & archive_pattern) +{ + auto matcher = std::make_shared(makeRegexpPatternFromGlobs(archive_pattern)); + if (!matcher->ok()) + { + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, + "Cannot compile regex from glob ({}): {}", + archive_pattern, matcher->error()); + } + return [matcher](const std::string & p) mutable { return re2::RE2::FullMatch(p, *matcher); }; +} + +StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive::ObjectInfoInArchive( + ObjectInfoPtr archive_object_, + const std::string & path_in_archive_, + std::shared_ptr archive_reader_) + : archive_object(archive_object_) + , path_in_archive(path_in_archive_) + , archive_reader(archive_reader_) +{ +} + +StorageObjectStorageSource::ArchiveIterator::ArchiveIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::unique_ptr archives_iterator_, + ContextPtr context_, + ObjectInfos * read_keys_) + : IIterator("ArchiveIterator") + , WithContext(context_) + , object_storage(object_storage_) + , is_path_in_archive_with_globs(configuration_->isPathInArchiveWithGlobs()) + , archives_iterator(std::move(archives_iterator_)) + , filter(is_path_in_archive_with_globs ? createArchivePathFilter(configuration_->getPathInArchive()) : IArchiveReader::NameFilter{}) + , path_in_archive(is_path_in_archive_with_globs ? "" : configuration_->getPathInArchive()) + , read_keys(read_keys_) +{ +} + +std::shared_ptr +StorageObjectStorageSource::ArchiveIterator::createArchiveReader(ObjectInfoPtr object_info) const +{ + const auto size = object_info->metadata->size_bytes; + return DB::createArchiveReader( + /* path_to_archive */object_info->relative_path, + /* archive_read_function */[=, this]() + { + StoredObject stored_object(object_info->relative_path, "", size); + return object_storage->readObject(stored_object, getContext()->getReadSettings()); + }, + /* archive_size */size); +} + +StorageObjectStorageSource::ObjectInfoPtr +StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) +{ + std::unique_lock lock{next_mutex}; + while (true) + { + if (filter) + { + if (!file_enumerator) + { + archive_object = archives_iterator->next(processor); + if (!archive_object) + return {}; + + archive_reader = createArchiveReader(archive_object); + file_enumerator = archive_reader->firstFile(); + if (!file_enumerator) + continue; + } + else if (!file_enumerator->nextFile()) + { + file_enumerator.reset(); + continue; + } + + path_in_archive = file_enumerator->getFileName(); + if (!filter(path_in_archive)) + continue; + } + else + { + archive_object = archives_iterator->next(processor); + if (!archive_object) + return {}; + + if (!archive_object->metadata) + archive_object->metadata = object_storage->getObjectMetadata(archive_object->relative_path); + + archive_reader = createArchiveReader(archive_object); + if (!archive_reader->fileExists(path_in_archive)) + continue; + } + + auto object_in_archive = std::make_shared(archive_object, path_in_archive, archive_reader); + + if (read_keys != nullptr) + read_keys->push_back(object_in_archive); + + return object_in_archive; + } +} + +size_t 
StorageObjectStorageSource::ArchiveIterator::estimatedKeysCount() +{ + return archives_iterator->estimatedKeysCount(); +} + } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index abaf51edc4e..664aad56928 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -1,10 +1,11 @@ #pragma once +#include +#include +#include #include #include -#include -#include #include -#include +#include namespace DB @@ -25,6 +26,7 @@ public: class ReadTaskIterator; class GlobIterator; class KeysIterator; + class ArchiveIterator; StorageObjectStorageSource( String name_, @@ -109,7 +111,7 @@ protected: /// Recreate ReadBuffer and Pipeline for each file. ReaderHolder createReader(size_t processor = 0); std::future createReaderAsync(size_t processor = 0); - std::unique_ptr createReadBuffer(const String & key, size_t object_size); + std::unique_ptr createReadBuffer(const ObjectInfo & object_info); void addNumRowsToCache(const String & path, size_t num_rows); std::optional tryGetNumRowsFromCache(const ObjectInfoPtr & object_info); @@ -218,4 +220,64 @@ private: std::atomic index = 0; bool ignore_non_existent_files; }; + +/* + * An archives iterator. + * Allows to iterate files inside one or many archives. + * `archives_iterator` is an iterator which iterates over different archives. + * There are two ways to read files in archives: + * 1. When we want to read one concrete file in each archive. + * In this case we go through all archives, check if this certain file + * exists within this archive and read it if it exists. + * 2. When we have a certain pattern of files we want to read in each archive. + * For this purpose we create a filter defined as IArchiveReader::NameFilter. + */ +class StorageObjectStorageSource::ArchiveIterator : public IIterator, private WithContext +{ +public: + explicit ArchiveIterator( + ObjectStoragePtr object_storage_, + ConfigurationPtr configuration_, + std::unique_ptr archives_iterator_, + ContextPtr context_, + ObjectInfos * read_keys_); + + size_t estimatedKeysCount() override; + + struct ObjectInfoInArchive : public ObjectInfo + { + ObjectInfoInArchive( + ObjectInfoPtr archive_object_, + const std::string & path_in_archive_, + std::shared_ptr archive_reader_); + + const ObjectInfoPtr archive_object; + const std::string path_in_archive; + const std::shared_ptr archive_reader; + }; + +private: + ObjectInfoPtr nextImpl(size_t processor) override; + std::shared_ptr createArchiveReader(ObjectInfoPtr object_info) const; + + const ObjectStoragePtr object_storage; + const bool is_path_in_archive_with_globs; + /// Iterator which iterates through different archives. + const std::unique_ptr archives_iterator; + /// Used when files inside archive are defined with a glob + const IArchiveReader::NameFilter filter = {}; + /// Current file inside the archive. + std::string path_in_archive = {}; + /// Read keys of files inside archives. + ObjectInfos * read_keys; + /// Object pointing to archive (NOT path within archive). + ObjectInfoPtr archive_object; + /// Reader of the archive. + std::shared_ptr archive_reader; + /// File enumerator inside the archive.
+ std::unique_ptr file_enumerator; + + std::mutex next_mutex; +}; + } From f0a2b85f052e88703ce6255addabeb842a47e8fe Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 17 May 2024 11:37:51 +0200 Subject: [PATCH 164/392] Fix test query --- .../02271_fix_column_matcher_and_column_transformer.sql | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql b/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql index 245b2cc97e3..b2a04788bbb 100644 --- a/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql +++ b/tests/queries/0_stateless/02271_fix_column_matcher_and_column_transformer.sql @@ -61,6 +61,11 @@ CREATE TABLE github_events ) ENGINE = MergeTree ORDER BY (event_type, repo_name, created_at); -with top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ), last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in (select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ), last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week group by repo_name order by count_last_week desc ), last_month as ( select repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) select d.repo_name, columns(count) from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name; +with + top_repos as ( select repo_name from github_events where event_type = 'WatchEvent' and toDate(created_at) = today() - 1 group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toMonday(created_at) = toMonday(today() - interval 1 week) group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count() desc limit 100 union distinct select repo_name from github_events where event_type = 'WatchEvent' and toYear(created_at) = toYear(today()) - 1 group by repo_name order by count() desc limit 100 ), + last_day as ( select repo_name, count() as count_last_day, rowNumberInAllBlocks() + 1 as position_last_day from github_events where repo_name in 
(select repo_name from top_repos) and toDate(created_at) = today() - 1 group by repo_name order by count_last_day desc ), + last_week as ( select repo_name, count() as count_last_week, rowNumberInAllBlocks() + 1 as position_last_week from github_events where repo_name in (select repo_name from top_repos) and toMonday(created_at) = toMonday(today()) - interval 1 week group by repo_name order by count_last_week desc ), + last_month as ( select repo_name, count() as count_last_month, rowNumberInAllBlocks() + 1 as position_last_month from github_events where repo_name in (select repo_name from top_repos) and toStartOfMonth(created_at) = toStartOfMonth(today()) - interval 1 month group by repo_name order by count_last_month desc ) +select d.repo_name, columns('count') from last_day d join last_week w on d.repo_name = w.repo_name join last_month m on d.repo_name = m.repo_name; DROP TABLE github_events; From 61ee5e46ad50fcedd86f6d62d4c2bda2f6fedade Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 17 May 2024 11:43:33 +0200 Subject: [PATCH 165/392] Update the test --- .../03152_analyzer_columns_list.reference | 2 +- .../0_stateless/03152_analyzer_columns_list.sql | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.reference b/tests/queries/0_stateless/03152_analyzer_columns_list.reference index eefa8ebd513..4e9025b5baf 100644 --- a/tests/queries/0_stateless/03152_analyzer_columns_list.reference +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.reference @@ -1 +1 @@ -11323 8 +4 3 diff --git a/tests/queries/0_stateless/03152_analyzer_columns_list.sql b/tests/queries/0_stateless/03152_analyzer_columns_list.sql index 2b19cdf37a2..baed3a4ff68 100644 --- a/tests/queries/0_stateless/03152_analyzer_columns_list.sql +++ b/tests/queries/0_stateless/03152_analyzer_columns_list.sql @@ -1,3 +1,13 @@ -SELECT COLUMNS(license_text, library_name) APPLY (length) FROM system.licenses ORDER BY library_name LIMIT 1; +CREATE TABLE test +( + foo String, + bar String, +) +ENGINE = MergeTree() +ORDER BY (foo, bar); -SELECT COLUMNS(license_text, library_name, xyz) APPLY (length) FROM system.licenses; -- { serverError UNKNOWN_IDENTIFIER } +INSERT INTO test VALUES ('foo', 'bar1'); + +SELECT COLUMNS(bar, foo) APPLY (length) FROM test; + +SELECT COLUMNS(bar, foo, xyz) APPLY (length) FROM test; -- { serverError UNKNOWN_IDENTIFIER } From 53e992af4ff6c2df33f46c597498baa38c327ee3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 17 May 2024 11:42:28 +0000 Subject: [PATCH 166/392] Remove some unnecessary UNREACHABLEs --- programs/keeper-client/Commands.cpp | 3 ++- programs/main.cpp | 2 +- src/Access/AccessEntityIO.cpp | 3 +-- src/Access/AccessRights.cpp | 1 - src/Access/IAccessStorage.cpp | 9 +++------ .../AggregateFunctionGroupArray.cpp | 13 ++++++------- .../AggregateFunctionSequenceNextNode.cpp | 1 - src/AggregateFunctions/AggregateFunctionSum.h | 13 ++++++------- src/Common/DateLUTImpl.cpp | 1 - src/Common/IntervalKind.cpp | 10 ---------- src/Common/TargetSpecific.cpp | 2 -- src/Common/ThreadProfileEvents.cpp | 1 - src/Common/ZooKeeper/IKeeper.cpp | 2 -- src/Compression/CompressionCodecDeflateQpl.cpp | 1 - src/Compression/CompressionCodecDoubleDelta.cpp | 3 +-- src/Coordination/KeeperReconfiguration.cpp | 8 +++++++- src/Coordination/KeeperServer.cpp | 3 ++- src/Core/Field.cpp | 1 - src/Core/Field.h | 2 -- src/DataTypes/Serializations/ISerialization.cpp | 1 - src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 1 - 
.../MetadataStorageTransactionState.cpp | 1 - src/Disks/VolumeJBOD.cpp | 2 -- src/Formats/EscapingRuleUtils.cpp | 1 - src/Functions/FunctionsRound.h | 4 ---- src/Functions/PolygonUtils.h | 2 -- .../UserDefinedSQLObjectsZooKeeperStorage.cpp | 1 - src/IO/CompressionMethod.cpp | 1 - src/IO/HadoopSnappyReadBuffer.h | 1 - src/Interpreters/AggregatedDataVariants.cpp | 8 -------- src/Interpreters/Cache/FileSegment.cpp | 1 - src/Interpreters/ComparisonGraph.cpp | 1 - src/Interpreters/FilesystemCacheLog.cpp | 1 - src/Interpreters/HashJoin.cpp | 3 --- .../InterpreterTransactionControlQuery.cpp | 1 - src/Interpreters/SetVariants.cpp | 4 ---- src/Parsers/ASTExplainQuery.h | 2 -- src/Parsers/Lexer.cpp | 4 ---- .../Formats/Impl/MsgPackRowInputFormat.cpp | 1 - src/Processors/IProcessor.cpp | 2 -- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 ------ src/Processors/QueryPlan/TotalsHavingStep.cpp | 2 -- src/Processors/Transforms/FillingTransform.cpp | 1 - .../Transforms/buildPushingToViewsChain.cpp | 2 -- src/Storages/MergeTree/BackgroundJobsAssignee.cpp | 1 - src/Storages/MergeTree/KeyCondition.cpp | 2 -- src/Storages/MergeTree/MergeTreeData.cpp | 2 -- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 -- src/Storages/WindowView/StorageWindowView.cpp | 1 - 49 files changed, 29 insertions(+), 112 deletions(-) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index ec5eaf5070c..38c3d4356f6 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -11,6 +11,7 @@ namespace DB namespace ErrorCodes { extern const int KEEPER_EXCEPTION; + extern const int UNEXPECTED_ZOOKEEPER_ERROR; } bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const @@ -441,7 +442,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient new_members = query->args[1].safeGet(); break; default: - UNREACHABLE(); + throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected operation: {}", operation); } auto response = client->zookeeper->reconfig(joining, leaving, new_members); diff --git a/programs/main.cpp b/programs/main.cpp index 4bb73399719..48985ea683f 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -155,8 +155,8 @@ auto instructionFailToString(InstructionFail fail) ret("AVX2"); case InstructionFail::AVX512: ret("AVX512"); +#undef ret } - UNREACHABLE(); } diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index b0dfd74c53b..1b073329296 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -144,8 +144,7 @@ AccessEntityPtr deserializeAccessEntity(const String & definition, const String catch (Exception & e) { e.addMessage("Could not parse " + file_path); - e.rethrow(); - UNREACHABLE(); + throw; } } diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index c10931f554c..dd25d3e4ac0 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -258,7 +258,6 @@ namespace case TABLE_LEVEL: return AccessFlags::allFlagsGrantableOnTableLevel(); case COLUMN_LEVEL: return AccessFlags::allFlagsGrantableOnColumnLevel(); } - UNREACHABLE(); } } diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 8e51481e415..8d4e7d3073e 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -257,8 +257,7 @@ std::vector IAccessStorage::insert(const std::vector & mu } e.addMessage("After successfully inserting {}/{}: {}", successfully_inserted.size(), 
multiple_entities.size(), successfully_inserted_str); } - e.rethrow(); - UNREACHABLE(); + throw; } } @@ -361,8 +360,7 @@ std::vector IAccessStorage::remove(const std::vector & ids, bool thr } e.addMessage("After successfully removing {}/{}: {}", removed_names.size(), ids.size(), removed_names_str); } - e.rethrow(); - UNREACHABLE(); + throw; } } @@ -458,8 +456,7 @@ std::vector IAccessStorage::update(const std::vector & ids, const Up } e.addMessage("After successfully updating {}/{}: {}", names_of_updated.size(), ids.size(), names_of_updated_str); } - e.rethrow(); - UNREACHABLE(); + throw; } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index d4fb7afcb78..930b2c6ce73 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -60,14 +60,13 @@ struct GroupArrayTrait template constexpr const char * getNameByTrait() { - if (Trait::last) + if constexpr (Trait::last) return "groupArrayLast"; - if (Trait::sampler == Sampler::NONE) - return "groupArray"; - else if (Trait::sampler == Sampler::RNG) - return "groupArraySample"; - - UNREACHABLE(); + switch (Trait::sampler) + { + case Sampler::NONE: return "groupArray"; + case Sampler::RNG: return "groupArraySample"; + } } template diff --git a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp index bed10333af0..a9dd53a75e8 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp @@ -414,7 +414,6 @@ public: break; return (i == events_size) ? base - i : unmatched_idx; } - UNREACHABLE(); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 58aaddf357a..2f23187d2ea 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -457,13 +457,12 @@ public: String getName() const override { - if constexpr (Type == AggregateFunctionTypeSum) - return "sum"; - else if constexpr (Type == AggregateFunctionTypeSumWithOverflow) - return "sumWithOverflow"; - else if constexpr (Type == AggregateFunctionTypeSumKahan) - return "sumKahan"; - UNREACHABLE(); + switch (Type) + { + case AggregateFunctionTypeSum: return "sum"; + case AggregateFunctionTypeSumWithOverflow: return "sumWithOverflow"; + case AggregateFunctionTypeSumKahan: return "sumKahan"; + } } explicit AggregateFunctionSum(const DataTypes & argument_types_) diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index 392ee64dcbf..c87d44a4b95 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -41,7 +41,6 @@ UInt8 getDayOfWeek(const cctz::civil_day & date) case cctz::weekday::saturday: return 6; case cctz::weekday::sunday: return 7; } - UNREACHABLE(); } inline cctz::time_point lookupTz(const cctz::time_zone & cctz_time_zone, const cctz::civil_day & date) diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index 22c7db504c3..1548d5cf9a5 100644 --- a/src/Common/IntervalKind.cpp +++ b/src/Common/IntervalKind.cpp @@ -34,8 +34,6 @@ Int64 IntervalKind::toAvgNanoseconds() const default: return toAvgSeconds() * NANOSECONDS_PER_SECOND; } - - UNREACHABLE(); } Int32 IntervalKind::toAvgSeconds() const @@ -54,7 +52,6 @@ Int32 IntervalKind::toAvgSeconds() const 
case IntervalKind::Kind::Quarter: return 7889238; /// Exactly 1/4 of a year. case IntervalKind::Kind::Year: return 31556952; /// The average length of a Gregorian year is equal to 365.2425 days } - UNREACHABLE(); } Float64 IntervalKind::toSeconds() const @@ -80,7 +77,6 @@ Float64 IntervalKind::toSeconds() const default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not possible to get precise number of seconds in non-precise interval"); } - UNREACHABLE(); } bool IntervalKind::isFixedLength() const @@ -99,7 +95,6 @@ bool IntervalKind::isFixedLength() const case IntervalKind::Kind::Quarter: case IntervalKind::Kind::Year: return false; } - UNREACHABLE(); } IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds) @@ -141,7 +136,6 @@ const char * IntervalKind::toKeyword() const case IntervalKind::Kind::Quarter: return "QUARTER"; case IntervalKind::Kind::Year: return "YEAR"; } - UNREACHABLE(); } @@ -161,7 +155,6 @@ const char * IntervalKind::toLowercasedKeyword() const case IntervalKind::Kind::Quarter: return "quarter"; case IntervalKind::Kind::Year: return "year"; } - UNREACHABLE(); } @@ -192,7 +185,6 @@ const char * IntervalKind::toDateDiffUnit() const case IntervalKind::Kind::Year: return "year"; } - UNREACHABLE(); } @@ -223,7 +215,6 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const case IntervalKind::Kind::Year: return "toIntervalYear"; } - UNREACHABLE(); } @@ -257,7 +248,6 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const case IntervalKind::Kind::Year: return "toYear"; } - UNREACHABLE(); } diff --git a/src/Common/TargetSpecific.cpp b/src/Common/TargetSpecific.cpp index 49f396c0926..8540c9a9986 100644 --- a/src/Common/TargetSpecific.cpp +++ b/src/Common/TargetSpecific.cpp @@ -54,8 +54,6 @@ String toString(TargetArch arch) case TargetArch::AMXTILE: return "amxtile"; case TargetArch::AMXINT8: return "amxint8"; } - - UNREACHABLE(); } } diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 6a63d484cd9..23b41f23bde 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -75,7 +75,6 @@ const char * TasksStatsCounters::metricsProviderString(MetricsProvider provider) case MetricsProvider::Netlink: return "netlink"; } - UNREACHABLE(); } bool TasksStatsCounters::checkIfAvailable() diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index 7d2602bde1e..7cca262baca 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -146,8 +146,6 @@ const char * errorMessage(Error code) case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; case Error::ZNOTREADONLY: return "State-changing request is passed to read-only server"; } - - UNREACHABLE(); } bool isHardwareError(Error zk_return_code) diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp index 7e0653c69f8..f1b5b24e866 100644 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -466,7 +466,6 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); return; } - UNREACHABLE(); } void CompressionCodecDeflateQpl::flushAsynchronousDecompressRequests() diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index e6e8db4c699..78fdf5c627a 100644 --- 
a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -163,9 +163,8 @@ inline Int64 getMaxValueForByteSize(Int8 byte_size) case sizeof(UInt64): return std::numeric_limits::max(); default: - assert(false && "only 1, 2, 4 and 8 data sizes are supported"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "only 1, 2, 4 and 8 data sizes are supported"); } - UNREACHABLE(); } struct WriteSpec diff --git a/src/Coordination/KeeperReconfiguration.cpp b/src/Coordination/KeeperReconfiguration.cpp index e3642913a7a..a2a06f92283 100644 --- a/src/Coordination/KeeperReconfiguration.cpp +++ b/src/Coordination/KeeperReconfiguration.cpp @@ -5,6 +5,12 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int UNEXPECTED_ZOOKEEPER_ERROR; +} + ClusterUpdateActions joiningToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view joining) { ClusterUpdateActions out; @@ -79,7 +85,7 @@ String serializeClusterConfig(const ClusterConfigPtr & cfg, const ClusterUpdateA new_config.emplace_back(RaftServerConfig{*cfg->get_server(priority->id)}); } else - UNREACHABLE(); + throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected update"); } for (const auto & item : cfg->get_servers()) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 8d21ce2ab01..b132c898be6 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -45,6 +45,7 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int LOGICAL_ERROR; extern const int INVALID_CONFIG_PARAMETER; + extern const int UNEXPECTED_ZOOKEEPER_ERROR; } using namespace std::chrono_literals; @@ -990,7 +991,7 @@ KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate( raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); return Accepted; } - UNREACHABLE(); + throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected action"); } ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index 73f0703f21e..7207485c799 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -146,7 +146,6 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf) case Field::Types::CustomType: return Field(); } - UNREACHABLE(); } void readBinary(Array & x, ReadBuffer & buf) diff --git a/src/Core/Field.h b/src/Core/Field.h index 4424d669c4d..710614cd0a0 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -667,8 +667,6 @@ public: case Types::AggregateFunctionState: return f(field.template get()); case Types::CustomType: return f(field.template get()); } - - UNREACHABLE(); } String dump() const; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index a3a28f8091c..cd605c93f0d 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -36,7 +36,6 @@ String ISerialization::kindToString(Kind kind) case Kind::SPARSE: return "Sparse"; } - UNREACHABLE(); } ISerialization::Kind ISerialization::stringToKind(const String & str) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index 3433698a162..cb34f7932c3 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -140,7 +140,6 @@ private: case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: return 
"REMOTE_FS_READ_AND_PUT_IN_CACHE"; } - UNREACHABLE(); } size_t first_offset = 0; diff --git a/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp b/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp index 245578b5d9e..a37f4ce7e65 100644 --- a/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp @@ -17,7 +17,6 @@ std::string toString(MetadataStorageTransactionState state) case MetadataStorageTransactionState::PARTIALLY_ROLLED_BACK: return "PARTIALLY_ROLLED_BACK"; } - UNREACHABLE(); } } diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index a0c71583a22..e796ad6cdd7 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -112,7 +112,6 @@ DiskPtr VolumeJBOD::getDisk(size_t /* index */) const return disks_by_size.top().disk; } } - UNREACHABLE(); } ReservationPtr VolumeJBOD::reserve(UInt64 bytes) @@ -164,7 +163,6 @@ ReservationPtr VolumeJBOD::reserve(UInt64 bytes) return reservation; } } - UNREACHABLE(); } bool VolumeJBOD::areMergesAvoided() const diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 3edade639df..2fe29d8bebb 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -62,7 +62,6 @@ String escapingRuleToString(FormatSettings::EscapingRule escaping_rule) case FormatSettings::EscapingRule::Raw: return "Raw"; } - UNREACHABLE(); } void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 99f3a14dfec..233d4058f11 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -149,8 +149,6 @@ struct IntegerRoundingComputation return x; } } - - UNREACHABLE(); } static ALWAYS_INLINE T compute(T x, T scale) @@ -163,8 +161,6 @@ struct IntegerRoundingComputation case ScaleMode::Negative: return computeImpl(x, scale); } - - UNREACHABLE(); } static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out) requires std::integral diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index c4851718da6..57f1243537d 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -381,8 +381,6 @@ bool PointInPolygonWithGrid::contains(CoordinateType x, Coordina case CellType::complexPolygon: return boost::geometry::within(Point(x, y), polygons[cell.index_of_inner_polygon]); } - - UNREACHABLE(); } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp index 568e0b9b5d2..766d63eafb0 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -35,7 +35,6 @@ namespace case UserDefinedSQLObjectType::Function: return "function_"; } - UNREACHABLE(); } constexpr std::string_view sql_extension = ".sql"; diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index b8e1134d422..22913125e99 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -52,7 +52,6 @@ std::string toContentEncodingName(CompressionMethod method) case CompressionMethod::None: return ""; } - UNREACHABLE(); } CompressionMethod chooseHTTPCompressionMethod(const std::string & list) diff --git a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index 
73e52f2c503..bbbb84dd6dd 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -88,7 +88,6 @@ public: case Status::TOO_LARGE_COMPRESSED_BLOCK: return "TOO_LARGE_COMPRESSED_BLOCK"; } - UNREACHABLE(); } explicit HadoopSnappyReadBuffer( diff --git a/src/Interpreters/AggregatedDataVariants.cpp b/src/Interpreters/AggregatedDataVariants.cpp index 87cfdda5948..8f82f15248f 100644 --- a/src/Interpreters/AggregatedDataVariants.cpp +++ b/src/Interpreters/AggregatedDataVariants.cpp @@ -117,8 +117,6 @@ size_t AggregatedDataVariants::size() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } size_t AggregatedDataVariants::sizeWithoutOverflowRow() const @@ -136,8 +134,6 @@ size_t AggregatedDataVariants::sizeWithoutOverflowRow() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } const char * AggregatedDataVariants::getMethodName() const @@ -155,8 +151,6 @@ const char * AggregatedDataVariants::getMethodName() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } bool AggregatedDataVariants::isTwoLevel() const @@ -174,8 +168,6 @@ bool AggregatedDataVariants::isTwoLevel() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } - - UNREACHABLE(); } bool AggregatedDataVariants::isConvertibleToTwoLevel() const diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 9459029dc4c..61a356fa3c3 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -799,7 +799,6 @@ String FileSegment::stateToString(FileSegment::State state) case FileSegment::State::DETACHED: return "DETACHED"; } - UNREACHABLE(); } bool FileSegment::assertCorrectness() const diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp index 4eacbae7a30..d53ff4b0227 100644 --- a/src/Interpreters/ComparisonGraph.cpp +++ b/src/Interpreters/ComparisonGraph.cpp @@ -309,7 +309,6 @@ ComparisonGraphCompareResult ComparisonGraph::pathToCompareResult(Path pat case Path::GREATER: return inverse ? ComparisonGraphCompareResult::LESS : ComparisonGraphCompareResult::GREATER; case Path::GREATER_OR_EQUAL: return inverse ? 
ComparisonGraphCompareResult::LESS_OR_EQUAL : ComparisonGraphCompareResult::GREATER_OR_EQUAL; } - UNREACHABLE(); } template diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index 80fe1c3a8ef..aa489351a98 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -26,7 +26,6 @@ static String typeToString(FilesystemCacheLogElement::CacheType type) case FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE: return "WRITE_THROUGH_CACHE"; } - UNREACHABLE(); } ColumnsDescription FilesystemCacheLogElement::getColumnsDescription() diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 3a21c13db5e..75da8bbc3e7 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -705,7 +705,6 @@ namespace APPLY_FOR_JOIN_VARIANTS(M) #undef M } - UNREACHABLE(); } } @@ -2641,8 +2640,6 @@ private: default: throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type); } - - UNREACHABLE(); } template diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index d31ace758c4..13872fbe3f5 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -33,7 +33,6 @@ BlockIO InterpreterTransactionControlQuery::execute() case ASTTransactionControl::SET_SNAPSHOT: return executeSetSnapshot(session_context, tcl.snapshot); } - UNREACHABLE(); } BlockIO InterpreterTransactionControlQuery::executeBegin(ContextMutablePtr session_context) diff --git a/src/Interpreters/SetVariants.cpp b/src/Interpreters/SetVariants.cpp index 64796a013f1..c600d096160 100644 --- a/src/Interpreters/SetVariants.cpp +++ b/src/Interpreters/SetVariants.cpp @@ -41,8 +41,6 @@ size_t SetVariantsTemplate::getTotalRowCount() const APPLY_FOR_SET_VARIANTS(M) #undef M } - - UNREACHABLE(); } template @@ -57,8 +55,6 @@ size_t SetVariantsTemplate::getTotalByteCount() const APPLY_FOR_SET_VARIANTS(M) #undef M } - - UNREACHABLE(); } template diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index 701bde8cebd..eb095b5dbbc 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -40,8 +40,6 @@ public: case TableOverride: return "EXPLAIN TABLE OVERRIDE"; case CurrentTransaction: return "EXPLAIN CURRENT TRANSACTION"; } - - UNREACHABLE(); } static ExplainKind fromString(const String & str) diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 9ac6e623803..30717550713 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -41,8 +41,6 @@ Token quotedString(const char *& pos, const char * const token_begin, const char ++pos; continue; } - - UNREACHABLE(); } } @@ -538,8 +536,6 @@ const char * getTokenName(TokenType type) APPLY_FOR_TOKENS(M) #undef M } - - UNREACHABLE(); } diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 98cbdeaaa4b..6b7f1f5206c 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -657,7 +657,6 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type()); } } - UNREACHABLE(); } std::optional MsgPackSchemaReader::readRowAndGetDataTypes() diff --git a/src/Processors/IProcessor.cpp 
b/src/Processors/IProcessor.cpp index 8b160153733..5ab5e5277aa 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -36,8 +36,6 @@ std::string IProcessor::statusToName(Status status) case Status::ExpandPipeline: return "ExpandPipeline"; } - - UNREACHABLE(); } } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index e523a2c243c..2f7927681aa 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1136,8 +1136,6 @@ static void addMergingFinal( return std::make_shared(header, num_outputs, sort_description, max_block_size_rows, /*max_block_size_bytes=*/0, merging_params.graphite_params, now); } - - UNREACHABLE(); }; pipe.addTransform(get_merging_processor()); @@ -2143,8 +2141,6 @@ static const char * indexTypeToString(ReadFromMergeTree::IndexType type) case ReadFromMergeTree::IndexType::Skip: return "Skip"; } - - UNREACHABLE(); } static const char * readTypeToString(ReadFromMergeTree::ReadType type) @@ -2160,8 +2156,6 @@ static const char * readTypeToString(ReadFromMergeTree::ReadType type) case ReadFromMergeTree::ReadType::ParallelReplicas: return "Parallel"; } - - UNREACHABLE(); } void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index d1bd70fd0b2..ac5e144bf4a 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -86,8 +86,6 @@ static String totalsModeToString(TotalsMode totals_mode, double auto_include_thr case TotalsMode::AFTER_HAVING_AUTO: return "after_having_auto threshold " + std::to_string(auto_include_threshold); } - - UNREACHABLE(); } void TotalsHavingStep::describeActions(FormatSettings & settings) const diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 05fd2a7254f..bb38c3e1dc5 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -67,7 +67,6 @@ static FillColumnDescription::StepFunction getStepFunction( FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE } - UNREACHABLE(); } static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & type) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 5e8ecdca95e..20977b801d3 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -897,8 +897,6 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St { return std::current_exception(); } - - UNREACHABLE(); } void FinalizingViewsTransform::work() diff --git a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp index 56a4378cf9a..0a69bf1109f 100644 --- a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp +++ b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp @@ -93,7 +93,6 @@ String BackgroundJobsAssignee::toString(Type type) case Type::Moving: return "Moving"; } - UNREACHABLE(); } void BackgroundJobsAssignee::start() diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 849240502e4..dbc98404569 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -2957,8 +2957,6 @@ String 
KeyCondition::RPNElement::toString(std::string_view column_name, bool pri case ALWAYS_TRUE: return "true"; } - - UNREACHABLE(); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f448a9a820d..6b6adf56cd2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1175,8 +1175,6 @@ String MergeTreeData::MergingParams::getModeName() const case Graphite: return "Graphite"; case VersionedCollapsing: return "VersionedCollapsing"; } - - UNREACHABLE(); } Int64 MergeTreeData::getMaxBlockNumber() const diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index daa163d741c..395d27558f3 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -360,8 +360,6 @@ Block MergeTreeDataWriter::mergeBlock( return std::make_shared( block, 1, sort_description, block_size + 1, /*block_size_bytes=*/0, merging_params.graphite_params, time(nullptr)); } - - UNREACHABLE(); }; auto merging_algorithm = get_merging_algorithm(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index a9ec1f6c694..4e11787cecf 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -297,7 +297,6 @@ namespace CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - UNREACHABLE(); } class AddingAggregatedChunkInfoTransform : public ISimpleTransform From e560bd8a1a9c57640af1303a95f0a81d864c75e3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 17 May 2024 14:37:47 +0000 Subject: [PATCH 167/392] Incorporate review feedback --- src/Access/AccessRights.cpp | 1 + src/AggregateFunctions/AggregateFunctionSum.h | 12 ++++++------ src/Compression/CompressionCodecDoubleDelta.cpp | 4 ++-- src/Coordination/KeeperReconfiguration.cpp | 4 ++-- src/Coordination/KeeperServer.cpp | 2 +- src/Core/Field.cpp | 1 + src/Functions/FunctionsTimeWindow.cpp | 2 -- src/Parsers/Lexer.cpp | 2 ++ 8 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index dd25d3e4ac0..2127f4ada70 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -258,6 +258,7 @@ namespace case TABLE_LEVEL: return AccessFlags::allFlagsGrantableOnTableLevel(); case COLUMN_LEVEL: return AccessFlags::allFlagsGrantableOnColumnLevel(); } + chassert(false); } } diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 2f23187d2ea..2ce03c530c2 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -457,12 +457,12 @@ public: String getName() const override { - switch (Type) - { - case AggregateFunctionTypeSum: return "sum"; - case AggregateFunctionTypeSumWithOverflow: return "sumWithOverflow"; - case AggregateFunctionTypeSumKahan: return "sumKahan"; - } + if constexpr (Type == AggregateFunctionTypeSum) + return "sum"; + else if constexpr (Type == AggregateFunctionTypeSumWithOverflow) + return "sumWithOverflow"; + else if constexpr (Type == AggregateFunctionTypeSumKahan) + return "sumKahan"; } explicit AggregateFunctionSum(const DataTypes & argument_types_) diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 78fdf5c627a..443b9d33532 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ 
b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -142,9 +142,9 @@ namespace ErrorCodes { extern const int CANNOT_COMPRESS; extern const int CANNOT_DECOMPRESS; - extern const int BAD_ARGUMENTS; extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; extern const int ILLEGAL_CODEC_PARAMETER; + extern const int LOGICAL_ERROR; } namespace @@ -163,7 +163,7 @@ inline Int64 getMaxValueForByteSize(Int8 byte_size) case sizeof(UInt64): return std::numeric_limits::max(); default: - throw Exception(ErrorCodes::BAD_ARGUMENTS, "only 1, 2, 4 and 8 data sizes are supported"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "only 1, 2, 4 and 8 data sizes are supported"); } } diff --git a/src/Coordination/KeeperReconfiguration.cpp b/src/Coordination/KeeperReconfiguration.cpp index a2a06f92283..05211af6704 100644 --- a/src/Coordination/KeeperReconfiguration.cpp +++ b/src/Coordination/KeeperReconfiguration.cpp @@ -8,7 +8,7 @@ namespace DB namespace ErrorCodes { - extern const int UNEXPECTED_ZOOKEEPER_ERROR; + extern const int LOGICAL_ERROR; } ClusterUpdateActions joiningToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view joining) @@ -85,7 +85,7 @@ String serializeClusterConfig(const ClusterConfigPtr & cfg, const ClusterUpdateA new_config.emplace_back(RaftServerConfig{*cfg->get_server(priority->id)}); } else - throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected update"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected update"); } for (const auto & item : cfg->get_servers()) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b132c898be6..953072c5b0e 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -991,7 +991,7 @@ KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate( raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); return Accepted; } - throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected action"); + chassert(false); } ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index 7207485c799..73f0703f21e 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -146,6 +146,7 @@ inline Field getBinaryValue(UInt8 type, ReadBuffer & buf) case Field::Types::CustomType: return Field(); } + UNREACHABLE(); } void readBinary(Array & x, ReadBuffer & buf) diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 1c9f28c9724..f93a885ee65 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -232,7 +232,6 @@ struct TimeWindowImpl default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } - UNREACHABLE(); } template @@ -422,7 +421,6 @@ struct TimeWindowImpl default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } - UNREACHABLE(); } template diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 30717550713..d669c8a4690 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -41,6 +41,8 @@ Token quotedString(const char *& pos, const char * const token_begin, const char ++pos; continue; } + + chassert(false); } } From f266bdb88e1891e484add0431e9e5ca56c963635 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 17 May 2024 14:44:17 +0000 Subject: [PATCH 168/392] Fix more places --- src/Functions/FunctionsRound.h | 4 ---- src/Interpreters/HashJoin.h 
| 6 ------ .../MergeTree/PartMovesBetweenShardsOrchestrator.cpp | 2 -- src/Storages/WindowView/StorageWindowView.cpp | 2 -- 4 files changed, 14 deletions(-) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 233d4058f11..dde57e8320d 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -243,8 +243,6 @@ inline float roundWithMode(float x, RoundingMode mode) case RoundingMode::Ceil: return ceilf(x); case RoundingMode::Trunc: return truncf(x); } - - UNREACHABLE(); } inline double roundWithMode(double x, RoundingMode mode) @@ -256,8 +254,6 @@ inline double roundWithMode(double x, RoundingMode mode) case RoundingMode::Ceil: return ceil(x); case RoundingMode::Trunc: return trunc(x); } - - UNREACHABLE(); } template diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 86db8943926..a0996556f9a 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -322,8 +322,6 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } - - UNREACHABLE(); } size_t getTotalByteCountImpl(Type which) const @@ -338,8 +336,6 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } - - UNREACHABLE(); } size_t getBufferSizeInCells(Type which) const @@ -354,8 +350,6 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } - - UNREACHABLE(); } /// NOLINTEND(bugprone-macro-parentheses) }; diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 78fcfabb704..4228d7b70b6 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -616,8 +616,6 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::st } } } - - UNREACHABLE(); } void PartMovesBetweenShardsOrchestrator::removePins(const Entry & entry, zkutil::ZooKeeperPtr zk) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 4e11787cecf..8bca1c97aad 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -919,7 +919,6 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - UNREACHABLE(); } UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) @@ -947,7 +946,6 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } - UNREACHABLE(); } void StorageWindowView::addFireSignal(std::set & signals) From d964b4b78667a1437dd74836432828c5dda1be7e Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 17 May 2024 16:50:38 +0200 Subject: [PATCH 169/392] Finish archives related changes --- src/Disks/ObjectStorages/IObjectStorage.h | 6 +++ .../ObjectStorages/S3/S3ObjectStorage.cpp | 11 ++++- .../ObjectStorage/ReadBufferIterator.cpp | 40 ++++++++++++------- .../ObjectStorage/StorageObjectStorage.cpp | 7 +++- .../StorageObjectStorageCluster.cpp | 2 +- .../StorageObjectStorageSource.cpp | 37 +++++++++-------- .../StorageObjectStorageSource.h | 19 ++++++++- src/Storages/S3Queue/S3QueueSource.h | 2 +- 8 files changed, 88 insertions(+), 36 deletions(-) diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 43c7cf19adf..5724ae8929c 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -37,6 +37,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const 
int LOGICAL_ERROR; } class ReadBufferFromFileBase; @@ -64,6 +65,11 @@ struct RelativePathWithMetadata {} virtual ~RelativePathWithMetadata() = default; + + virtual std::string getFileName() const { return std::filesystem::path(relative_path).filename(); } + virtual std::string getPath() const { return relative_path; } + virtual bool isArchive() const { return false; } + virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); } }; struct ObjectKeyWithMetadata diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index c24874d0a94..983bb1834b8 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -457,7 +457,16 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true); + S3::ObjectInfo object_info; + try + { + object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true); + } + catch (DB::Exception & e) + { + e.addMessage("while reading " + path); + throw; + } ObjectMetadata result; result.size_bytes = object_info.size; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 61575b0115a..e065de16e55 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -50,7 +50,7 @@ SchemaCache::Keys ReadBufferIterator::getKeysForSchemaCache() const std::back_inserter(sources), [&](const auto & elem) { - return std::filesystem::path(configuration->getDataSourceDescription()) / elem->relative_path; + return std::filesystem::path(configuration->getDataSourceDescription()) / elem->getPath(); }); return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); } @@ -67,8 +67,9 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( const auto & object_info = (*it); auto get_last_mod_time = [&] -> std::optional { + const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath(); if (!object_info->metadata) - object_info->metadata = object_storage->tryGetObjectMetadata(object_info->relative_path); + object_info->metadata = object_storage->tryGetObjectMetadata(path); return object_info->metadata ? std::optional(object_info->metadata->last_modified.epochTime()) @@ -77,7 +78,7 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( if (format) { - auto cache_key = getKeyForSchemaCache(object_info->relative_path, *format); + auto cache_key = getKeyForSchemaCache(object_info->getPath(), *format); if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) return columns; } @@ -88,7 +89,7 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( /// If we have such entry for some format, we can use this format to read the file. for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) { - auto cache_key = getKeyForSchemaCache(object_info->relative_path, format_name); + auto cache_key = getKeyForSchemaCache(object_info->getPath(), format_name); if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) { /// Now format is known. It should be the same for all files. 
@@ -105,7 +106,7 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) { if (query_settings.schema_inference_use_cache) - schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->relative_path, *format), num_rows); + schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->getPath(), *format), num_rows); } void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) @@ -113,7 +114,7 @@ void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) if (query_settings.schema_inference_use_cache && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) { - schema_cache.addColumns(getKeyForSchemaCache(current_object_info->relative_path, *format), columns); + schema_cache.addColumns(getKeyForSchemaCache(current_object_info->getPath(), *format), columns); } } @@ -134,7 +135,7 @@ void ReadBufferIterator::setFormatName(const String & format_name) String ReadBufferIterator::getLastFileName() const { if (current_object_info) - return current_object_info->relative_path; + return current_object_info->getFileName(); else return ""; } @@ -142,9 +143,13 @@ String ReadBufferIterator::getLastFileName() const std::unique_ptr ReadBufferIterator::recreateLastReadBuffer() { auto context = getContext(); - auto impl = object_storage->readObject(StoredObject(current_object_info->relative_path), context->getReadSettings()); - const auto compression_method = chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method); + + const auto & path = current_object_info->isArchive() ? current_object_info->getPathToArchive() : current_object_info->getPath(); + auto impl = object_storage->readObject(StoredObject(), context->getReadSettings()); + + const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method); const auto zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); + return wrapReadBufferWithCompressionMethod(std::move(impl), compression_method, zstd_window_log_max); } @@ -158,7 +163,7 @@ ReadBufferIterator::Data ReadBufferIterator::next() { for (const auto & object_info : read_keys) { - if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->relative_path)) + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName())) { format = format_from_file_name; break; @@ -170,7 +175,9 @@ ReadBufferIterator::Data ReadBufferIterator::next() if (first && getContext()->getSettingsRef().schema_inference_mode == SchemaInferenceMode::DEFAULT) { if (auto cached_columns = tryGetColumnsFromCache(read_keys.begin(), read_keys.end())) + { return {nullptr, cached_columns, format}; + } } } @@ -178,7 +185,7 @@ ReadBufferIterator::Data ReadBufferIterator::next() { current_object_info = file_iterator->next(0); - if (!current_object_info || current_object_info->relative_path.empty()) + if (!current_object_info) { if (first) { @@ -203,6 +210,9 @@ ReadBufferIterator::Data ReadBufferIterator::next() return {nullptr, std::nullopt, format}; } + const auto filename = current_object_info->getFileName(); + chassert(!filename.empty()); + /// file iterator could get new keys after new iteration if (read_keys.size() > prev_read_keys_size) { @@ -211,7 +221,7 @@ ReadBufferIterator::Data ReadBufferIterator::next() { for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it) { - 
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->relative_path)) + if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName())) { format = format_from_file_name; break; @@ -250,15 +260,15 @@ ReadBufferIterator::Data ReadBufferIterator::next() using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; if (auto object_info_in_archive = dynamic_cast(current_object_info.get())) { - compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method); + compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method); auto & archive_reader = object_info_in_archive->archive_reader; read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); } else { - compression_method = chooseCompressionMethod(current_object_info->relative_path, configuration->compression_method); + compression_method = chooseCompressionMethod(filename, configuration->compression_method); read_buf = object_storage->readObject( - StoredObject(current_object_info->relative_path), + StoredObject(current_object_info->getPath()), getContext()->getReadSettings(), {}, current_object_info->metadata->size_bytes); diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 73e3d861cff..c45752c10f5 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -403,7 +403,12 @@ void StorageObjectStorage::Configuration::initialize( configuration.fromAST(engine_args, local_context, with_table_structure); if (configuration.format == "auto") - configuration.format = FormatFactory::instance().tryGetFormatFromFileName(configuration.getPath()).value_or("auto"); + { + configuration.format = FormatFactory::instance().tryGetFormatFromFileName( + configuration.isArchive() + ? 
configuration.getPathInArchive() + : configuration.getPath()).value_or("auto"); + } else FormatFactory::instance().checkFormatName(configuration.format); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index a43d9da0fa3..78f568d8ae2 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -90,7 +90,7 @@ RemoteQueryExecutor::Extension StorageObjectStorageCluster::getTaskIteratorExten { auto object_info = iterator->next(0); if (object_info) - return object_info->relative_path; + return object_info->getPath(); else return ""; }); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 56905e6c29b..d3b67876224 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -100,6 +100,7 @@ std::shared_ptr StorageObjectStorageSourc "Expression can not have wildcards inside {} name", configuration->getNamespaceType()); auto settings = configuration->getQuerySettings(local_context); + const bool is_archive = configuration->isArchive(); std::unique_ptr iterator; if (configuration->isPathWithGlobs()) @@ -107,7 +108,7 @@ std::shared_ptr StorageObjectStorageSourc /// Iterate through disclosed globs and make a source for each file iterator = std::make_unique( object_storage, configuration, predicate, virtual_columns, - local_context, read_keys, settings.list_object_keys_size, + local_context, is_archive ? nullptr : read_keys, settings.list_object_keys_size, settings.throw_on_zero_files_match, file_progress_callback); } else @@ -126,11 +127,11 @@ std::shared_ptr StorageObjectStorageSourc } iterator = std::make_unique( - object_storage, copy_configuration, virtual_columns, read_keys, + object_storage, copy_configuration, virtual_columns, is_archive ? nullptr : read_keys, settings.ignore_non_existent_file, file_progress_callback); } - if (configuration->isArchive()) + if (is_archive) { return std::make_shared(object_storage, configuration, std::move(iterator), local_context, read_keys); } @@ -175,12 +176,13 @@ Chunk StorageObjectStorageSource::generate() progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); const auto & object_info = reader.getObjectInfo(); + const auto & filename = object_info.getFileName(); chassert(object_info.metadata); VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, fs::path(configuration->getNamespace()) / reader.getRelativePath(), - object_info.metadata->size_bytes); + object_info.metadata->size_bytes, &filename); return chunk; } @@ -219,7 +221,7 @@ void StorageObjectStorageSource::addNumRowsToCache(const String & path, size_t n std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfoPtr & object_info) { const auto cache_key = getKeyForSchemaCache( - fs::path(configuration->getDataSourceDescription()) / object_info->relative_path, + fs::path(configuration->getDataSourceDescription()) / object_info->getPath(), configuration->format, format_settings, getContext()); @@ -242,11 +244,14 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade { object_info = file_iterator->next(processor); - if (!object_info || object_info->relative_path.empty()) + if (!object_info || object_info->getFileName().empty()) return {}; if (!object_info->metadata) - object_info->metadata = object_storage->getObjectMetadata(object_info->relative_path); + { + const auto & path = object_info->isArchive() ? object_info->getPathToArchive() : object_info->getPath(); + object_info->metadata = object_storage->getObjectMetadata(path); + } } while (query_settings.skip_empty_files && object_info->metadata->size_bytes == 0); @@ -282,7 +287,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade } else { - compression_method = chooseCompressionMethod(object_info->relative_path, configuration->compression_method); + compression_method = chooseCompressionMethod(object_info->getFileName(), configuration->compression_method); read_buf = createReadBuffer(*object_info); } @@ -355,7 +360,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const O LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size); auto async_reader = object_storage->readObjects( - StoredObjects{StoredObject{object_info.relative_path, /* local_path */ "", object_size}}, read_settings); + StoredObjects{StoredObject{object_info.getPath(), /* local_path */ "", object_size}}, read_settings); async_reader->setReadUntilEnd(); if (read_settings.remote_fs_prefetch) @@ -366,7 +371,7 @@ std::unique_ptr StorageObjectStorageSource::createReadBuffer(const O else { /// FIXME: this is inconsistent that readObject always reads synchronously ignoring read_method setting. 
- return object_storage->readObject(StoredObject(object_info.relative_path, "", object_size), read_settings); + return object_storage->readObject(StoredObject(object_info.getPath(), "", object_size), read_settings); } } @@ -381,7 +386,7 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::IIterator::next( if (object_info) { - LOG_TEST(logger, "Next key: {}", object_info->relative_path); + LOG_TEST(logger, "Next key: {}", object_info->getFileName()); } return object_info; @@ -470,7 +475,7 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne new_batch = std::move(result.value()); for (auto it = new_batch.begin(); it != new_batch.end();) { - if (!recursive && !re2::RE2::FullMatch((*it)->relative_path, *matcher)) + if (!recursive && !re2::RE2::FullMatch((*it)->getPath(), *matcher)) it = new_batch.erase(it); else ++it; @@ -487,7 +492,7 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne for (const auto & object_info : new_batch) { chassert(object_info); - paths.push_back(fs::path(configuration->getNamespace()) / object_info->relative_path); + paths.push_back(fs::path(configuration->getNamespace()) / object_info->getPath()); } VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); @@ -675,10 +680,10 @@ StorageObjectStorageSource::ArchiveIterator::createArchiveReader(ObjectInfoPtr o { const auto size = object_info->metadata->size_bytes; return DB::createArchiveReader( - /* path_to_archive */object_info->relative_path, + /* path_to_archive */object_info->getPath(), /* archive_read_function */[=, this]() { - StoredObject stored_object(object_info->relative_path, "", size); + StoredObject stored_object(object_info->getPath(), "", size); return object_storage->readObject(stored_object, getContext()->getReadSettings()); }, /* archive_size */size); @@ -720,7 +725,7 @@ StorageObjectStorageSource::ArchiveIterator::nextImpl(size_t processor) return {}; if (!archive_object->metadata) - archive_object->metadata = object_storage->getObjectMetadata(archive_object->relative_path); + archive_object->metadata = object_storage->getObjectMetadata(archive_object->getPath()); archive_reader = createArchiveReader(archive_object); if (!archive_reader->fileExists(path_in_archive)) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 664aad56928..fb0ad3e32f1 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -92,7 +92,7 @@ protected: PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - const String & getRelativePath() const { return object_info->relative_path; } + std::string getRelativePath() const { return object_info->getPath(); } const ObjectInfo & getObjectInfo() const { return *object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } @@ -251,6 +251,23 @@ public: const std::string & path_in_archive_, std::shared_ptr archive_reader_); + std::string getFileName() const override + { + return path_in_archive; + } + + std::string getPath() const override + { + return archive_object->getPath() + "::" + path_in_archive; + } + + std::string getPathToArchive() const override + { + return archive_object->getPath(); + } + + bool isArchive() const override { return true; } + const ObjectInfoPtr archive_object; const 
std::string path_in_archive; const std::shared_ptr archive_reader; diff --git a/src/Storages/S3Queue/S3QueueSource.h b/src/Storages/S3Queue/S3QueueSource.h index fdeed8d46d2..663577e055b 100644 --- a/src/Storages/S3Queue/S3QueueSource.h +++ b/src/Storages/S3Queue/S3QueueSource.h @@ -29,7 +29,7 @@ public: using FileStatusPtr = S3QueueFilesMetadata::FileStatusPtr; using ReaderHolder = StorageObjectStorageSource::ReaderHolder; using Metadata = S3QueueFilesMetadata; - using ObjectInfo = RelativePathWithMetadata; + using ObjectInfo = StorageObjectStorageSource::ObjectInfo; using ObjectInfoPtr = std::shared_ptr; using ObjectInfos = std::vector; From 4909c3ea2393c66226c23cd03847f1c5e5b05ff7 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 17 May 2024 18:24:21 +0200 Subject: [PATCH 170/392] Cleanups --- src/Storages/MergeTree/IMergeTreeDataPart.h | 11 ------ .../MergeTree/IMergeTreeDataPartWriter.cpp | 7 ---- .../MergeTree/IMergeTreeDataPartWriter.h | 39 ++++++------------- .../MergeTree/IMergedBlockOutputStream.cpp | 8 +--- .../MergeTree/IMergedBlockOutputStream.h | 10 ++--- src/Storages/MergeTree/MergeTask.cpp | 2 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 29 +++++++------- .../MergeTree/MergeTreeDataPartCompact.h | 9 ----- .../MergeTree/MergeTreeDataPartWide.cpp | 15 ++++--- .../MergeTree/MergeTreeDataPartWide.h | 9 ----- .../MergeTreeDataPartWriterCompact.cpp | 18 ++++----- .../MergeTreeDataPartWriterCompact.h | 6 +-- .../MergeTreeDataPartWriterOnDisk.cpp | 4 +- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 13 ++----- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 29 ++++++-------- .../MergeTree/MergeTreeDataPartWriterWide.h | 6 +-- src/Storages/MergeTree/MergeTreeIOSettings.h | 2 +- src/Storages/MergeTree/MergeTreePartition.cpp | 5 +-- src/Storages/MergeTree/MergeTreePartition.h | 2 +- .../MergeTree/MergedBlockOutputStream.cpp | 1 + .../MergedColumnOnlyOutputStream.cpp | 9 ++--- src/Storages/MergeTree/MutateTask.cpp | 1 + 22 files changed, 76 insertions(+), 159 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 4ec5b3f5f8a..091a7ceb5bd 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -43,7 +43,6 @@ class IReservation; using ReservationPtr = std::unique_ptr; class IMergeTreeReader; -class IMergeTreeDataPartWriter; class MarkCache; class UncompressedCache; class MergeTreeTransaction; @@ -74,7 +73,6 @@ public: using VirtualFields = std::unordered_map; using MergeTreeReaderPtr = std::unique_ptr; -// using MergeTreeWriterPtr = std::unique_ptr; using ColumnSizeByName = std::unordered_map; using NameToNumber = std::unordered_map; @@ -106,15 +104,6 @@ public: const ValueSizeMap & avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0; -//// virtual MergeTreeWriterPtr getWriter( -//// const NamesAndTypesList & columns_list, -//// const StorageMetadataPtr & metadata_snapshot, -//// const std::vector & indices_to_recalc, -//// const Statistics & stats_to_recalc_, -//// const CompressionCodecPtr & default_codec_, -//// const MergeTreeWriterSettings & writer_settings, -//// const MergeTreeIndexGranularity & computed_index_granularity) = 0; - // TODO: remove? 
virtual bool isStoredOnDisk() const = 0; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index b46fbc5fc9e..e01572715d6 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,5 +1,4 @@ #include -#include "Storages/MergeTree/MergeTreeSettings.h" namespace DB { @@ -46,12 +45,10 @@ Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * per } IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( -// const MergeTreeMutableDataPartPtr & data_part_, const String & data_part_name_, const SerializationByName & serializations_, MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, @@ -61,7 +58,6 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( , serializations(serializations_) , data_part_storage(data_part_storage_) , index_granularity_info(index_granularity_info_) - , storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) , columns_list(columns_list_) @@ -117,7 +113,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, @@ -134,7 +129,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, @@ -153,7 +147,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 6854668a01e..3245a23339b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -1,14 +1,12 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include "Storages/MergeTree/MergeTreeDataPartType.h" -#include "Storages/MergeTree/MergeTreeSettings.h" +#include +#include +#include +#include +#include +#include +#include namespace DB @@ -24,15 +22,11 @@ class IMergeTreeDataPartWriter : private boost::noncopyable { public: IMergeTreeDataPartWriter( -// const MergeTreeMutableDataPartPtr & data_part_, - const String & data_part_name_, const SerializationByName & serializations_, MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeWriterSettings & settings_, @@ -42,7 +36,7 @@ public: virtual void write(const Block & block, const IColumn::Permutation * permutation) = 0; - virtual 
void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) = 0; + virtual void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) = 0; virtual void finish(bool sync) = 0; @@ -56,21 +50,12 @@ protected: IDataPartStorage & getDataPartStorage() { return *data_part_storage; } - -// const MergeTreeMutableDataPartPtr data_part; // TODO: remove - /// Serializations for every columns and subcolumns by their names. - String data_part_name; - SerializationByName serializations; + const String data_part_name; + const SerializationByName serializations; MutableDataPartStoragePtr data_part_storage; - MergeTreeIndexGranularityInfo index_granularity_info; - - -// const MergeTreeData & storage; // TODO: remove - + const MergeTreeIndexGranularityInfo index_granularity_info; const MergeTreeSettingsPtr storage_settings; - - const StorageMetadataPtr metadata_snapshot; const NamesAndTypesList columns_list; const MergeTreeWriterSettings settings; @@ -90,7 +75,6 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, @@ -100,5 +84,4 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity); - } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index f99adf7c4db..89c813ab233 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -2,30 +2,26 @@ #include #include #include -#include "Storages/MergeTree/IDataPartStorage.h" -#include "Storages/StorageSet.h" namespace DB { IMergedBlockOutputStream::IMergedBlockOutputStream( -// const MergeTreeMutableDataPartPtr & data_part, const MergeTreeSettingsPtr & storage_settings_, MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list, bool reset_columns_) - //: storage(data_part->storage) : storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) - , data_part_storage(data_part_storage_)//data_part->getDataPartStoragePtr()) + , data_part_storage(data_part_storage_) , reset_columns(reset_columns_) { if (reset_columns) { SerializationInfo::Settings info_settings = { - .ratio_of_defaults_for_sparse = storage_settings->ratio_of_defaults_for_sparse_serialization,//storage.getSettings()->ratio_of_defaults_for_sparse_serialization, + .ratio_of_defaults_for_sparse = storage_settings->ratio_of_defaults_for_sparse_serialization, .choose_kind = false, }; diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index b6f279e6d58..a9b058418ea 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,12 +1,12 @@ #pragma once -#include "Storages/MergeTree/IDataPartStorage.h" -#include "Storages/MergeTree/MergeTreeSettings.h" +#include +#include #include #include #include #include -#include "Common/Logger.h" +#include namespace DB { @@ -15,7 +15,6 @@ class IMergedBlockOutputStream { public: IMergedBlockOutputStream( -// const MergeTreeMutableDataPartPtr & data_part, const 
MergeTreeSettingsPtr & storage_settings_, MutableDataPartStoragePtr data_part_storage_, const StorageMetadataPtr & metadata_snapshot_, @@ -43,11 +42,8 @@ protected: SerializationInfoByName & serialization_infos, MergeTreeData::DataPart::Checksums & checksums); -// const MergeTreeData & storage; // TODO: remove -//// MergeTreeSettingsPtr storage_settings; LoggerPtr log; -//// StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 1b5ad0d81a7..2ce74bde1d5 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index eebbe3110c0..373ad6c23ea 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -48,21 +48,20 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( } MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( - const String & data_part_name_, - const String & logger_name_, - const SerializationByName & serializations_, - MutableDataPartStoragePtr data_part_storage_, - const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, - - const NamesAndTypesList & columns_list, - const StorageMetadataPtr & metadata_snapshot, - const std::vector & indices_to_recalc, - const Statistics & stats_to_recalc_, - const String & marks_file_extension_, - const CompressionCodecPtr & default_codec_, - const MergeTreeWriterSettings & writer_settings, - const MergeTreeIndexGranularity & computed_index_granularity) + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, + const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, + const std::vector & indices_to_recalc, + const Statistics & stats_to_recalc_, + const String & marks_file_extension_, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity) { ////// TODO: fix the order of columns //// diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 5a57d778b7d..ca88edba7b3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -40,15 +40,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; -// MergeTreeWriterPtr getWriter( -// const NamesAndTypesList & columns_list, -// const StorageMetadataPtr & metadata_snapshot, -// const std::vector & indices_to_recalc, -// const Statistics & stats_to_recalc_, -// const CompressionCodecPtr & default_codec_, -// const MergeTreeWriterSettings & writer_settings, -// const MergeTreeIndexGranularity & computed_index_granularity) override; - // TODO: remove? 
bool isStoredOnDisk() const override { return true; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index c99cff258e0..34a3f30c4ba 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -54,18 +54,17 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( } MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( - const String & data_part_name_, - const String & logger_name_, - const SerializationByName & serializations_, - MutableDataPartStoragePtr data_part_storage_, - const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, - + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, - const String & marks_file_extension_, + const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 45d0fbbebec..e3cb3f04335 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -35,15 +35,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; -// MergeTreeWriterPtr getWriter( -// const NamesAndTypesList & columns_list, -// const StorageMetadataPtr & metadata_snapshot, -// const std::vector & indices_to_recalc, -// const Statistics & stats_to_recalc_, -// const CompressionCodecPtr & default_codec_, -// const MergeTreeWriterSettings & writer_settings, -// const MergeTreeIndexGranularity & computed_index_granularity) override; - // TODO: remove? 
bool isStoredOnDisk() const override { return true; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 6e8ea1a915b..3f08d8eea21 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -10,14 +10,12 @@ namespace ErrorCodes } MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( -// const MergeTreeMutableDataPartPtr & data_part_, - const String & data_part_name_, - const String & logger_name_, - const SerializationByName & serializations_, - MutableDataPartStoragePtr data_part_storage_, - const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, - + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, @@ -250,7 +248,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G } } -void MergeTreeDataPartWriterCompact::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums) +void MergeTreeDataPartWriterCompact::fillDataChecksums(MergeTreeDataPartChecksums & checksums) { if (columns_buffer.size() != 0) { @@ -420,7 +418,7 @@ size_t MergeTreeDataPartWriterCompact::ColumnsBuffer::size() const return accumulated_columns.at(0)->size(); } -void MergeTreeDataPartWriterCompact::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & /*checksums_to_remove*/) +void MergeTreeDataPartWriterCompact::fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & /*checksums_to_remove*/) { // If we don't have anything to write, skip finalization. if (!columns_list.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 3bec4c7e988..03804ff4966 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -11,14 +11,12 @@ class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterCompact( -// const MergeTreeMutableDataPartPtr & data_part, const String & data_part_name_, const String & logger_name_, const SerializationByName & serializations_, MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, @@ -30,12 +28,12 @@ public: void write(const Block & block, const IColumn::Permutation * permutation) override; - void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) override; + void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) override; void finish(bool sync) override; private: /// Finish serialization of the data. Flush rows in buffer to disk, compute checksums. 
- void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums); + void fillDataChecksums(MergeTreeDataPartChecksums & checksums); void finishDataSerialization(bool sync); void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 13892c17577..25eb83a82c0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -146,7 +146,6 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const MergeTreeIndices & indices_to_recalc_, @@ -231,7 +230,6 @@ static size_t computeIndexGranularityImpl( size_t MergeTreeDataPartWriterOnDisk::computeIndexGranularity(const Block & block) const { -// const auto storage_settings = storage.getSettings(); return computeIndexGranularityImpl( block, storage_settings->index_granularity_bytes, @@ -293,7 +291,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() GinIndexStorePtr store = nullptr; if (typeid_cast(&*skip_index) != nullptr) { - store = std::make_shared(stream_name, data_part_storage, data_part_storage, /*storage.getSettings()*/storage_settings->max_digestion_size_per_segment); + store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 39f33217b57..e17724fa1d0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -5,9 +5,6 @@ #include #include #include -#include -#include -#include #include #include #include @@ -97,21 +94,19 @@ public: void sync() const; - void addToChecksums(IMergeTreeDataPart::Checksums & checksums); + void addToChecksums(MergeTreeDataPartChecksums & checksums); }; using StreamPtr = std::unique_ptr>; using StatisticStreamPtr = std::unique_ptr>; MergeTreeDataPartWriterOnDisk( -// const MergeTreeMutableDataPartPtr & data_part_, const String & data_part_name_, const String & logger_name_, const SerializationByName & serializations_, MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc, @@ -140,13 +135,13 @@ protected: void calculateAndSerializeStatistics(const Block & stats_block); /// Finishes primary index serialization: write final primary index row (if required) and compute checksums - void fillPrimaryIndexChecksums(MergeTreeData::DataPart::Checksums & checksums); + void fillPrimaryIndexChecksums(MergeTreeDataPartChecksums & checksums); void finishPrimaryIndexSerialization(bool sync); /// Finishes skip indices serialization: write all accumulated data to disk and compute checksums - void fillSkipIndicesChecksums(MergeTreeData::DataPart::Checksums & checksums); + void 
fillSkipIndicesChecksums(MergeTreeDataPartChecksums & checksums); void finishSkipIndicesSerialization(bool sync); - void fillStatisticsChecksums(MergeTreeData::DataPart::Checksums & checksums); + void fillStatisticsChecksums(MergeTreeDataPartChecksums & checksums); void finishStatisticsSerialization(bool sync); /// Get global number of the current which we are writing (or going to start to write) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 713dee87fa8..a57bf7d2037 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -76,14 +76,12 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, } MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( -// const MergeTreeMutableDataPartPtr & data_part_, - const String & data_part_name_, - const String & logger_name_, - const SerializationByName & serializations_, - MutableDataPartStoragePtr data_part_storage_, - const MergeTreeIndexGranularityInfo & index_granularity_info_, - const MergeTreeSettingsPtr & storage_settings_, - + const String & data_part_name_, + const String & logger_name_, + const SerializationByName & serializations_, + MutableDataPartStoragePtr data_part_storage_, + const MergeTreeIndexGranularityInfo & index_granularity_info_, + const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, const std::vector & indices_to_recalc_, @@ -114,7 +112,6 @@ void MergeTreeDataPartWriterWide::addStreams( { assert(!substream_path.empty()); -// auto storage_settings = storage.getSettings(); auto full_stream_name = ISerialization::getFileNameForStream(column, substream_path); String stream_name; @@ -416,11 +413,10 @@ void MergeTreeDataPartWriterWide::writeColumn( serialization->serializeBinaryBulkStatePrefix(column, serialize_settings, it->second); } -// const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; serialize_settings.getter = createStreamGetter(name_and_type, offset_columns); - serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; for (const auto & granule : granules) { @@ -603,12 +599,11 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai } -void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) +void MergeTreeDataPartWriterWide::fillDataChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) { -// const auto & global_settings = storage.getContext()->getSettingsRef(); ISerialization::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size;//global_settings.low_cardinality_max_dictionary_size; - 
serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part;//global_settings.low_cardinality_use_single_dictionary_for_part != 0; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part; WrittenOffsetColumns offset_columns; if (rows_written_in_last_mark > 0) { @@ -683,7 +678,7 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(bool sync) } -void MergeTreeDataPartWriterWide::fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) +void MergeTreeDataPartWriterWide::fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) { // If we don't have anything to write, skip finalization. if (!columns_list.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index ef9c4ab17dc..5789213c910 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -21,14 +21,12 @@ class MergeTreeDataPartWriterWide : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterWide( -// const MergeTreeMutableDataPartPtr & data_part, const String & data_part_name_, const String & logger_name_, const SerializationByName & serializations_, MutableDataPartStoragePtr data_part_storage_, const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, - const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, @@ -40,14 +38,14 @@ public: void write(const Block & block, const IColumn::Permutation * permutation) override; - void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) final; + void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) final; void finish(bool sync) final; private: /// Finish serialization of data: write final mark if required and compute checksums /// Also validate written data in debug mode - void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove); + void fillDataChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove); void finishDataSerialization(bool sync); /// Write data of one column. 
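For illustration only, a minimal stand-alone sketch of the dependency-narrowing this cleanup performs: the writers above no longer receive a mutable data part and instead are constructed from the concrete values they use (part name, serializations, storage, granularity info, storage settings). PartWriterSketch, GranularityInfo and StorageSettings are invented names; this is not ClickHouse code.

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <utility>

struct GranularityInfo { size_t index_granularity = 8192; };
struct StorageSettings { bool low_cardinality_use_single_dictionary_for_part = true; };

/// Before the cleanup a writer took the whole part object and reached into it;
/// after the cleanup (as in the constructors above) every dependency is passed explicitly.
class PartWriterSketch
{
public:
    PartWriterSketch(std::string part_name, GranularityInfo granularity, StorageSettings settings)
        : part_name_(std::move(part_name)), granularity_(granularity), settings_(settings)
    {
    }

    /// Stands in for fillChecksums()/finish(): it can only touch the injected state.
    void finish() const
    {
        std::cout << "finished " << part_name_
                  << ", granularity " << granularity_.index_granularity
                  << ", single LowCardinality dictionary: "
                  << settings_.low_cardinality_use_single_dictionary_for_part << '\n';
    }

private:
    std::string part_name_;
    GranularityInfo granularity_;
    StorageSettings settings_;
};

int main()
{
    /// Mirrors the shape of a createMergeTreeDataPartWriter(...) call site:
    /// the inputs are visible at construction instead of hidden behind a data part pointer.
    auto writer = std::make_unique<PartWriterSketch>("all_1_1_0", GranularityInfo{}, StorageSettings{});
    writer->finish();
    return 0;
}
```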
diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 421c62887da..2b7d5c366f2 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -75,7 +75,7 @@ struct MergeTreeWriterSettings , query_write_settings(query_write_settings_) , max_threads_for_annoy_index_creation(global_settings.max_threads_for_annoy_index_creation) , low_cardinality_max_dictionary_size(global_settings.low_cardinality_max_dictionary_size) - , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part) + , low_cardinality_use_single_dictionary_for_part(global_settings.low_cardinality_use_single_dictionary_for_part != 0) { } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index c2ef7f98388..c7b7557fe52 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -12,7 +12,6 @@ #include #include #include -#include "Interpreters/Context_fwd.h" #include #include @@ -414,12 +413,10 @@ void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataM partition_key_sample.getByPosition(i).type->getDefaultSerialization()->deserializeBinary(value[i], *file, {}); } -std::unique_ptr MergeTreePartition::store(/*const MergeTreeData & storage,*/ +std::unique_ptr MergeTreePartition::store( StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const { -// auto metadata_snapshot = storage.getInMemoryMetadataPtr(); -// const auto & context = storage.getContext(); const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage_context).sample_block; return store(partition_key_sample, data_part_storage, checksums, storage_context->getWriteSettings()); } diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 04175d6f927..44def70bdd9 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -44,7 +44,7 @@ public: /// Store functions return write buffer with written but not finalized data. /// User must call finish() for returned object. 
- [[nodiscard]] std::unique_ptr store(//const MergeTreeData & storage, + [[nodiscard]] std::unique_ptr store( StorageMetadataPtr metadata_snapshot, ContextPtr storage_context, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums) const; [[nodiscard]] std::unique_ptr store(const Block & partition_key_sample, IDataPartStorage & data_part_storage, MergeTreeDataPartChecksums & checksums, const WriteSettings & settings) const; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 2441d941952..e0fb4f703a0 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 51853384012..1c75d81eca5 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -24,7 +24,6 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( , header(header_) { const auto & global_settings = data_part->storage.getContext()->getSettings(); -// const auto & storage_settings = data_part->storage.getSettings(); MergeTreeWriterSettings writer_settings( global_settings, @@ -34,10 +33,10 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( /* rewrite_primary_key = */ false); writer = createMergeTreeDataPartWriter( - data_part->getType(), - data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), - data_part_storage, data_part->index_granularity_info, - storage_settings, + data_part->getType(), + data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), + data_part_storage, data_part->index_granularity_info, + storage_settings, header.getNamesAndTypesList(), metadata_snapshot_, indices_to_recalc, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 54077055d96..7d6b68c7359 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include From 13c94806e5f5ff800620d502229ff17cbce379f2 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 17 May 2024 19:44:57 +0200 Subject: [PATCH 171/392] fix waiting for mutations with retriable errors --- src/Storages/StorageReplicatedMergeTree.cpp | 31 ++++++++++++++++----- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cc6599f8cd1..d60fa6bc787 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -590,6 +590,9 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( LOG_DEBUG(log, "Waiting for {} to apply mutation {}", replica, mutation_id); zkutil::EventPtr wait_event = std::make_shared(); + constexpr size_t MAX_RETRIES_ON_FAILED_MUTATION = 30; + size_t retries_on_failed_mutation = 0; + while (!partial_shutdown_called) { /// Mutation maybe killed or whole replica was deleted. @@ -637,18 +640,32 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( } } - /// If mutation status is empty, than local replica may just not loaded it into memory. 
- if (mutation_status && !mutation_status->latest_fail_reason.empty()) - { - LOG_DEBUG(log, "Mutation {} is done {} or failed {} (status: '{}')", mutation_id, mutation_status->is_done, !mutation_status->latest_fail_reason.empty(), mutation_status->latest_fail_reason); - break; - } - /// Replica can become inactive, so wait with timeout, if nothing happened -> recheck it if (!wait_event->tryWait(1000)) { LOG_TRACE(log, "Failed to wait for mutation '{}', will recheck", mutation_id); } + + /// If mutation status is empty, than local replica may just not loaded it into memory. + if (mutation_status && !mutation_status->latest_fail_reason.empty()) + { + LOG_DEBUG(log, "Mutation {} is done {} or failed {} (status: '{}')", mutation_id, mutation_status->is_done, !mutation_status->latest_fail_reason.empty(), mutation_status->latest_fail_reason); + + /// In some cases latest_fail_reason may be retryable and there's a chance it will be cleared after the next attempt + if (++retries_on_failed_mutation <= MAX_RETRIES_ON_FAILED_MUTATION) + continue; + + if (mutation_status->is_done) + { + LOG_DEBUG(log, "Looks like mutation {} is done, rechecking", mutation_id); + continue; + } + + /// It's still possible that latest_fail_reason will be cleared just before queue.getIncompleteMutationsStatus(...) below, + /// but it's unlikely. Anyway, rethrow the exception here to avoid exiting with is_done=false + checkMutationStatus(mutation_status, {mutation_id}); + throw Exception(ErrorCodes::LOGICAL_ERROR, "checkMutationStatus didn't throw when checking status of {}: {}", mutation_id, mutation_status->latest_fail_reason); + } } /// This replica inactive, don't check anything From 077e6057f275a69a5fac48097b995572a5e07f06 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 17 May 2024 21:45:07 +0200 Subject: [PATCH 172/392] Update reinterpretAsDate and reinterpretAsDateTime functions, add a test --- .../functions/type-conversion-functions.md | 84 ++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ea08ffa50e7..cf3483f27a4 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1004,9 +1004,91 @@ Result: ## reinterpretAsDate +Accepts a string, fixed string or numeric value and interprets the bytes as a number in host order (little endian). It returns a date from the interpreted number as the number of days since the beginning of the Unix Epoch. + +**Syntax** + +```sql +reinterpretAsDate(x) +``` + +**Parameters** + +- `x`: number of days since the beginning of the Unix Epoch. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Date. [Date](../data-types/date.md). + +**Implementation details** + +:::note +If the provided string isn’t long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. 
+::: + +**Example** + +Query: + +```sql +SELECT reinterpretAsDate(65), reinterpretAsDate('A'); +``` + +Result: + +```response +┌─reinterpretAsDate(65)─┬─reinterpretAsDate('A')─┐ +│ 1970-03-07 │ 1970-03-07 │ +└───────────────────────┴────────────────────────┘ +``` + ## reinterpretAsDateTime -These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn’t long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch. +These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). Returns a date with time interpreted as the number of seconds since the beginning of the Unix Epoch. + +**Syntax** + +```sql +reinterpretAsDateTime(x) +``` + +**Parameters** + +- `x`: number of seconds since the beginning of the Unix Epoch. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Date and Time. [DateTime](../data-types/datetime.md). + +**Implementation details** + +:::note +If the provided string isn’t long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. +::: + +**Example** + +Query: + +```sql +SELECT reinterpretAsDateTime(65), reinterpretAsDateTime('A'); +``` + +Result: + +```response +┌─reinterpretAsDateTime(65)─┬─reinterpretAsDateTime('A')─┐ +│ 1970-01-01 01:01:05 │ 1970-01-01 01:01:05 │ +└───────────────────────────┴────────────────────────────┘ +``` ## reinterpretAsString From 764bf4d477c95cc3d27fe438a439956829997f9c Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 17 May 2024 22:04:40 +0200 Subject: [PATCH 173/392] Update reinterpretAsFixedString documentation and add tests --- .../functions/type-conversion-functions.md | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index cf3483f27a4..14a12ab5d5d 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1098,6 +1098,38 @@ This function accepts a number or date or date with time and returns a string co This function accepts a number or date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. +**Syntax** + +```sql +reinterpretAsFixedString(x) +``` + +**Parameters** + +- `x`: value to reinterpret to string. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md). + +**Returned value** + +- Fixed string containing bytes representing `x`. [FixedString](../data-types/fixedstring.md). 
+ +**Example** + +Query: + +```sql +SELECT + reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05')), + reinterpretAsFixedString(toDate('1970-03-07')); +``` + +Result: + +```response +┌─reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05'))─┬─reinterpretAsFixedString(toDate('1970-03-07'))─┐ +│ A │ A │ +└─────────────────────────────────────────────────────────────┴────────────────────────────────────────────────┘ +``` + ## reinterpretAsUUID :::note From 2c8b303a2fc69365be39a91179365466c3ebc14a Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 17 May 2024 20:16:58 +0000 Subject: [PATCH 174/392] Use Dynamic as supertype, add more tests, fix tests flakiness, update docs --- docs/en/sql-reference/data-types/dynamic.md | 4 ++-- src/DataTypes/getLeastSupertype.cpp | 19 +++++++++++++++++++ .../03037_dynamic_merges_1_horizontal.sh | 2 +- .../03037_dynamic_merges_1_vertical.sh | 2 +- .../03159_dynamic_type_all_types.reference | 12 ++++++------ .../03159_dynamic_type_all_types.sql | 4 ++-- .../03163_dynamic_as_supertype.reference | 10 ++++++++++ .../03163_dynamic_as_supertype.sql | 8 ++++++++ 8 files changed, 49 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/03163_dynamic_as_supertype.reference create mode 100644 tests/queries/0_stateless/03163_dynamic_as_supertype.sql diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index eabf032c52f..955fd54e641 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -14,7 +14,7 @@ To declare a column of `Dynamic` type, use the following syntax: Dynamic(max_types=N) ``` -Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic`. If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`. +Where `N` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all new types will be converted to type `String`. Default value of `max_types` is `32`. :::note The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`. 
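For illustration only, a small SQL sketch of what the clarified `max_types` wording above means in practice; the table name is invented and the expected behaviour is described in comments rather than shown as captured output.

```sql
SET allow_experimental_dynamic_type = 1;

CREATE TABLE dynamic_sketch (d Dynamic(max_types=2)) ENGINE = Memory;

-- Int64 and String are the first two distinct types in this block and keep their types;
-- the Array(Int64) value exceeds the limit and is stored as String ('[1,2,3]').
INSERT INTO dynamic_sketch VALUES (42), ('hello'), ([1, 2, 3]);

SELECT d, dynamicType(d) FROM dynamic_sketch;
```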
@@ -355,7 +355,7 @@ SELECT * FROM test WHERE d2 == [1,2,3]::Array(UInt32)::Dynamic; - Compare `Dynamic` subcolumn with required type: ```sql -SELECT * FROM test WHERE d2.`Array(Int64)` == [1,2,3] -- or using variantElement(d2, 'Array(UInt32)') +SELECT * FROM test WHERE d2.`Array(Int65)` == [1,2,3] -- or using variantElement(d2, 'Array(UInt32)') ``` ```text diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 0977bea362c..a71b19d6c92 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB @@ -256,6 +257,24 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return types[0]; } + /// If one of the types is Dynamic, the supertype is Dynamic + { + bool have_dynamic = false; + size_t max_dynamic_types = 0; + + for (const auto & type : types) + { + if (const auto & dynamic_type = typeid_cast(type.get())) + { + have_dynamic = true; + max_dynamic_types = std::max(max_dynamic_types, dynamic_type->getMaxDynamicTypes()); + } + } + + if (have_dynamic) + return std::make_shared(max_dynamic_types); + } + /// Recursive rules /// If there are Nothing types, skip them diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh index 0d3cd45666a..7c1ac41cfdc 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh @@ -8,7 +8,7 @@ CLICKHOUSE_LOG_COMMENT= . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 " +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --merge_max_block_size 8192 --merge_max_block_size_bytes=10485760 --index_granularity 8192" function test() { diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh index b2c40668228..927ceac72b5 100755 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh @@ -8,8 +8,8 @@ CLICKHOUSE_LOG_COMMENT= . 
"$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 " +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --merge_max_block_size 8192 --merge_max_block_size_bytes=10485760 --index_granularity 8192" function test() { echo "test" diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference index a162ec4f857..7dcaaa1f3ec 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference @@ -110,9 +110,9 @@ Map(Dynamic, Dynamic) {'11':'v1','22':'1'} Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] Object(\'json\') {"1":"2"} -Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":null,"k1":1,"k2":2} -Object(Nullable(\'json\')) {"1":2,"2":3,"2020-10-10":null,"k1":null,"k2":null} -Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":"foo","k1":null,"k2":null} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} +Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string @@ -259,9 +259,9 @@ Map(Dynamic, Dynamic) {'11':'v1','22':'1'} Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] Object(\'json\') {"1":"2"} -Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":null,"k1":1,"k2":2} -Object(Nullable(\'json\')) {"1":null,"2":null,"2020-10-10":"foo","k1":null,"k2":null} -Object(Nullable(\'json\')) {"1":2,"2":3,"2020-10-10":null,"k1":null,"k2":null} +Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql index 38d70dee64e..64fab07ed4f 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql @@ -86,13 +86,13 @@ INSERT INTO t VALUES (interval '1' day), (interval '2' month), (interval '3' yea INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y String)); INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, ['ee', 'ff']), [(7, 'gg'), (8, 'hh')])]::Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String))); -SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d ; +SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d, toString(d); CREATE TABLE t2 (d Dynamic(max_types=255)) ENGINE = Memory; INSERT INTO t2 SELECT * FROM t; SELECT ''; -SELECT dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; +SELECT 
dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d, toString(d); SELECT ''; SELECT uniqExact(dynamicType(d)) t_ FROM t; diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.reference b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference new file mode 100644 index 00000000000..5f1a8613a77 --- /dev/null +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference @@ -0,0 +1,10 @@ +str_0 Dynamic(max_types=3) String +1 Dynamic(max_types=3) UInt64 +str_2 Dynamic(max_types=3) String +3 Dynamic(max_types=3) UInt64 +str_1 String +42 UInt64 +str_2 String +43 UInt64 +2020-01-01 Date +[1,2,3] Array(Int64) diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.sql b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql new file mode 100644 index 00000000000..fbb6aa74fab --- /dev/null +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql @@ -0,0 +1,8 @@ +SET allow_experimental_dynamic_type=1; +SELECT if(number % 2, number::Dynamic(max_types=3), ('str_' || toString(number))::Dynamic(max_types=2)) AS d, toTypeName(d), dynamicType(d) FROM numbers(4); +CREATE TABLE dynamic_test_1 (d Dynamic(max_types=3)) ENGINE = Memory; +INSERT INTO dynamic_test_1 VALUES ('str_1'), (42::UInt64); +CREATE TABLE dynamic_test_2 (d Dynamic(max_types=5)) ENGINE = Memory; +INSERT INTO dynamic_test_2 VALUES ('str_2'), (43::UInt64), ('2020-01-01'::Date), ([1, 2, 3]); +SELECT d, dynamicType(d) FROM dynamic_test_1 UNION ALL SELECT d, dynamicType(d) FROM dynamic_test_2; + From dd6c763492d032738c922cff19c8687e05c2f542 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 17 May 2024 17:48:06 -0400 Subject: [PATCH 175/392] Use of the redefined context in process query pipline. --- .../Transforms/buildPushingToViewsChain.cpp | 5 +-- .../Transforms/buildPushingToViewsChain.h | 3 ++ ...te_view_with_sql_security_option.reference | 1 + ...84_create_view_with_sql_security_option.sh | 35 +++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 5e8ecdca95e..cdcfad4442c 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -414,7 +414,8 @@ std::optional generateViewChain( out.getInputHeader(), view_id, nullptr, - std::move(runtime_stats)}); + std::move(runtime_stats), + insert_context}); if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { @@ -590,7 +591,7 @@ Chain buildPushingToViewsChain( static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) { - const auto & context = views_data.context; + const auto & context = view.context; /// We create a table with the same name as original table and the same alias columns, /// but it will contain single block (that is INSERT-ed into main table). diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h index 53aceeda1cc..a1feed91b60 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.h +++ b/src/Processors/Transforms/buildPushingToViewsChain.h @@ -33,6 +33,9 @@ struct ViewRuntimeData /// Info which is needed for query views log. std::unique_ptr runtime_stats; + /// An overridden context bounded to this view with the correct SQL security grants. 
+ ContextPtr context; + void setException(std::exception_ptr e) { exception = e; diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index 9ba927fa201..931cf8ac19c 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -24,6 +24,7 @@ OK 2 OK OK +100 ===== TestGrants ===== OK OK diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index 9c9df120298..62b03b5d5ff 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -192,6 +192,41 @@ ${CLICKHOUSE_CLIENT} --user $user1 --query " ${CLICKHOUSE_CLIENT} --query "GRANT SET DEFINER ON $user2 TO $user1" +${CLICKHOUSE_CLIENT} --multiquery < Date: Sat, 18 May 2024 01:04:20 +0000 Subject: [PATCH 176/392] Fix tests --- .../0_stateless/03159_dynamic_type_all_types.reference | 6 +++--- .../0_stateless/03163_dynamic_as_supertype.reference | 10 +++++----- .../queries/0_stateless/03163_dynamic_as_supertype.sql | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference index 7dcaaa1f3ec..abecca893f9 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference @@ -110,9 +110,9 @@ Map(Dynamic, Dynamic) {'11':'v1','22':'1'} Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] Object(\'json\') {"1":"2"} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string @@ -260,8 +260,8 @@ Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] Object(\'json\') {"1":"2"} Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} +Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.reference b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference index 5f1a8613a77..33e3a15c7fb 100644 --- a/tests/queries/0_stateless/03163_dynamic_as_supertype.reference +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.reference @@ -2,9 +2,9 @@ str_0 Dynamic(max_types=3) String 
1 Dynamic(max_types=3) UInt64 str_2 Dynamic(max_types=3) String 3 Dynamic(max_types=3) UInt64 -str_1 String -42 UInt64 -str_2 String -43 UInt64 -2020-01-01 Date [1,2,3] Array(Int64) +2020-01-01 Date +str_1 String +str_2 String +42 UInt64 +43 UInt64 diff --git a/tests/queries/0_stateless/03163_dynamic_as_supertype.sql b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql index fbb6aa74fab..baba637eea4 100644 --- a/tests/queries/0_stateless/03163_dynamic_as_supertype.sql +++ b/tests/queries/0_stateless/03163_dynamic_as_supertype.sql @@ -4,5 +4,5 @@ CREATE TABLE dynamic_test_1 (d Dynamic(max_types=3)) ENGINE = Memory; INSERT INTO dynamic_test_1 VALUES ('str_1'), (42::UInt64); CREATE TABLE dynamic_test_2 (d Dynamic(max_types=5)) ENGINE = Memory; INSERT INTO dynamic_test_2 VALUES ('str_2'), (43::UInt64), ('2020-01-01'::Date), ([1, 2, 3]); -SELECT d, dynamicType(d) FROM dynamic_test_1 UNION ALL SELECT d, dynamicType(d) FROM dynamic_test_2; +SELECT * FROM (SELECT d, dynamicType(d) FROM dynamic_test_1 UNION ALL SELECT d, dynamicType(d) FROM dynamic_test_2) order by d; From 9ba21335e4b4d157f4b1de884e87ef84e917dc62 Mon Sep 17 00:00:00 2001 From: pufit Date: Sat, 18 May 2024 12:20:24 -0400 Subject: [PATCH 177/392] fix test --- .../0_stateless/02884_create_view_with_sql_security_option.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index 62b03b5d5ff..a9a306a9e27 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -197,18 +197,21 @@ CREATE TABLE $db.source ( a UInt64 ) +ENGINE = MergeTree ORDER BY a; CREATE TABLE $db.destination1 ( `a` UInt64 ) +ENGINE = MergeTree ORDER BY a; CREATE TABLE $db.destination2 ( `a` UInt64 ) +ENGINE = MergeTree ORDER BY a; CREATE MATERIALIZED VIEW $db.mv1 TO $db.destination1 From 3a79b1facc63aa9ae3a8deb986bd00cf51c14c1f Mon Sep 17 00:00:00 2001 From: pufit Date: Sat, 18 May 2024 17:15:01 -0400 Subject: [PATCH 178/392] fix test --- .../0_stateless/02884_create_view_with_sql_security_option.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index a9a306a9e27..f1da343da36 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -202,14 +202,14 @@ ORDER BY a; CREATE TABLE $db.destination1 ( - `a` UInt64 + a UInt64 ) ENGINE = MergeTree ORDER BY a; CREATE TABLE $db.destination2 ( - `a` UInt64 + a UInt64 ) ENGINE = MergeTree ORDER BY a; From 79b3f52dc5189d6def125cf5ed9b1fb2e37267e4 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 18 May 2024 23:18:41 +0000 Subject: [PATCH 179/392] only interpolate expression should be used for DAG --- src/Planner/PlannerExpressionAnalysis.cpp | 7 +++---- .../03155_analyzer_interpolate.reference | 13 +++++++++++++ .../0_stateless/03155_analyzer_interpolate.sql | 7 +++++++ 3 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03155_analyzer_interpolate.reference create mode 100644 tests/queries/0_stateless/03155_analyzer_interpolate.sql diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 
6e194b2c03e..6ff56f36933 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -439,20 +439,19 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, auto & interpolate_list_node = query_node.getInterpolate()->as(); PlannerActionsVisitor interpolate_actions_visitor(planner_context); - auto interpolate_actions_dag = std::make_shared(); + auto interpolate_expression_dag = std::make_shared(); for (auto & interpolate_node : interpolate_list_node.getNodes()) { auto & interpolate_node_typed = interpolate_node->as(); - interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); - interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); + interpolate_actions_visitor.visit(interpolate_expression_dag, interpolate_node_typed.getInterpolateExpression()); } std::unordered_map before_sort_actions_inputs_name_to_node; for (const auto & node : before_sort_actions->getInputs()) before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); - for (const auto & node : interpolate_actions_dag->getNodes()) + for (const auto & node : interpolate_expression_dag->getNodes()) { if (before_sort_actions_dag_output_node_names.contains(node.result_name) || node.type != ActionsDAG::ActionType::INPUT) diff --git a/tests/queries/0_stateless/03155_analyzer_interpolate.reference b/tests/queries/0_stateless/03155_analyzer_interpolate.reference new file mode 100644 index 00000000000..791aaa5b2a2 --- /dev/null +++ b/tests/queries/0_stateless/03155_analyzer_interpolate.reference @@ -0,0 +1,13 @@ +0 [5] +0.5 [5] +1 [1] +1.5 [5] +2 [5] +2.5 [5] +3 [5] +3.5 [5] +4 [4] +4.5 [5] +5 [5] +5.5 [5] +7 [7] diff --git a/tests/queries/0_stateless/03155_analyzer_interpolate.sql b/tests/queries/0_stateless/03155_analyzer_interpolate.sql new file mode 100644 index 00000000000..9b56106f2b4 --- /dev/null +++ b/tests/queries/0_stateless/03155_analyzer_interpolate.sql @@ -0,0 +1,7 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/62464 +SET allow_experimental_analyzer = 1; + +SELECT n, [number] as inter FROM ( + SELECT toFloat32(number % 10) AS n, number + FROM numbers(10) WHERE number % 3 = 1 +) group by n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS [5]); From a67418bcc8abb685a1c0271f8f34d5434bb0a113 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 19 May 2024 07:14:37 +0000 Subject: [PATCH 180/392] add NOT_AN_AGGREGATE exception for interpolate expression columns --- src/Planner/PlannerExpressionAnalysis.cpp | 16 ++++++++++++++-- .../0_stateless/03155_analyzer_interpolate.sql | 9 +++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 6ff56f36933..e7d553af944 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -28,6 +28,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int NOT_AN_AGGREGATE; } namespace @@ -397,7 +398,8 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, SortAnalysisResult analyzeSort(const QueryNode & query_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, - ActionsChain & actions_chain) + ActionsChain & actions_chain, + std::optional aggregation_analysis_result_optional) { ActionsDAGPtr before_sort_actions = std::make_shared(input_columns); auto & 
before_sort_actions_outputs = before_sort_actions->getOutputs(); @@ -451,6 +453,10 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, for (const auto & node : before_sort_actions->getInputs()) before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); + std::unordered_set aggregation_keys; + if (aggregation_analysis_result_optional) + aggregation_keys.insert(aggregation_analysis_result_optional->aggregation_keys.begin(), aggregation_analysis_result_optional->aggregation_keys.end()); + for (const auto & node : interpolate_expression_dag->getNodes()) { if (before_sort_actions_dag_output_node_names.contains(node.result_name) || @@ -466,6 +472,12 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, input_node_it = it; } + if (aggregation_analysis_result_optional) + if (!aggregation_keys.contains(node.result_name)) + throw Exception(ErrorCodes::NOT_AN_AGGREGATE, + "Column {} is not under aggregate function and not in GROUP BY keys. In query {}", + node.result_name, query_node.formatASTForErrorMessage()); + before_sort_actions_outputs.push_back(input_node_it->second); before_sort_actions_dag_output_node_names.insert(node.result_name); } @@ -567,7 +579,7 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo std::optional sort_analysis_result_optional; if (query_node.hasOrderBy()) { - sort_analysis_result_optional = analyzeSort(query_node, current_output_columns, planner_context, actions_chain); + sort_analysis_result_optional = analyzeSort(query_node, current_output_columns, planner_context, actions_chain, aggregation_analysis_result_optional); current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); } diff --git a/tests/queries/0_stateless/03155_analyzer_interpolate.sql b/tests/queries/0_stateless/03155_analyzer_interpolate.sql index 9b56106f2b4..b3c1d233f47 100644 --- a/tests/queries/0_stateless/03155_analyzer_interpolate.sql +++ b/tests/queries/0_stateless/03155_analyzer_interpolate.sql @@ -1,7 +1,12 @@ -- https://github.com/ClickHouse/ClickHouse/issues/62464 SET allow_experimental_analyzer = 1; -SELECT n, [number] as inter FROM ( +SELECT n, [number] AS inter FROM ( SELECT toFloat32(number % 10) AS n, number FROM numbers(10) WHERE number % 3 = 1 -) group by n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS [5]); +) GROUP BY n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS [5]); + +SELECT n, number+5 AS inter FROM ( -- { serverError NOT_AN_AGGREGATE } + SELECT toFloat32(number % 10) AS n, number, number*2 AS mn + FROM numbers(10) WHERE number % 3 = 1 +) GROUP BY n, inter ORDER BY n WITH FILL FROM 0 TO 5.51 STEP 0.5 INTERPOLATE (inter AS mn * 2); From f065128ef2d67dfa4709f5d783d3c5a33b6f1e42 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 07:16:07 +0000 Subject: [PATCH 181/392] Fix style --- src/Compression/CompressionCodecDoubleDelta.cpp | 5 +++++ src/Coordination/KeeperServer.cpp | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 443b9d33532..cbd8cd57a62 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -21,6 +21,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + /** NOTE DoubleDelta is surprisingly bad name. The only excuse is that it comes from an academic paper. 
* Most people will think that "double delta" is just applying delta transform twice. * But in fact it is something more than applying delta transform twice. diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 953072c5b0e..b07c90b8660 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -45,7 +45,6 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int LOGICAL_ERROR; extern const int INVALID_CONFIG_PARAMETER; - extern const int UNEXPECTED_ZOOKEEPER_ERROR; } using namespace std::chrono_literals; From 113bb0000510b30c0845593911baa6d72cd5fb20 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 08:34:59 +0000 Subject: [PATCH 182/392] Fix clang-tidy "-readability-redundant-inline-specifier" --- .clang-tidy | 1 - base/base/BorrowedObjectPool.h | 14 ++--- .../library-bridge/LibraryBridgeHandlers.h | 2 +- programs/server/MetricsTransmitter.h | 8 +-- .../AggregateFunctionSequenceNextNode.cpp | 2 +- .../Combinators/AggregateFunctionIf.cpp | 4 +- src/AggregateFunctions/QuantileTDigest.h | 2 +- src/AggregateFunctions/QuantileTiming.h | 2 +- src/AggregateFunctions/ThetaSketchData.h | 4 +- src/AggregateFunctions/UniqVariadicHash.h | 8 +-- src/AggregateFunctions/UniquesHashSet.h | 10 ++-- ...egateFunctionsArithmericOperationsPass.cpp | 4 +- .../Passes/ComparisonTupleEliminationPass.cpp | 2 +- .../Passes/FunctionToSubcolumnsPass.cpp | 2 +- .../Passes/NormalizeCountVariantsPass.cpp | 2 +- .../RewriteAggregateFunctionWithIfPass.cpp | 2 +- .../RewriteSumFunctionWithSumAndCountPass.cpp | 2 +- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 4 +- .../CatBoostLibraryBridgeHelper.h | 14 ++--- .../ExternalDictionaryLibraryBridgeHelper.h | 20 +++---- src/BridgeHelper/IBridgeHelper.h | 6 +- src/BridgeHelper/LibraryBridgeHelper.h | 2 +- src/BridgeHelper/XDBCBridgeHelper.h | 16 +++--- src/Common/CPUID.h | 4 +- src/Common/ColumnsHashingImpl.h | 2 +- src/Common/CombinedCardinalityEstimator.h | 6 +- src/Common/CompactArray.h | 2 +- src/Common/CounterInFile.h | 2 +- src/Common/CurrentThread.h | 4 +- src/Common/HashTable/FixedHashTable.h | 2 +- src/Common/HashTable/HashTable.h | 2 +- src/Common/HashTable/PackedHashMap.h | 2 +- src/Common/HashTable/SmallTable.h | 2 +- src/Common/HyperLogLogCounter.h | 20 +++---- src/Common/IntervalTree.h | 18 +++--- src/Common/JSONParsers/SimdJSONParser.h | 36 ++++++------ src/Common/PODArray.h | 2 +- src/Common/PoolBase.h | 2 +- src/Common/RadixSort.h | 4 +- src/Common/SpaceSaving.h | 4 +- src/Common/ThreadProfileEvents.h | 2 +- src/Common/Volnitsky.h | 18 +++--- src/Common/ZooKeeper/IKeeper.h | 6 +- src/Common/findExtreme.cpp | 4 +- src/Core/Field.h | 4 +- src/Core/Joins.h | 24 ++++---- src/Daemon/BaseDaemon.h | 2 +- src/DataTypes/DataTypeDecimalBase.h | 2 +- src/Dictionaries/CacheDictionaryStorage.h | 8 +-- src/Dictionaries/DictionaryHelpers.h | 8 +-- src/Dictionaries/Embedded/RegionsNames.h | 4 +- src/Dictionaries/ICacheDictionaryStorage.h | 16 +++--- src/Dictionaries/IPAddressDictionary.cpp | 2 +- src/Dictionaries/RegExpTreeDictionary.cpp | 4 +- src/Dictionaries/SSDCacheDictionaryStorage.h | 56 +++++++++---------- src/Disks/IO/IOUringReader.h | 4 +- src/Functions/DivisionUtils.h | 6 +- src/Functions/ExtractString.h | 6 +- src/Functions/FunctionBinaryArithmetic.h | 8 +-- src/Functions/FunctionSQLJSON.h | 20 +++---- src/Functions/FunctionsAES.h | 4 +- src/Functions/FunctionsBitToArray.cpp | 2 +- src/Functions/FunctionsCodingIP.cpp | 4 +- 
src/Functions/FunctionsConsistentHashing.h | 2 +- .../FunctionsLanguageClassification.cpp | 2 +- src/Functions/FunctionsLogical.cpp | 8 +-- src/Functions/FunctionsLogical.h | 42 +++++++------- .../FunctionsProgrammingClassification.cpp | 2 +- src/Functions/FunctionsRound.h | 2 +- src/Functions/FunctionsStringHash.cpp | 20 +++---- src/Functions/FunctionsStringSimilarity.cpp | 8 +-- src/Functions/FunctionsTimeWindow.h | 8 +-- .../FunctionsTonalityClassification.cpp | 2 +- src/Functions/GCDLCMImpl.h | 2 +- src/Functions/GregorianDate.cpp | 10 ++-- src/Functions/PolygonUtils.h | 2 +- src/Functions/TransformDateTime64.h | 8 +-- src/Functions/abs.cpp | 2 +- src/Functions/array/arrayIndex.h | 16 +++--- src/Functions/array/arrayNorm.cpp | 26 ++++----- src/Functions/bitAnd.cpp | 4 +- src/Functions/bitBoolMaskAnd.cpp | 2 +- src/Functions/bitBoolMaskOr.cpp | 2 +- src/Functions/bitCount.cpp | 2 +- src/Functions/bitHammingDistance.cpp | 2 +- src/Functions/bitNot.cpp | 4 +- src/Functions/bitOr.cpp | 4 +- src/Functions/bitRotateLeft.cpp | 4 +- src/Functions/bitRotateRight.cpp | 4 +- src/Functions/bitShiftLeft.cpp | 4 +- src/Functions/bitShiftRight.cpp | 6 +- src/Functions/bitSwapLastTwo.cpp | 4 +- src/Functions/bitTest.cpp | 2 +- src/Functions/bitTestAll.cpp | 2 +- src/Functions/bitTestAny.cpp | 2 +- src/Functions/bitWrapperFunc.cpp | 2 +- src/Functions/bitXor.cpp | 4 +- src/Functions/dateName.cpp | 18 +++--- src/Functions/divide.cpp | 4 +- src/Functions/divideDecimal.cpp | 2 +- src/Functions/factorial.cpp | 2 +- src/Functions/greatCircleDistance.cpp | 10 ++-- src/Functions/greatest.cpp | 6 +- src/Functions/h3GetUnidirectionalEdge.cpp | 2 +- src/Functions/initialQueryID.cpp | 6 +- src/Functions/intDiv.cpp | 2 +- src/Functions/intDivOrZero.cpp | 2 +- src/Functions/intExp10.cpp | 2 +- src/Functions/intExp2.cpp | 4 +- src/Functions/isValidUTF8.cpp | 4 +- src/Functions/jumpConsistentHash.cpp | 2 +- src/Functions/kostikConsistentHash.cpp | 2 +- src/Functions/least.cpp | 6 +- src/Functions/minus.cpp | 6 +- src/Functions/modulo.cpp | 2 +- src/Functions/moduloOrZero.cpp | 2 +- src/Functions/multiply.cpp | 6 +- src/Functions/multiplyDecimal.cpp | 2 +- src/Functions/negate.cpp | 4 +- src/Functions/plus.cpp | 6 +- src/Functions/queryID.cpp | 6 +- src/Functions/repeat.cpp | 4 +- src/Functions/roundAge.cpp | 2 +- src/Functions/roundDuration.cpp | 2 +- src/Functions/roundToExp2.cpp | 2 +- src/Functions/sign.cpp | 2 +- src/Functions/space.cpp | 2 +- src/Functions/tokenExtractors.cpp | 2 +- src/IO/BufferBase.h | 24 ++++---- src/IO/HTTPHeaderEntries.h | 2 +- src/IO/HadoopSnappyReadBuffer.h | 4 +- src/IO/IReadableWriteBuffer.h | 2 +- src/IO/PeekableReadBuffer.h | 6 +- src/IO/ReadBuffer.h | 2 +- src/IO/S3/Requests.h | 2 +- src/IO/WriteBuffer.h | 6 +- src/IO/ZstdDeflatingAppendableWriteBuffer.h | 2 +- src/Interpreters/DDLTask.h | 8 +-- src/Interpreters/DatabaseCatalog.h | 2 +- src/Interpreters/JIT/CHJIT.cpp | 14 ++--- src/Interpreters/JIT/CHJIT.h | 2 +- src/Interpreters/JIT/CompileDAG.h | 16 +++--- src/Interpreters/JoinUtils.h | 2 +- .../examples/hash_map_string_3.cpp | 2 +- .../Impl/CustomSeparatedRowInputFormat.h | 2 +- .../Formats/Impl/TemplateRowInputFormat.h | 2 +- src/Processors/Port.h | 6 +- src/Server/HTTPHandler.h | 6 +- src/Storages/Cache/ExternalDataSourceCache.h | 2 +- src/Storages/Cache/RemoteCacheController.h | 20 +++---- src/Storages/Hive/HiveFile.h | 4 +- src/Storages/Kafka/KafkaConsumer.h | 6 +- .../MergeTree/BackgroundProcessList.h | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 14 ++--- 
.../MergeTree/MergeTreeBlockReadUtils.h | 8 +-- .../MergeTree/MergeTreeIndexGranularityInfo.h | 4 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- src/Storages/UVLoop.h | 4 +- src/TableFunctions/ITableFunction.h | 2 +- 159 files changed, 490 insertions(+), 491 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index e2f318562ec..66417c41c46 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -129,7 +129,6 @@ Checks: [ '-readability-avoid-nested-conditional-operator', '-modernize-use-designated-initializers', '-performance-enum-size', - '-readability-redundant-inline-specifier', '-readability-redundant-member-init', '-bugprone-crtp-constructor-accessibility', '-bugprone-suspicious-stringview-data-usage', diff --git a/base/base/BorrowedObjectPool.h b/base/base/BorrowedObjectPool.h index 05a23d5835e..f5ef28582b2 100644 --- a/base/base/BorrowedObjectPool.h +++ b/base/base/BorrowedObjectPool.h @@ -86,7 +86,7 @@ public: } /// Return object into pool. Client must return same object that was borrowed. - inline void returnObject(T && object_to_return) + void returnObject(T && object_to_return) { { std::lock_guard lock(objects_mutex); @@ -99,20 +99,20 @@ public: } /// Max pool size - inline size_t maxSize() const + size_t maxSize() const { return max_size; } /// Allocated objects size by the pool. If allocatedObjectsSize == maxSize then pool is full. - inline size_t allocatedObjectsSize() const + size_t allocatedObjectsSize() const { std::lock_guard lock(objects_mutex); return allocated_objects_size; } /// Returns allocatedObjectsSize == maxSize - inline bool isFull() const + bool isFull() const { std::lock_guard lock(objects_mutex); return allocated_objects_size == max_size; @@ -120,7 +120,7 @@ public: /// Borrowed objects size. If borrowedObjectsSize == allocatedObjectsSize and pool is full. /// Then client will wait during borrowObject function call. 
- inline size_t borrowedObjectsSize() const + size_t borrowedObjectsSize() const { std::lock_guard lock(objects_mutex); return borrowed_objects_size; @@ -129,7 +129,7 @@ public: private: template - inline T allocateObjectForBorrowing(const std::unique_lock &, FactoryFunc && func) + T allocateObjectForBorrowing(const std::unique_lock &, FactoryFunc && func) { ++allocated_objects_size; ++borrowed_objects_size; @@ -137,7 +137,7 @@ private: return std::forward(func)(); } - inline T borrowFromObjects(const std::unique_lock &) + T borrowFromObjects(const std::unique_lock &) { T dst; detail::moveOrCopyIfThrow(std::move(objects.back()), dst); diff --git a/programs/library-bridge/LibraryBridgeHandlers.h b/programs/library-bridge/LibraryBridgeHandlers.h index 1db71eb24cb..62fbf2caede 100644 --- a/programs/library-bridge/LibraryBridgeHandlers.h +++ b/programs/library-bridge/LibraryBridgeHandlers.h @@ -23,7 +23,7 @@ public: void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override; private: - static constexpr inline auto FORMAT = "RowBinary"; + static constexpr auto FORMAT = "RowBinary"; const size_t keep_alive_timeout; LoggerPtr log; diff --git a/programs/server/MetricsTransmitter.h b/programs/server/MetricsTransmitter.h index 23420117b56..24069a60071 100644 --- a/programs/server/MetricsTransmitter.h +++ b/programs/server/MetricsTransmitter.h @@ -56,10 +56,10 @@ private: std::condition_variable cond; std::optional thread; - static inline constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents."; - static inline constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative."; - static inline constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics."; - static inline constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics."; + static constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents."; + static constexpr auto profile_events_cumulative_path_prefix = "ClickHouse.ProfileEventsCumulative."; + static constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics."; + static constexpr auto asynchronous_metrics_path_prefix = "ClickHouse.AsynchronousMetrics."; }; } diff --git a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp index bed10333af0..b3824720b04 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp @@ -341,7 +341,7 @@ public: value[i] = Node::read(buf, arena); } - inline std::optional getBaseIndex(Data & data) const + std::optional getBaseIndex(Data & data) const { if (data.value.size() == 0) return {}; diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp b/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp index 9b5ee79a533..3e21ffa3418 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp +++ b/src/AggregateFunctions/Combinators/AggregateFunctionIf.cpp @@ -73,7 +73,7 @@ private: using Base = AggregateFunctionNullBase>; - inline bool singleFilter(const IColumn ** columns, size_t row_num) const + bool singleFilter(const IColumn ** columns, size_t row_num) const { const IColumn * filter_column = columns[num_arguments - 1]; @@ -261,7 +261,7 @@ public: filter_is_only_null = arguments.back()->onlyNull(); } - static inline bool singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) + static bool 
singleFilter(const IColumn ** columns, size_t row_num, size_t num_arguments) { return assert_cast(*columns[num_arguments - 1]).getData()[row_num]; } diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h index 9d84f079daa..d5a4f6b576a 100644 --- a/src/AggregateFunctions/QuantileTDigest.h +++ b/src/AggregateFunctions/QuantileTDigest.h @@ -138,7 +138,7 @@ class QuantileTDigest compress(); } - inline bool canBeMerged(const BetterFloat & l_mean, const Value & r_mean) + bool canBeMerged(const BetterFloat & l_mean, const Value & r_mean) { return l_mean == r_mean || (!std::isinf(l_mean) && !std::isinf(r_mean)); } diff --git a/src/AggregateFunctions/QuantileTiming.h b/src/AggregateFunctions/QuantileTiming.h index 45fbf38258f..eef15828fc0 100644 --- a/src/AggregateFunctions/QuantileTiming.h +++ b/src/AggregateFunctions/QuantileTiming.h @@ -262,7 +262,7 @@ namespace detail UInt64 count_big[BIG_SIZE]; /// Get value of quantile by index in array `count_big`. - static inline UInt16 indexInBigToValue(size_t i) + static UInt16 indexInBigToValue(size_t i) { return (i * BIG_PRECISION) + SMALL_THRESHOLD + (intHash32<0>(i) % BIG_PRECISION - (BIG_PRECISION / 2)); /// A small randomization so that it is not noticeable that all the values are even. diff --git a/src/AggregateFunctions/ThetaSketchData.h b/src/AggregateFunctions/ThetaSketchData.h index f32386d945b..99dca27673d 100644 --- a/src/AggregateFunctions/ThetaSketchData.h +++ b/src/AggregateFunctions/ThetaSketchData.h @@ -24,14 +24,14 @@ private: std::unique_ptr sk_update; std::unique_ptr sk_union; - inline datasketches::update_theta_sketch * getSkUpdate() + datasketches::update_theta_sketch * getSkUpdate() { if (!sk_update) sk_update = std::make_unique(datasketches::update_theta_sketch::builder().build()); return sk_update.get(); } - inline datasketches::theta_union * getSkUnion() + datasketches::theta_union * getSkUnion() { if (!sk_union) sk_union = std::make_unique(datasketches::theta_union::builder().build()); diff --git a/src/AggregateFunctions/UniqVariadicHash.h b/src/AggregateFunctions/UniqVariadicHash.h index 840380e7f0f..5bb245397d4 100644 --- a/src/AggregateFunctions/UniqVariadicHash.h +++ b/src/AggregateFunctions/UniqVariadicHash.h @@ -38,7 +38,7 @@ bool isAllArgumentsContiguousInMemory(const DataTypes & argument_types); template <> struct UniqVariadicHash { - static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) { UInt64 hash; @@ -65,7 +65,7 @@ struct UniqVariadicHash template <> struct UniqVariadicHash { - static inline UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) { UInt64 hash; @@ -94,7 +94,7 @@ struct UniqVariadicHash template <> struct UniqVariadicHash { - static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) { const IColumn ** column = columns; const IColumn ** columns_end = column + num_args; @@ -114,7 +114,7 @@ struct UniqVariadicHash template <> struct UniqVariadicHash { - static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) + static UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) { const auto & tuple_columns = assert_cast(columns[0])->getColumns(); diff --git a/src/AggregateFunctions/UniquesHashSet.h 
b/src/AggregateFunctions/UniquesHashSet.h index d6fc2bb6634..d5241547711 100644 --- a/src/AggregateFunctions/UniquesHashSet.h +++ b/src/AggregateFunctions/UniquesHashSet.h @@ -105,14 +105,14 @@ private: } } - inline size_t buf_size() const { return 1ULL << size_degree; } /// NOLINT - inline size_t max_fill() const { return 1ULL << (size_degree - 1); } /// NOLINT - inline size_t mask() const { return buf_size() - 1; } + size_t buf_size() const { return 1ULL << size_degree; } /// NOLINT + size_t max_fill() const { return 1ULL << (size_degree - 1); } /// NOLINT + size_t mask() const { return buf_size() - 1; } - inline size_t place(HashValue x) const { return (x >> UNIQUES_HASH_BITS_FOR_SKIP) & mask(); } + size_t place(HashValue x) const { return (x >> UNIQUES_HASH_BITS_FOR_SKIP) & mask(); } /// The value is divided by 2 ^ skip_degree - inline bool good(HashValue hash) const { return hash == ((hash >> skip_degree) << skip_degree); } + bool good(HashValue hash) const { return hash == ((hash >> skip_degree) << skip_degree); } HashValue hash(Value key) const { return static_cast(Hash()(key)); } diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index f96ba22eb7a..9153bc4eca2 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -173,13 +173,13 @@ private: return arithmetic_function_clone; } - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, getContext()); function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) + static void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) { auto function_aggregate_function = function_node.getAggregateFunction(); diff --git a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp index f8233f473f8..ebefc12ae53 100644 --- a/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp +++ b/src/Analyzer/Passes/ComparisonTupleEliminationPass.cpp @@ -184,7 +184,7 @@ private: return result_function; } - inline QueryTreeNodePtr makeEqualsFunction(QueryTreeNodePtr lhs_argument, QueryTreeNodePtr rhs_argument) const + QueryTreeNodePtr makeEqualsFunction(QueryTreeNodePtr lhs_argument, QueryTreeNodePtr rhs_argument) const { return makeComparisonFunction(std::move(lhs_argument), std::move(rhs_argument), "equals"); } diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 6248f462979..15ac8d642a4 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -215,7 +215,7 @@ public: } private: - inline void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const + void resolveOrdinaryFunctionNode(FunctionNode & function_node, const String & function_name) const { auto function = FunctionFactory::instance().get(function_name, getContext()); 
function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index 0d6f3fc2d87..e70e08e65f4 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -59,7 +59,7 @@ public: } } private: - static inline void resolveAsCountAggregateFunction(FunctionNode & function_node) + static void resolveAsCountAggregateFunction(FunctionNode & function_node) { AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties); diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index 513dd0054d6..a82ad3dced1 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -108,7 +108,7 @@ public: } private: - static inline void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) + static void resolveAsAggregateFunctionWithIf(FunctionNode & function_node, const DataTypes & argument_types) { auto result_type = function_node.getResultType(); diff --git a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp index 917256bf4b1..5646d26f7f6 100644 --- a/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp +++ b/src/Analyzer/Passes/RewriteSumFunctionWithSumAndCountPass.cpp @@ -110,7 +110,7 @@ private: function_node.resolveAsFunction(function->build(function_node.getArgumentColumns())); } - static inline void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) + static void resolveAsAggregateFunctionNode(FunctionNode & function_node, const DataTypePtr & argument_type) { AggregateFunctionProperties properties; const auto aggregate_function = AggregateFunctionFactory::instance().get(function_node.getFunctionName(), diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 1a4712aa697..852cbe75c4a 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -156,7 +156,7 @@ public: } private: - static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) + static void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) { AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get( @@ -165,7 +165,7 @@ private: function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } - inline QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) + QueryTreeNodePtr getMultiplyFunction(QueryTreeNodePtr left, QueryTreeNodePtr right) { auto multiply_function_node = std::make_shared("multiply"); auto & multiply_arguments_nodes = multiply_function_node->getArguments().getNodes(); diff --git a/src/BridgeHelper/CatBoostLibraryBridgeHelper.h b/src/BridgeHelper/CatBoostLibraryBridgeHelper.h index 55dfd715f00..5d5c6d01705 100644 --- a/src/BridgeHelper/CatBoostLibraryBridgeHelper.h +++ b/src/BridgeHelper/CatBoostLibraryBridgeHelper.h @@ -14,8 +14,8 @@ namespace DB class CatBoostLibraryBridgeHelper final : public LibraryBridgeHelper { public: - 
static constexpr inline auto PING_HANDLER = "/catboost_ping"; - static constexpr inline auto MAIN_HANDLER = "/catboost_request"; + static constexpr auto PING_HANDLER = "/catboost_ping"; + static constexpr auto MAIN_HANDLER = "/catboost_request"; explicit CatBoostLibraryBridgeHelper( ContextPtr context_, @@ -38,11 +38,11 @@ protected: bool bridgeHandShake() override; private: - static constexpr inline auto CATBOOST_LIST_METHOD = "catboost_list"; - static constexpr inline auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel"; - static constexpr inline auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels"; - static constexpr inline auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount"; - static constexpr inline auto CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate"; + static constexpr auto CATBOOST_LIST_METHOD = "catboost_list"; + static constexpr auto CATBOOST_REMOVEMODEL_METHOD = "catboost_removeModel"; + static constexpr auto CATBOOST_REMOVEALLMODELS_METHOD = "catboost_removeAllModels"; + static constexpr auto CATBOOST_GETTREECOUNT_METHOD = "catboost_GetTreeCount"; + static constexpr auto CATBOOST_LIB_EVALUATE_METHOD = "catboost_libEvaluate"; Poco::URI createRequestURI(const String & method) const; diff --git a/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h b/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h index 5632fd2a28e..63816aa63ef 100644 --- a/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h +++ b/src/BridgeHelper/ExternalDictionaryLibraryBridgeHelper.h @@ -25,8 +25,8 @@ public: String dict_attributes; }; - static constexpr inline auto PING_HANDLER = "/extdict_ping"; - static constexpr inline auto MAIN_HANDLER = "/extdict_request"; + static constexpr auto PING_HANDLER = "/extdict_ping"; + static constexpr auto MAIN_HANDLER = "/extdict_request"; ExternalDictionaryLibraryBridgeHelper(ContextPtr context_, const Block & sample_block, const Field & dictionary_id_, const LibraryInitData & library_data_); @@ -62,14 +62,14 @@ protected: ReadWriteBufferFromHTTP::OutStreamCallback getInitLibraryCallback() const; private: - static constexpr inline auto EXT_DICT_LIB_NEW_METHOD = "extDict_libNew"; - static constexpr inline auto EXT_DICT_LIB_CLONE_METHOD = "extDict_libClone"; - static constexpr inline auto EXT_DICT_LIB_DELETE_METHOD = "extDict_libDelete"; - static constexpr inline auto EXT_DICT_LOAD_ALL_METHOD = "extDict_loadAll"; - static constexpr inline auto EXT_DICT_LOAD_IDS_METHOD = "extDict_loadIds"; - static constexpr inline auto EXT_DICT_LOAD_KEYS_METHOD = "extDict_loadKeys"; - static constexpr inline auto EXT_DICT_IS_MODIFIED_METHOD = "extDict_isModified"; - static constexpr inline auto EXT_DICT_SUPPORTS_SELECTIVE_LOAD_METHOD = "extDict_supportsSelectiveLoad"; + static constexpr auto EXT_DICT_LIB_NEW_METHOD = "extDict_libNew"; + static constexpr auto EXT_DICT_LIB_CLONE_METHOD = "extDict_libClone"; + static constexpr auto EXT_DICT_LIB_DELETE_METHOD = "extDict_libDelete"; + static constexpr auto EXT_DICT_LOAD_ALL_METHOD = "extDict_loadAll"; + static constexpr auto EXT_DICT_LOAD_IDS_METHOD = "extDict_loadIds"; + static constexpr auto EXT_DICT_LOAD_KEYS_METHOD = "extDict_loadKeys"; + static constexpr auto EXT_DICT_IS_MODIFIED_METHOD = "extDict_isModified"; + static constexpr auto EXT_DICT_SUPPORTS_SELECTIVE_LOAD_METHOD = "extDict_supportsSelectiveLoad"; Poco::URI createRequestURI(const String & method) const; diff --git a/src/BridgeHelper/IBridgeHelper.h b/src/BridgeHelper/IBridgeHelper.h index 6812bd04a03..8ce1c0e143a 100644 --- 
a/src/BridgeHelper/IBridgeHelper.h +++ b/src/BridgeHelper/IBridgeHelper.h @@ -16,9 +16,9 @@ class IBridgeHelper: protected WithContext { public: - static constexpr inline auto DEFAULT_HOST = "127.0.0.1"; - static constexpr inline auto DEFAULT_FORMAT = "RowBinary"; - static constexpr inline auto PING_OK_ANSWER = "Ok."; + static constexpr auto DEFAULT_HOST = "127.0.0.1"; + static constexpr auto DEFAULT_FORMAT = "RowBinary"; + static constexpr auto PING_OK_ANSWER = "Ok."; static const inline std::string PING_METHOD = Poco::Net::HTTPRequest::HTTP_GET; static const inline std::string MAIN_METHOD = Poco::Net::HTTPRequest::HTTP_POST; diff --git a/src/BridgeHelper/LibraryBridgeHelper.h b/src/BridgeHelper/LibraryBridgeHelper.h index 8940f9d1c9e..0c56fe7a221 100644 --- a/src/BridgeHelper/LibraryBridgeHelper.h +++ b/src/BridgeHelper/LibraryBridgeHelper.h @@ -37,7 +37,7 @@ protected: Poco::URI createBaseURI() const override; - static constexpr inline size_t DEFAULT_PORT = 9012; + static constexpr size_t DEFAULT_PORT = 9012; const Poco::Util::AbstractConfiguration & config; LoggerPtr log; diff --git a/src/BridgeHelper/XDBCBridgeHelper.h b/src/BridgeHelper/XDBCBridgeHelper.h index b557e12b85b..5f4c7fd8381 100644 --- a/src/BridgeHelper/XDBCBridgeHelper.h +++ b/src/BridgeHelper/XDBCBridgeHelper.h @@ -52,12 +52,12 @@ class XDBCBridgeHelper : public IXDBCBridgeHelper { public: - static constexpr inline auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT; - static constexpr inline auto PING_HANDLER = "/ping"; - static constexpr inline auto MAIN_HANDLER = "/"; - static constexpr inline auto COL_INFO_HANDLER = "/columns_info"; - static constexpr inline auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote"; - static constexpr inline auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed"; + static constexpr auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT; + static constexpr auto PING_HANDLER = "/ping"; + static constexpr auto MAIN_HANDLER = "/"; + static constexpr auto COL_INFO_HANDLER = "/columns_info"; + static constexpr auto IDENTIFIER_QUOTE_HANDLER = "/identifier_quote"; + static constexpr auto SCHEMA_ALLOWED_HANDLER = "/schema_allowed"; XDBCBridgeHelper( ContextPtr context_, @@ -256,7 +256,7 @@ protected: struct JDBCBridgeMixin { - static constexpr inline auto DEFAULT_PORT = 9019; + static constexpr auto DEFAULT_PORT = 9019; static String configPrefix() { @@ -287,7 +287,7 @@ struct JDBCBridgeMixin struct ODBCBridgeMixin { - static constexpr inline auto DEFAULT_PORT = 9018; + static constexpr auto DEFAULT_PORT = 9018; static String configPrefix() { diff --git a/src/Common/CPUID.h b/src/Common/CPUID.h index d7a714ec5af..b49f7706904 100644 --- a/src/Common/CPUID.h +++ b/src/Common/CPUID.h @@ -69,9 +69,9 @@ union CPUInfo UInt32 edx; } registers; - inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); } + explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); } - inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } + CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } }; inline bool haveRDTSCP() noexcept diff --git a/src/Common/ColumnsHashingImpl.h b/src/Common/ColumnsHashingImpl.h index f74a56292ae..0e013decf1f 100644 --- a/src/Common/ColumnsHashingImpl.h +++ b/src/Common/ColumnsHashingImpl.h @@ -453,7 +453,7 @@ protected: /// Return the columns which actually contain the values of the keys. /// For a given key column, if it is nullable, we return its nested /// column. Otherwise we return the key column itself. 
- inline const ColumnRawPtrs & getActualColumns() const + const ColumnRawPtrs & getActualColumns() const { return actual_columns; } diff --git a/src/Common/CombinedCardinalityEstimator.h b/src/Common/CombinedCardinalityEstimator.h index 0e53755d773..132f00de8eb 100644 --- a/src/Common/CombinedCardinalityEstimator.h +++ b/src/Common/CombinedCardinalityEstimator.h @@ -292,13 +292,13 @@ private: } template - inline T & getContainer() + T & getContainer() { return *reinterpret_cast(address & mask); } template - inline const T & getContainer() const + const T & getContainer() const { return *reinterpret_cast(address & mask); } @@ -309,7 +309,7 @@ private: address |= static_cast(t); } - inline details::ContainerType getContainerType() const + details::ContainerType getContainerType() const { return static_cast(address & ~mask); } diff --git a/src/Common/CompactArray.h b/src/Common/CompactArray.h index 613dc3d0b90..7b2bd658d2e 100644 --- a/src/Common/CompactArray.h +++ b/src/Common/CompactArray.h @@ -116,7 +116,7 @@ public: /** Return the current cell number and the corresponding content. */ - inline std::pair get() const + std::pair get() const { if ((current_bucket_index == 0) || is_eof) throw Exception(ErrorCodes::NO_AVAILABLE_DATA, "No available data."); diff --git a/src/Common/CounterInFile.h b/src/Common/CounterInFile.h index 854bf7cc675..0a11e52be2c 100644 --- a/src/Common/CounterInFile.h +++ b/src/Common/CounterInFile.h @@ -37,7 +37,7 @@ namespace fs = std::filesystem; class CounterInFile { private: - static inline constexpr size_t SMALL_READ_WRITE_BUFFER_SIZE = 16; + static constexpr size_t SMALL_READ_WRITE_BUFFER_SIZE = 16; public: /// path - the name of the file, including the path diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index e2b627a7f29..8dade8c6fd5 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -62,9 +62,9 @@ public: static void updatePerformanceCountersIfNeeded(); static ProfileEvents::Counters & getProfileEvents(); - inline ALWAYS_INLINE static MemoryTracker * getMemoryTracker() + static MemoryTracker * getMemoryTracker() { - if (unlikely(!current_thread)) + if (!current_thread) [[unlikely]] return nullptr; return ¤t_thread->memory_tracker; } diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index 49675aaafbc..8f6ec1604ee 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -261,7 +261,7 @@ public: return true; } - inline const value_type & get() const + const value_type & get() const { if (!is_initialized || is_eof) throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data"); diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index 9050b7ef6d7..a600f57b06a 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -844,7 +844,7 @@ public: return true; } - inline const value_type & get() const + const value_type & get() const { if (!is_initialized || is_eof) throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data"); diff --git a/src/Common/HashTable/PackedHashMap.h b/src/Common/HashTable/PackedHashMap.h index 0d25addb58e..72eb721b274 100644 --- a/src/Common/HashTable/PackedHashMap.h +++ b/src/Common/HashTable/PackedHashMap.h @@ -69,7 +69,7 @@ struct PackedHashMapCell : public HashMapCellvalue.first, state); } static bool isZero(const Key key, const State & /*state*/) { return ZeroTraits::check(key); } - static inline bool bitEqualsByValue(key_type 
a, key_type b) { return a == b; } + static bool bitEqualsByValue(key_type a, key_type b) { return a == b; } template auto get() const diff --git a/src/Common/HashTable/SmallTable.h b/src/Common/HashTable/SmallTable.h index 3229e4748ea..63a6b932dd0 100644 --- a/src/Common/HashTable/SmallTable.h +++ b/src/Common/HashTable/SmallTable.h @@ -112,7 +112,7 @@ public: return true; } - inline const value_type & get() const + const value_type & get() const { if (!is_initialized || is_eof) throw DB::Exception(DB::ErrorCodes::NO_AVAILABLE_DATA, "No available data"); diff --git a/src/Common/HyperLogLogCounter.h b/src/Common/HyperLogLogCounter.h index bacd4cc7288..9b2b33dc918 100644 --- a/src/Common/HyperLogLogCounter.h +++ b/src/Common/HyperLogLogCounter.h @@ -128,13 +128,13 @@ public: { } - inline void update(UInt8 cur_rank, UInt8 new_rank) + void update(UInt8 cur_rank, UInt8 new_rank) { denominator -= static_cast(1.0) / (1ULL << cur_rank); denominator += static_cast(1.0) / (1ULL << new_rank); } - inline void update(UInt8 rank) + void update(UInt8 rank) { denominator += static_cast(1.0) / (1ULL << rank); } @@ -166,13 +166,13 @@ public: rank_count[0] = static_cast(initial_value); } - inline void update(UInt8 cur_rank, UInt8 new_rank) + void update(UInt8 cur_rank, UInt8 new_rank) { --rank_count[cur_rank]; ++rank_count[new_rank]; } - inline void update(UInt8 rank) + void update(UInt8 rank) { ++rank_count[rank]; } @@ -429,13 +429,13 @@ public: private: /// Extract subset of bits in [begin, end[ range. - inline HashValueType extractBitSequence(HashValueType val, UInt8 begin, UInt8 end) const + HashValueType extractBitSequence(HashValueType val, UInt8 begin, UInt8 end) const { return (val >> begin) & ((1ULL << (end - begin)) - 1); } /// Rank is number of trailing zeros. - inline UInt8 calculateRank(HashValueType val) const + UInt8 calculateRank(HashValueType val) const { if (unlikely(val == 0)) return max_rank; @@ -448,7 +448,7 @@ private: return zeros_plus_one; } - inline HashValueType getHash(Value key) const + HashValueType getHash(Value key) const { /// NOTE: this should be OK, since value is the same as key for HLL. return static_cast( @@ -496,7 +496,7 @@ private: throw Poco::Exception("Internal error", DB::ErrorCodes::LOGICAL_ERROR); } - inline double applyCorrection(double raw_estimate) const + double applyCorrection(double raw_estimate) const { double fixed_estimate; @@ -525,7 +525,7 @@ private: /// Correction used in HyperLogLog++ algorithm. /// Source: "HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm" /// (S. Heule et al., Proceedings of the EDBT 2013 Conference). - inline double applyBiasCorrection(double raw_estimate) const + double applyBiasCorrection(double raw_estimate) const { double fixed_estimate; @@ -540,7 +540,7 @@ private: /// Calculation of unique values using LinearCounting algorithm. /// Source: "A Linear-time Probabilistic Counting Algorithm for Database Applications" /// (Whang et al., ACM Trans. Database Syst., pp. 208-229, 1990). 
- inline double applyLinearCorrection(double raw_estimate) const + double applyLinearCorrection(double raw_estimate) const { double fixed_estimate; diff --git a/src/Common/IntervalTree.h b/src/Common/IntervalTree.h index fbd1de3197e..db7f5238921 100644 --- a/src/Common/IntervalTree.h +++ b/src/Common/IntervalTree.h @@ -23,7 +23,7 @@ struct Interval Interval(IntervalStorageType left_, IntervalStorageType right_) : left(left_), right(right_) { } - inline bool contains(IntervalStorageType point) const { return left <= point && point <= right; } + bool contains(IntervalStorageType point) const { return left <= point && point <= right; } }; template @@ -290,7 +290,7 @@ private: IntervalStorageType middle_element; - inline bool hasValue() const { return sorted_intervals_range_size != 0; } + bool hasValue() const { return sorted_intervals_range_size != 0; } }; using IntervalWithEmptyValue = Interval; @@ -585,7 +585,7 @@ private: } } - inline size_t findFirstIteratorNodeIndex() const + size_t findFirstIteratorNodeIndex() const { size_t nodes_size = nodes.size(); size_t result_index = 0; @@ -602,7 +602,7 @@ private: return result_index; } - inline size_t findLastIteratorNodeIndex() const + size_t findLastIteratorNodeIndex() const { if (unlikely(nodes.empty())) return 0; @@ -618,7 +618,7 @@ private: return result_index; } - inline void increaseIntervalsSize() + void increaseIntervalsSize() { /// Before tree is build we store all intervals size in our first node to allow tree iteration. ++intervals_size; @@ -630,7 +630,7 @@ private: size_t intervals_size = 0; bool tree_is_built = false; - static inline const Interval & getInterval(const IntervalWithValue & interval_with_value) + static const Interval & getInterval(const IntervalWithValue & interval_with_value) { if constexpr (is_empty_value) return interval_with_value; @@ -639,7 +639,7 @@ private: } template - static inline bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback) + static bool callCallback(const IntervalWithValue & interval, IntervalCallback && callback) { if constexpr (is_empty_value) return callback(interval); @@ -647,7 +647,7 @@ private: return callback(interval.first, interval.second); } - static inline void + static void intervalsToPoints(const std::vector & intervals, std::vector & temporary_points_storage) { for (const auto & interval_with_value : intervals) @@ -658,7 +658,7 @@ private: } } - static inline IntervalStorageType pointsMedian(std::vector & points) + static IntervalStorageType pointsMedian(std::vector & points) { size_t size = points.size(); size_t middle_element_index = size / 2; diff --git a/src/Common/JSONParsers/SimdJSONParser.h b/src/Common/JSONParsers/SimdJSONParser.h index a8594710d20..827d142266a 100644 --- a/src/Common/JSONParsers/SimdJSONParser.h +++ b/src/Common/JSONParsers/SimdJSONParser.h @@ -26,62 +26,62 @@ class SimdJSONBasicFormatter { public: explicit SimdJSONBasicFormatter(PaddedPODArray & buffer_) : buffer(buffer_) {} - inline void comma() { oneChar(','); } + void comma() { oneChar(','); } /** Start an array, prints [ **/ - inline void startArray() { oneChar('['); } + void startArray() { oneChar('['); } /** End an array, prints ] **/ - inline void endArray() { oneChar(']'); } + void endArray() { oneChar(']'); } /** Start an array, prints { **/ - inline void startObject() { oneChar('{'); } + void startObject() { oneChar('{'); } /** Start an array, prints } **/ - inline void endObject() { oneChar('}'); } + void endObject() { oneChar('}'); } /** Prints a true **/ - inline 
void trueAtom() + void trueAtom() { const char * s = "true"; buffer.insert(s, s + 4); } /** Prints a false **/ - inline void falseAtom() + void falseAtom() { const char * s = "false"; buffer.insert(s, s + 5); } /** Prints a null **/ - inline void nullAtom() + void nullAtom() { const char * s = "null"; buffer.insert(s, s + 4); } /** Prints a number **/ - inline void number(int64_t x) + void number(int64_t x) { char number_buffer[24]; auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x); buffer.insert(number_buffer, res.ptr); } /** Prints a number **/ - inline void number(uint64_t x) + void number(uint64_t x) { char number_buffer[24]; auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x); buffer.insert(number_buffer, res.ptr); } /** Prints a number **/ - inline void number(double x) + void number(double x) { char number_buffer[24]; auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x); buffer.insert(number_buffer, res.ptr); } /** Prints a key (string + colon) **/ - inline void key(std::string_view unescaped) + void key(std::string_view unescaped) { string(unescaped); oneChar(':'); } /** Prints a string. The string is escaped as needed. **/ - inline void string(std::string_view unescaped) + void string(std::string_view unescaped) { oneChar('\"'); size_t i = 0; @@ -165,7 +165,7 @@ public: oneChar('\"'); } - inline void oneChar(char c) + void oneChar(char c) { buffer.push_back(c); } @@ -182,7 +182,7 @@ class SimdJSONElementFormatter public: explicit SimdJSONElementFormatter(PaddedPODArray & buffer_) : format(buffer_) {} /** Append an element to the builder (to be printed) **/ - inline void append(simdjson::dom::element value) + void append(simdjson::dom::element value) { switch (value.type()) { @@ -224,7 +224,7 @@ public: } } /** Append an array to the builder (to be printed) **/ - inline void append(simdjson::dom::array value) + void append(simdjson::dom::array value) { format.startArray(); auto iter = value.begin(); @@ -241,7 +241,7 @@ public: format.endArray(); } - inline void append(simdjson::dom::object value) + void append(simdjson::dom::object value) { format.startObject(); auto pair = value.begin(); @@ -258,7 +258,7 @@ public: format.endObject(); } - inline void append(simdjson::dom::key_value_pair kv) + void append(simdjson::dom::key_value_pair kv) { format.key(kv.key); append(kv.value); diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index b4069027ad1..ece5114a998 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -284,7 +284,7 @@ public: } template - inline void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]]) + void assertNotIntersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]]) { #if !defined(NDEBUG) const char * ptr_begin = reinterpret_cast(&*from_begin); diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h index d6fc1656eca..fb0c75e7c95 100644 --- a/src/Common/PoolBase.h +++ b/src/Common/PoolBase.h @@ -174,7 +174,7 @@ public: items.emplace_back(std::make_shared(allocObject(), *this)); } - inline size_t size() + size_t size() { std::lock_guard lock(mutex); return items.size(); diff --git a/src/Common/RadixSort.h b/src/Common/RadixSort.h index a30e19d8212..238321ec76e 100644 --- a/src/Common/RadixSort.h +++ b/src/Common/RadixSort.h @@ -385,7 +385,7 @@ private: * PASS is counted from least significant (0), so the first pass is NUM_PASSES - 1. 
*/ template - static inline void radixSortMSDInternal(Element * arr, size_t size, size_t limit) + static void radixSortMSDInternal(Element * arr, size_t size, size_t limit) { /// The beginning of every i-1-th bucket. 0th element will be equal to 1st. /// Last element will point to array end. @@ -528,7 +528,7 @@ private: // A helper to choose sorting algorithm based on array length template - static inline void radixSortMSDInternalHelper(Element * arr, size_t size, size_t limit) + static void radixSortMSDInternalHelper(Element * arr, size_t size, size_t limit) { if (size <= INSERTION_SORT_THRESHOLD) insertionSortInternal(arr, size); diff --git a/src/Common/SpaceSaving.h b/src/Common/SpaceSaving.h index 7a740ae6c9b..81ac4e71e8c 100644 --- a/src/Common/SpaceSaving.h +++ b/src/Common/SpaceSaving.h @@ -131,12 +131,12 @@ public: ~SpaceSaving() { destroyElements(); } - inline size_t size() const + size_t size() const { return counter_list.size(); } - inline size_t capacity() const + size_t capacity() const { return m_capacity; } diff --git a/src/Common/ThreadProfileEvents.h b/src/Common/ThreadProfileEvents.h index 26aeab08302..0af3ccb4c80 100644 --- a/src/Common/ThreadProfileEvents.h +++ b/src/Common/ThreadProfileEvents.h @@ -107,7 +107,7 @@ struct RUsageCounters } private: - static inline UInt64 getClockMonotonic() + static UInt64 getClockMonotonic() { struct timespec ts; if (0 != clock_gettime(CLOCK_MONOTONIC, &ts)) diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 6513bdb8bc3..9c2852e4a10 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -54,16 +54,16 @@ namespace VolnitskyTraits /// min haystack size to use main algorithm instead of fallback static constexpr size_t min_haystack_size_for_algorithm = 20000; - static inline bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0) + static bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0) { return needle_size < 2 * sizeof(Ngram) || needle_size >= std::numeric_limits::max() || (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm); } - static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad(pos); } + static Ngram toNGram(const UInt8 * const pos) { return unalignedLoad(pos); } template - static inline bool putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase) + static bool putNGramASCIICaseInsensitive(const UInt8 * pos, int offset, Callback && putNGramBase) { struct Chars { @@ -115,7 +115,7 @@ namespace VolnitskyTraits } template - static inline bool putNGramUTF8CaseInsensitive( + static bool putNGramUTF8CaseInsensitive( const UInt8 * pos, int offset, const UInt8 * begin, size_t size, Callback && putNGramBase) { const UInt8 * end = begin + size; @@ -349,7 +349,7 @@ namespace VolnitskyTraits } template - static inline bool putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase) + static bool putNGram(const UInt8 * pos, int offset, [[maybe_unused]] const UInt8 * begin, size_t size, Callback && putNGramBase) { if constexpr (CaseSensitive) { @@ -580,7 +580,7 @@ public: return true; } - inline bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const + bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const { const size_t fallback_size = fallback_needles.size(); for (size_t i = 0; i < fallback_size; ++i) @@ -609,7 +609,7 @@ public: return false; } - inline size_t searchOneFirstIndex(const UInt8 * 
haystack, const UInt8 * haystack_end) const + size_t searchOneFirstIndex(const UInt8 * haystack, const UInt8 * haystack_end) const { const size_t fallback_size = fallback_needles.size(); @@ -647,7 +647,7 @@ public: } template - inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & count_chars) const + UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & count_chars) const { const size_t fallback_size = fallback_needles.size(); @@ -682,7 +682,7 @@ public: } template - inline void searchOneAll(const UInt8 * haystack, const UInt8 * haystack_end, AnsType * answer, const CountCharsCallback & count_chars) const + void searchOneAll(const UInt8 * haystack, const UInt8 * haystack_end, AnsType * answer, const CountCharsCallback & count_chars) const { const size_t fallback_size = fallback_needles.size(); for (size_t i = 0; i < fallback_size; ++i) diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index ec49c94808e..ddd30c4eef2 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -491,12 +491,12 @@ public: incrementErrorMetrics(code); } - inline static Exception createDeprecated(const std::string & msg, Error code_) + static Exception createDeprecated(const std::string & msg, Error code_) { return Exception(msg, code_, 0); } - inline static Exception fromPath(Error code_, const std::string & path) + static Exception fromPath(Error code_, const std::string & path) { return Exception(code_, "Coordination error: {}, path {}", errorMessage(code_), path); } @@ -504,7 +504,7 @@ public: /// Message must be a compile-time constant template requires std::is_convertible_v - inline static Exception fromMessage(Error code_, T && message) + static Exception fromMessage(Error code_, T && message) { return Exception(std::forward(message), code_); } diff --git a/src/Common/findExtreme.cpp b/src/Common/findExtreme.cpp index ce3bbb86d7c..a99b1f2dd3d 100644 --- a/src/Common/findExtreme.cpp +++ b/src/Common/findExtreme.cpp @@ -11,13 +11,13 @@ namespace DB template struct MinComparator { - static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::min(a, b); } + static ALWAYS_INLINE const T & cmp(const T & a, const T & b) { return std::min(a, b); } }; template struct MaxComparator { - static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::max(a, b); } + static ALWAYS_INLINE const T & cmp(const T & a, const T & b) { return std::max(a, b); } }; MULTITARGET_FUNCTION_AVX2_SSE42( diff --git a/src/Core/Field.h b/src/Core/Field.h index 4424d669c4d..73d3f4ec44e 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -855,13 +855,13 @@ template <> struct Field::EnumToType { usi template <> struct Field::EnumToType { using Type = CustomType; }; template <> struct Field::EnumToType { using Type = UInt64; }; -inline constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t) +constexpr bool isInt64OrUInt64FieldType(Field::Types::Which t) { return t == Field::Types::Int64 || t == Field::Types::UInt64; } -inline constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) +constexpr bool isInt64OrUInt64orBoolFieldType(Field::Types::Which t) { return t == Field::Types::Int64 || t == Field::Types::UInt64 diff --git a/src/Core/Joins.h b/src/Core/Joins.h index ccdd6eefab7..96d2b51325c 100644 --- a/src/Core/Joins.h +++ b/src/Core/Joins.h @@ -19,16 +19,16 @@ enum class JoinKind : uint8_t const char * 
toString(JoinKind kind); -inline constexpr bool isLeft(JoinKind kind) { return kind == JoinKind::Left; } -inline constexpr bool isRight(JoinKind kind) { return kind == JoinKind::Right; } -inline constexpr bool isInner(JoinKind kind) { return kind == JoinKind::Inner; } -inline constexpr bool isFull(JoinKind kind) { return kind == JoinKind::Full; } -inline constexpr bool isCrossOrComma(JoinKind kind) { return kind == JoinKind::Comma || kind == JoinKind::Cross; } -inline constexpr bool isRightOrFull(JoinKind kind) { return kind == JoinKind::Right || kind == JoinKind::Full; } -inline constexpr bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; } -inline constexpr bool isInnerOrRight(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Right; } -inline constexpr bool isInnerOrLeft(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Left; } -inline constexpr bool isPaste(JoinKind kind) { return kind == JoinKind::Paste; } +constexpr bool isLeft(JoinKind kind) { return kind == JoinKind::Left; } +constexpr bool isRight(JoinKind kind) { return kind == JoinKind::Right; } +constexpr bool isInner(JoinKind kind) { return kind == JoinKind::Inner; } +constexpr bool isFull(JoinKind kind) { return kind == JoinKind::Full; } +constexpr bool isCrossOrComma(JoinKind kind) { return kind == JoinKind::Comma || kind == JoinKind::Cross; } +constexpr bool isRightOrFull(JoinKind kind) { return kind == JoinKind::Right || kind == JoinKind::Full; } +constexpr bool isLeftOrFull(JoinKind kind) { return kind == JoinKind::Left || kind == JoinKind::Full; } +constexpr bool isInnerOrRight(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Right; } +constexpr bool isInnerOrLeft(JoinKind kind) { return kind == JoinKind::Inner || kind == JoinKind::Left; } +constexpr bool isPaste(JoinKind kind) { return kind == JoinKind::Paste; } /// Allows more optimal JOIN for typical cases. 
enum class JoinStrictness : uint8_t @@ -66,7 +66,7 @@ enum class ASOFJoinInequality : uint8_t const char * toString(ASOFJoinInequality asof_join_inequality); -inline constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_name) +constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_name) { ASOFJoinInequality inequality = ASOFJoinInequality::None; @@ -82,7 +82,7 @@ inline constexpr ASOFJoinInequality getASOFJoinInequality(std::string_view func_ return inequality; } -inline constexpr ASOFJoinInequality reverseASOFJoinInequality(ASOFJoinInequality inequality) +constexpr ASOFJoinInequality reverseASOFJoinInequality(ASOFJoinInequality inequality) { if (inequality == ASOFJoinInequality::Less) return ASOFJoinInequality::Greater; diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index a0f47c44460..3d34d404595 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -40,7 +40,7 @@ class BaseDaemon : public Poco::Util::ServerApplication, public Loggers friend class SignalListener; public: - static inline constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite"; + static constexpr char DEFAULT_GRAPHITE_CONFIG_NAME[] = "graphite"; BaseDaemon(); ~BaseDaemon() override; diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h index 642d2de833f..997c554059b 100644 --- a/src/DataTypes/DataTypeDecimalBase.h +++ b/src/DataTypes/DataTypeDecimalBase.h @@ -147,7 +147,7 @@ public: static T getScaleMultiplier(UInt32 scale); - inline DecimalUtils::DataTypeDecimalTrait getTrait() const + DecimalUtils::DataTypeDecimalTrait getTrait() const { return {precision, scale}; } diff --git a/src/Dictionaries/CacheDictionaryStorage.h b/src/Dictionaries/CacheDictionaryStorage.h index 01217c58e31..a960a916027 100644 --- a/src/Dictionaries/CacheDictionaryStorage.h +++ b/src/Dictionaries/CacheDictionaryStorage.h @@ -754,7 +754,7 @@ private: std::vector attributes; - inline void setCellDeadline(Cell & cell, TimePoint now) + void setCellDeadline(Cell & cell, TimePoint now) { if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0) { @@ -774,7 +774,7 @@ private: cell.deadline = std::chrono::system_clock::to_time_t(deadline); } - inline size_t getCellIndex(const KeyType key) const + size_t getCellIndex(const KeyType key) const { const size_t hash = DefaultHash()(key); const size_t index = hash & size_overlap_mask; @@ -783,7 +783,7 @@ private: using KeyStateAndCellIndex = std::pair; - inline KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const + KeyStateAndCellIndex getKeyStateAndCellIndex(const KeyType key, const time_t now) const { size_t place_value = getCellIndex(key); const size_t place_value_end = place_value + max_collision_length; @@ -810,7 +810,7 @@ private: return std::make_pair(KeyState::not_found, place_value & size_overlap_mask); } - inline size_t getCellIndexForInsert(const KeyType & key) const + size_t getCellIndexForInsert(const KeyType & key) const { size_t place_value = getCellIndex(key); const size_t place_value_end = place_value + max_collision_length; diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 8bf190d3edc..64fc05e99ab 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -44,7 +44,7 @@ public: { } - inline bool isConstant() const { return default_values_column == nullptr; } + bool isConstant() const { return default_values_column == nullptr; } Field getDefaultValue(size_t 
row) const { @@ -450,17 +450,17 @@ public: keys_size = key_columns.front()->size(); } - inline size_t getKeysSize() const + size_t getKeysSize() const { return keys_size; } - inline size_t getCurrentKeyIndex() const + size_t getCurrentKeyIndex() const { return current_key_index; } - inline KeyType extractCurrentKey() + KeyType extractCurrentKey() { assert(current_key_index < keys_size); diff --git a/src/Dictionaries/Embedded/RegionsNames.h b/src/Dictionaries/Embedded/RegionsNames.h index 0053c74745a..0e4c1fe8b88 100644 --- a/src/Dictionaries/Embedded/RegionsNames.h +++ b/src/Dictionaries/Embedded/RegionsNames.h @@ -48,14 +48,14 @@ public: }; private: - static inline constexpr const char * languages[] = + static constexpr const char * languages[] = { #define M(NAME, FALLBACK, NUM) #NAME, FOR_EACH_LANGUAGE(M) #undef M }; - static inline constexpr Language fallbacks[] = + static constexpr Language fallbacks[] = { #define M(NAME, FALLBACK, NUM) Language::FALLBACK, FOR_EACH_LANGUAGE(M) diff --git a/src/Dictionaries/ICacheDictionaryStorage.h b/src/Dictionaries/ICacheDictionaryStorage.h index dcd7434946f..532154cd190 100644 --- a/src/Dictionaries/ICacheDictionaryStorage.h +++ b/src/Dictionaries/ICacheDictionaryStorage.h @@ -26,15 +26,15 @@ struct KeyState : state(state_) {} - inline bool isFound() const { return state == State::found; } - inline bool isExpired() const { return state == State::expired; } - inline bool isNotFound() const { return state == State::not_found; } - inline bool isDefault() const { return is_default; } - inline void setDefault() { is_default = true; } - inline void setDefaultValue(bool is_default_value) { is_default = is_default_value; } + bool isFound() const { return state == State::found; } + bool isExpired() const { return state == State::expired; } + bool isNotFound() const { return state == State::not_found; } + bool isDefault() const { return is_default; } + void setDefault() { is_default = true; } + void setDefaultValue(bool is_default_value) { is_default = is_default_value; } /// Valid only if keyState is found or expired - inline size_t getFetchedColumnIndex() const { return fetched_column_index; } - inline void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; } + size_t getFetchedColumnIndex() const { return fetched_column_index; } + void setFetchedColumnIndex(size_t fetched_column_index_value) { fetched_column_index = fetched_column_index_value; } private: State state = not_found; size_t fetched_column_index = 0; diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index 1bc6d16c932..a67118caaf8 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -66,7 +66,7 @@ namespace return buf; } - inline UInt8 prefixIPv6() const + UInt8 prefixIPv6() const { return isv6 ? 
prefix : prefix + 96; } diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 2e93a8e6001..ab999202e42 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -474,7 +474,7 @@ public: } // Checks if no more values can be added for a given attribute - inline bool full(const String & attr_name, std::unordered_set * const defaults = nullptr) const + bool full(const String & attr_name, std::unordered_set * const defaults = nullptr) const { if (collect_values_limit) { @@ -490,7 +490,7 @@ public: } // Returns the number of full attributes - inline size_t attributesFull() const { return n_full_attributes; } + size_t attributesFull() const { return n_full_attributes; } }; std::pair processBackRefs(const String & data, const re2::RE2 & searcher, const std::vector & pieces) diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index e3eea71cd9a..cb0ade9b899 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -134,7 +134,7 @@ public: /// Reset block with new block_data /// block_data must be filled with zeroes if it is new block - inline void reset(char * new_block_data) + void reset(char * new_block_data) { block_data = new_block_data; current_block_offset = block_header_size; @@ -142,13 +142,13 @@ public: } /// Check if it is enough place to write key in block - inline bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const + bool enoughtPlaceToWriteKey(const SSDCacheSimpleKey & cache_key) const { return (current_block_offset + (sizeof(cache_key.key) + sizeof(cache_key.size) + cache_key.size)) <= block_size; } /// Check if it is enough place to write key in block - inline bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const + bool enoughtPlaceToWriteKey(const SSDCacheComplexKey & cache_key) const { const StringRef & key = cache_key.key; size_t complex_key_size = sizeof(key.size) + key.size; @@ -159,7 +159,7 @@ public: /// Write key and returns offset in ssd cache block where data is written /// It is client responsibility to check if there is enough place in block to write key /// Returns true if key was written and false if there was not enough place to write key - inline bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block) + bool writeKey(const SSDCacheSimpleKey & cache_key, size_t & offset_in_block) { assert(cache_key.size > 0); @@ -188,7 +188,7 @@ public: return true; } - inline bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block) + bool writeKey(const SSDCacheComplexKey & cache_key, size_t & offset_in_block) { assert(cache_key.size > 0); @@ -223,20 +223,20 @@ public: return true; } - inline size_t getKeysSize() const { return keys_size; } + size_t getKeysSize() const { return keys_size; } /// Write keys size into block header - inline void writeKeysSize() + void writeKeysSize() { char * keys_size_offset_data = block_data + block_header_check_sum_size; std::memcpy(keys_size_offset_data, &keys_size, sizeof(size_t)); } /// Get check sum from block header - inline size_t getCheckSum() const { return unalignedLoad(block_data); } + size_t getCheckSum() const { return unalignedLoad(block_data); } /// Calculate check sum in block - inline size_t calculateCheckSum() const + size_t calculateCheckSum() const { size_t calculated_check_sum = static_cast(CityHash_v1_0_2::CityHash64(block_data + 
block_header_check_sum_size, block_size - block_header_check_sum_size)); @@ -244,7 +244,7 @@ public: } /// Check if check sum from block header matched calculated check sum in block - inline bool checkCheckSum() const + bool checkCheckSum() const { size_t calculated_check_sum = calculateCheckSum(); size_t check_sum = getCheckSum(); @@ -253,16 +253,16 @@ public: } /// Write check sum in block header - inline void writeCheckSum() + void writeCheckSum() { size_t check_sum = static_cast(CityHash_v1_0_2::CityHash64(block_data + block_header_check_sum_size, block_size - block_header_check_sum_size)); std::memcpy(block_data, &check_sum, sizeof(size_t)); } - inline size_t getBlockSize() const { return block_size; } + size_t getBlockSize() const { return block_size; } /// Returns block data - inline char * getBlockData() const { return block_data; } + char * getBlockData() const { return block_data; } /// Read keys that were serialized in block /// It is client responsibility to ensure that simple or complex keys were written in block @@ -405,16 +405,16 @@ public: current_write_block.writeCheckSum(); } - inline char * getPlace(SSDCacheIndex index) const + char * getPlace(SSDCacheIndex index) const { return buffer.m_data + index.block_index * block_size + index.offset_in_block; } - inline size_t getCurrentBlockIndex() const { return current_block_index; } + size_t getCurrentBlockIndex() const { return current_block_index; } - inline const char * getData() const { return buffer.m_data; } + const char * getData() const { return buffer.m_data; } - inline size_t getSizeInBytes() const { return block_size * partition_blocks_size; } + size_t getSizeInBytes() const { return block_size * partition_blocks_size; } void readKeys(PaddedPODArray & keys) const { @@ -431,7 +431,7 @@ public: } } - inline void reset() + void reset() { current_block_index = 0; current_write_block.reset(buffer.m_data); @@ -751,9 +751,9 @@ public: } } - inline size_t getCurrentBlockIndex() const { return current_block_index; } + size_t getCurrentBlockIndex() const { return current_block_index; } - inline void reset() + void reset() { current_block_index = 0; } @@ -789,7 +789,7 @@ private: int fd = -1; }; - inline static int preallocateDiskSpace(int fd, size_t offset, size_t len) + static int preallocateDiskSpace(int fd, size_t offset, size_t len) { #if defined(OS_FREEBSD) return posix_fallocate(fd, offset, len); @@ -798,7 +798,7 @@ private: #endif } - inline static char * getRequestBuffer(const iocb & request) + static char * getRequestBuffer(const iocb & request) { char * result = nullptr; @@ -811,7 +811,7 @@ private: return result; } - inline static ssize_t eventResult(io_event & event) + static ssize_t eventResult(io_event & event) { ssize_t bytes_written; @@ -986,9 +986,9 @@ private: size_t in_memory_partition_index; CellState state; - inline bool isInMemory() const { return state == in_memory; } - inline bool isOnDisk() const { return state == on_disk; } - inline bool isDefaultValue() const { return state == default_value; } + bool isInMemory() const { return state == in_memory; } + bool isOnDisk() const { return state == on_disk; } + bool isDefaultValue() const { return state == default_value; } }; struct KeyToBlockOffset @@ -1367,7 +1367,7 @@ private: } } - inline void setCellDeadline(Cell & cell, TimePoint now) + void setCellDeadline(Cell & cell, TimePoint now) { if (configuration.lifetime.min_sec == 0 && configuration.lifetime.max_sec == 0) { @@ -1384,7 +1384,7 @@ private: cell.deadline = 
std::chrono::system_clock::to_time_t(deadline); } - inline void eraseKeyFromIndex(KeyType key) + void eraseKeyFromIndex(KeyType key) { auto it = index.find(key); diff --git a/src/Disks/IO/IOUringReader.h b/src/Disks/IO/IOUringReader.h index 89e71e4b215..359b3badc45 100644 --- a/src/Disks/IO/IOUringReader.h +++ b/src/Disks/IO/IOUringReader.h @@ -61,12 +61,12 @@ private: void monitorRing(); - template inline void failPromise(std::promise & promise, const Exception & ex) + template void failPromise(std::promise & promise, const Exception & ex) { promise.set_exception(std::make_exception_ptr(ex)); } - inline std::future makeFailedResult(const Exception & ex) + std::future makeFailedResult(const Exception & ex) { auto promise = std::promise{}; failPromise(promise, ex); diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index ff07309e248..7fd5b7476e1 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -68,7 +68,7 @@ struct DivideIntegralImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { using CastA = std::conditional_t && std::is_same_v, uint8_t, A>; using CastB = std::conditional_t && std::is_same_v, uint8_t, B>; @@ -120,7 +120,7 @@ struct ModuloImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if constexpr (std::is_floating_point_v) { @@ -175,7 +175,7 @@ struct PositiveModuloImpl : ModuloImpl using ResultType = typename NumberTraits::ResultOfPositiveModulo::Type; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { auto res = ModuloImpl::template apply(a, b); if constexpr (is_signed_v) diff --git a/src/Functions/ExtractString.h b/src/Functions/ExtractString.h index aa0e1b04835..5b8fa41958a 100644 --- a/src/Functions/ExtractString.h +++ b/src/Functions/ExtractString.h @@ -20,7 +20,7 @@ namespace DB // includes extracting ASCII ngram, UTF8 ngram, ASCII word and UTF8 word struct ExtractStringImpl { - static ALWAYS_INLINE inline const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) + static ALWAYS_INLINE const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) { // jump separators while (pos < end && isUTF8Sep(*pos)) @@ -35,10 +35,10 @@ struct ExtractStringImpl } // we use ASCII non-alphanum character as UTF8 separator - static ALWAYS_INLINE inline bool isUTF8Sep(const UInt8 c) { return c < 128 && !isAlphaNumericASCII(c); } + static ALWAYS_INLINE bool isUTF8Sep(const UInt8 c) { return c < 128 && !isAlphaNumericASCII(c); } // read one UTF8 character - static ALWAYS_INLINE inline void readOneUTF8Code(const UInt8 *& pos, const UInt8 * end) + static ALWAYS_INLINE void readOneUTF8Code(const UInt8 *& pos, const UInt8 * end) { size_t length = UTF8::seqLength(*pos); diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 6203999fa37..5d19ba44d9b 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -284,7 +284,7 @@ struct BinaryOperation private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); @@ -432,7 +432,7 @@ template struct FixedStringReduceOperationImpl { template 
- static void inline process(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt16 * __restrict result, size_t size, size_t N) + static void process(const UInt8 * __restrict a, const UInt8 * __restrict b, UInt16 * __restrict result, size_t size, size_t N) { if constexpr (op_case == OpCase::Vector) vectorVector(a, b, result, size, N); @@ -503,7 +503,7 @@ struct StringReduceOperationImpl } } - static inline UInt64 constConst(std::string_view a, std::string_view b) + static UInt64 constConst(std::string_view a, std::string_view b) { return process( reinterpret_cast(a.data()), @@ -643,7 +643,7 @@ public: private: template - static inline void processWithRightNullmapImpl(const auto & a, const auto & b, ResultContainerType & c, size_t size, const NullMap * right_nullmap, ApplyFunc apply_func) + static void processWithRightNullmapImpl(const auto & a, const auto & b, ResultContainerType & c, size_t size, const NullMap * right_nullmap, ApplyFunc apply_func) { if (right_nullmap) { diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 37db514fd1f..83ed874c47b 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -44,27 +44,27 @@ class DefaultJSONStringSerializer public: explicit DefaultJSONStringSerializer(ColumnString & col_str_) : col_str(col_str_) { } - inline void addRawData(const char * ptr, size_t len) + void addRawData(const char * ptr, size_t len) { out << std::string_view(ptr, len); } - inline void addRawString(std::string_view str) + void addRawString(std::string_view str) { out << str; } /// serialize the json element into stringstream - inline void addElement(const Element & element) + void addElement(const Element & element) { out << element.getElement(); } - inline void commit() + void commit() { auto out_str = out.str(); col_str.insertData(out_str.data(), out_str.size()); } - inline void rollback() {} + void rollback() {} private: ColumnString & col_str; std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM @@ -82,27 +82,27 @@ public: prev_offset = offsets.empty() ? 0 : offsets.back(); } /// Put the data into column's buffer directly. 
- inline void addRawData(const char * ptr, size_t len) + void addRawData(const char * ptr, size_t len) { chars.insert(ptr, ptr + len); } - inline void addRawString(std::string_view str) + void addRawString(std::string_view str) { chars.insert(str.data(), str.data() + str.size()); } /// serialize the json element into column's buffer directly - inline void addElement(const Element & element) + void addElement(const Element & element) { formatter.append(element.getElement()); } - inline void commit() + void commit() { chars.push_back(0); offsets.push_back(chars.size()); } - inline void rollback() + void rollback() { chars.resize(prev_offset); } diff --git a/src/Functions/FunctionsAES.h b/src/Functions/FunctionsAES.h index 14745460658..524b4f82acd 100644 --- a/src/Functions/FunctionsAES.h +++ b/src/Functions/FunctionsAES.h @@ -59,7 +59,7 @@ enum class CipherMode : uint8_t template struct KeyHolder { - inline StringRef setKey(size_t cipher_key_size, StringRef key) const + StringRef setKey(size_t cipher_key_size, StringRef key) const { if (key.size != cipher_key_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid key size: {} expected {}", key.size, cipher_key_size); @@ -71,7 +71,7 @@ struct KeyHolder template <> struct KeyHolder { - inline StringRef setKey(size_t cipher_key_size, StringRef key) + StringRef setKey(size_t cipher_key_size, StringRef key) { if (key.size < cipher_key_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid key size: {} expected {}", key.size, cipher_key_size); diff --git a/src/Functions/FunctionsBitToArray.cpp b/src/Functions/FunctionsBitToArray.cpp index 566ce16d1a7..adabda1a7f8 100644 --- a/src/Functions/FunctionsBitToArray.cpp +++ b/src/Functions/FunctionsBitToArray.cpp @@ -79,7 +79,7 @@ public: private: template - inline static void writeBitmask(T x, WriteBuffer & out) + static void writeBitmask(T x, WriteBuffer & out) { using UnsignedT = make_unsigned_t; UnsignedT u_x = x; diff --git a/src/Functions/FunctionsCodingIP.cpp b/src/Functions/FunctionsCodingIP.cpp index 54f7b6dd1f4..e01967274f4 100644 --- a/src/Functions/FunctionsCodingIP.cpp +++ b/src/Functions/FunctionsCodingIP.cpp @@ -785,7 +785,7 @@ private: #include - static inline void applyCIDRMask(const char * __restrict src, char * __restrict dst_lower, char * __restrict dst_upper, UInt8 bits_to_keep) + static void applyCIDRMask(const char * __restrict src, char * __restrict dst_lower, char * __restrict dst_upper, UInt8 bits_to_keep) { __m128i mask = _mm_loadu_si128(reinterpret_cast(getCIDRMaskIPv6(bits_to_keep).data())); __m128i lower = _mm_and_si128(_mm_loadu_si128(reinterpret_cast(src)), mask); @@ -916,7 +916,7 @@ public: class FunctionIPv4CIDRToRange : public IFunction { private: - static inline std::pair applyCIDRMask(UInt32 src, UInt8 bits_to_keep) + static std::pair applyCIDRMask(UInt32 src, UInt8 bits_to_keep) { if (bits_to_keep >= 8 * sizeof(UInt32)) return { src, src }; diff --git a/src/Functions/FunctionsConsistentHashing.h b/src/Functions/FunctionsConsistentHashing.h index 6f2eec5be98..306b6395dc5 100644 --- a/src/Functions/FunctionsConsistentHashing.h +++ b/src/Functions/FunctionsConsistentHashing.h @@ -83,7 +83,7 @@ private: using BucketsType = typename Impl::BucketsType; template - inline BucketsType checkBucketsRange(T buckets) const + BucketsType checkBucketsRange(T buckets) const { if (unlikely(buckets <= 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The second argument of function {} (number of buckets) must be positive number", getName()); diff --git 
a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp index 55485d41ce0..94391606762 100644 --- a/src/Functions/FunctionsLanguageClassification.cpp +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -31,7 +31,7 @@ extern const int SUPPORT_IS_DISABLED; struct FunctionDetectLanguageImpl { - static ALWAYS_INLINE inline std::string_view codeISO(std::string_view code_string) + static ALWAYS_INLINE std::string_view codeISO(std::string_view code_string) { if (code_string.ends_with("-Latn")) code_string.remove_suffix(code_string.size() - 5); diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 7e7ae76d6eb..2f5ce6deebf 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -170,7 +170,7 @@ public: : vec(in[in.size() - N]->getData()), next(in) {} /// Returns a combination of values in the i-th row of all columns stored in the constructor. - inline ResultValueType apply(const size_t i) const + ResultValueType apply(const size_t i) const { const auto a = !!vec[i]; return Op::apply(a, next.apply(i)); @@ -190,7 +190,7 @@ public: explicit AssociativeApplierImpl(const UInt8ColumnPtrs & in) : vec(in[in.size() - 1]->getData()) {} - inline ResultValueType apply(const size_t i) const { return !!vec[i]; } + ResultValueType apply(const size_t i) const { return !!vec[i]; } private: const UInt8Container & vec; @@ -291,7 +291,7 @@ public: } /// Returns a combination of values in the i-th row of all columns stored in the constructor. - inline ResultValueType apply(const size_t i) const + ResultValueType apply(const size_t i) const { return Op::ternaryApply(vec[i], next.apply(i)); } @@ -315,7 +315,7 @@ public: TernaryValueBuilder::build(in[in.size() - 1], vec.data()); } - inline ResultValueType apply(const size_t i) const { return vec[i]; } + ResultValueType apply(const size_t i) const { return vec[i]; } private: UInt8Container vec; diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 41464329f79..3c2eb3ee0b8 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -84,47 +84,47 @@ struct AndImpl { using ResultType = UInt8; - static inline constexpr bool isSaturable() { return true; } + static constexpr bool isSaturable() { return true; } /// Final value in two-valued logic (no further operations with True, False will change this value) - static inline constexpr bool isSaturatedValue(bool a) { return !a; } + static constexpr bool isSaturatedValue(bool a) { return !a; } /// Final value in three-valued logic (no further operations with True, False, Null will change this value) - static inline constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::False; } + static constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::False; } - static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; } + static constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; } - static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::min(a, b); } + static constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::min(a, b); } /// Will use three-valued logic for NULLs (see above) or default implementation (any operation with NULL returns NULL). 
- static inline constexpr bool specialImplementationForNulls() { return true; } + static constexpr bool specialImplementationForNulls() { return true; } }; struct OrImpl { using ResultType = UInt8; - static inline constexpr bool isSaturable() { return true; } - static inline constexpr bool isSaturatedValue(bool a) { return a; } - static inline constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::True; } - static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a | b; } - static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::max(a, b); } - static inline constexpr bool specialImplementationForNulls() { return true; } + static constexpr bool isSaturable() { return true; } + static constexpr bool isSaturatedValue(bool a) { return a; } + static constexpr bool isSaturatedValueTernary(UInt8 a) { return a == Ternary::True; } + static constexpr ResultType apply(UInt8 a, UInt8 b) { return a | b; } + static constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return std::max(a, b); } + static constexpr bool specialImplementationForNulls() { return true; } }; struct XorImpl { using ResultType = UInt8; - static inline constexpr bool isSaturable() { return false; } - static inline constexpr bool isSaturatedValue(bool) { return false; } - static inline constexpr bool isSaturatedValueTernary(UInt8) { return false; } - static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a != b; } - static inline constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return a != b; } - static inline constexpr bool specialImplementationForNulls() { return false; } + static constexpr bool isSaturable() { return false; } + static constexpr bool isSaturatedValue(bool) { return false; } + static constexpr bool isSaturatedValueTernary(UInt8) { return false; } + static constexpr ResultType apply(UInt8 a, UInt8 b) { return a != b; } + static constexpr ResultType ternaryApply(UInt8 a, UInt8 b) { return a != b; } + static constexpr bool specialImplementationForNulls() { return false; } #if USE_EMBEDDED_COMPILER - static inline llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a, llvm::Value * b) + static llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a, llvm::Value * b) { return builder.CreateXor(a, b); } @@ -136,13 +136,13 @@ struct NotImpl { using ResultType = UInt8; - static inline ResultType apply(A a) + static ResultType apply(A a) { return !static_cast(a); } #if USE_EMBEDDED_COMPILER - static inline llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a) + static llvm::Value * apply(llvm::IRBuilder<> & builder, llvm::Value * a) { return builder.CreateNot(a); } diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp index a93e1d9a87d..8e9eff50aab 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -21,7 +21,7 @@ namespace DB struct FunctionDetectProgrammingLanguageImpl { /// Calculate total weight - static ALWAYS_INLINE inline Float64 stateMachine( + static ALWAYS_INLINE Float64 stateMachine( const FrequencyHolder::Map & standard, const std::unordered_map & model) { diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index 99f3a14dfec..1f20fbff24e 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -296,7 +296,7 @@ class FloatRoundingComputation : public BaseFloatRoundingComputation using Base = BaseFloatRoundingComputation; 
public: - static inline void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out) + static void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out) { auto val = Base::load(in); diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index 0bf6e39e651..cd33564caf9 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -99,7 +99,7 @@ struct Hash } template - static ALWAYS_INLINE inline UInt64 shingleHash(UInt64 crc, const UInt8 * start, size_t size) + static ALWAYS_INLINE UInt64 shingleHash(UInt64 crc, const UInt8 * start, size_t size) { if (size & 1) { @@ -153,7 +153,7 @@ struct Hash } template - static ALWAYS_INLINE inline UInt64 shingleHash(const std::vector & shingle, size_t offset = 0) + static ALWAYS_INLINE UInt64 shingleHash(const std::vector & shingle, size_t offset = 0) { UInt64 crc = -1ULL; @@ -177,14 +177,14 @@ struct SimHashImpl static constexpr size_t min_word_size = 4; /// Update fingerprint according to hash_value bits. - static ALWAYS_INLINE inline void updateFingerVector(Int64 * finger_vec, UInt64 hash_value) + static ALWAYS_INLINE void updateFingerVector(Int64 * finger_vec, UInt64 hash_value) { for (size_t i = 0; i < 64; ++i) finger_vec[i] += (hash_value & (1ULL << i)) ? 1 : -1; } /// Return a 64 bit value according to finger_vec. - static ALWAYS_INLINE inline UInt64 getSimHash(const Int64 * finger_vec) + static ALWAYS_INLINE UInt64 getSimHash(const Int64 * finger_vec) { UInt64 res = 0; @@ -200,7 +200,7 @@ struct SimHashImpl // for each ngram, calculate a 64 bit hash value, and update the vector according the hash value // finally return a 64 bit value(UInt64), i'th bit is 1 means vector[i] > 0, otherwise, vector[i] < 0 - static ALWAYS_INLINE inline UInt64 ngramHashASCII(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE UInt64 ngramHashASCII(const UInt8 * data, size_t size, size_t shingle_size) { if (size < shingle_size) return Hash::shingleHash(-1ULL, data, size); @@ -217,7 +217,7 @@ struct SimHashImpl return getSimHash(finger_vec); } - static ALWAYS_INLINE inline UInt64 ngramHashUTF8(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE UInt64 ngramHashUTF8(const UInt8 * data, size_t size, size_t shingle_size) { const UInt8 * start = data; const UInt8 * end = data + size; @@ -259,7 +259,7 @@ struct SimHashImpl // 2. 
next, we extract one word each time, and calculate a new hash value of the new word,then use the latest N hash // values to calculate the next word shingle hash value - static ALWAYS_INLINE inline UInt64 wordShingleHash(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE UInt64 wordShingleHash(const UInt8 * data, size_t size, size_t shingle_size) { const UInt8 * start = data; const UInt8 * end = data + size; @@ -400,7 +400,7 @@ struct MinHashImpl using MaxHeap = Heap>; using MinHeap = Heap>; - static ALWAYS_INLINE inline void ngramHashASCII( + static ALWAYS_INLINE void ngramHashASCII( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, @@ -429,7 +429,7 @@ struct MinHashImpl } } - static ALWAYS_INLINE inline void ngramHashUTF8( + static ALWAYS_INLINE void ngramHashUTF8( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, @@ -472,7 +472,7 @@ struct MinHashImpl // MinHash word shingle hash value calculate function: String ->Tuple(UInt64, UInt64) // for each word shingle, we calculate a hash value, but in fact, we just maintain the // K minimum and K maximum hash value - static ALWAYS_INLINE inline void wordShingleHash( + static ALWAYS_INLINE void wordShingleHash( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index aadf5c246fc..5224c76d7b0 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -85,7 +85,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE inline void unrollLowering(Container & cont, const std::index_sequence &) + static ALWAYS_INLINE void unrollLowering(Container & cont, const std::index_sequence &) { ((cont[Offset + I] = std::tolower(cont[Offset + I])), ...); } @@ -195,7 +195,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE inline size_t calculateNeedleStats( + static ALWAYS_INLINE size_t calculateNeedleStats( const char * data, const size_t size, NgramCount * ngram_stats, @@ -228,7 +228,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric( + static ALWAYS_INLINE UInt64 calculateHaystackStatsAndMetric( const char * data, const size_t size, NgramCount * ngram_stats, @@ -275,7 +275,7 @@ struct NgramDistanceImpl } template - static inline auto dispatchSearcher(Callback callback, Args &&... args) + static auto dispatchSearcher(Callback callback, Args &&... 
args) { if constexpr (!UTF8) return callback(std::forward(args)..., readASCIICodePoints, calculateASCIIHash); diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 6183d25c8bd..7522bd374a2 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -97,7 +97,7 @@ template<> \ template <> \ struct AddTime \ { \ - static inline auto execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) \ + static auto execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) \ { \ return time_zone.add##INTERVAL_KIND##s(ExtendedDayNum(d), delta); \ } \ @@ -110,7 +110,7 @@ template<> \ template <> struct AddTime { - static inline NO_SANITIZE_UNDEFINED ExtendedDayNum execute(UInt16 d, UInt64 delta, const DateLUTImpl &) + static NO_SANITIZE_UNDEFINED ExtendedDayNum execute(UInt16 d, UInt64 delta, const DateLUTImpl &) { return ExtendedDayNum(static_cast(d + delta * 7)); } @@ -120,7 +120,7 @@ template<> \ template <> \ struct AddTime \ { \ - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) \ + static NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) \ { return static_cast(t + delta * (INTERVAL)); } \ }; ADD_TIME(Day, 86400) @@ -133,7 +133,7 @@ template<> \ template <> \ struct AddTime \ { \ - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ + static NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ { \ if (scale < (DEF_SCALE)) \ { \ diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index 3de38d99c88..a8cc09186f6 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -18,7 +18,7 @@ namespace DB */ struct FunctionDetectTonalityImpl { - static ALWAYS_INLINE inline Float32 detectTonality( + static ALWAYS_INLINE Float32 detectTonality( const UInt8 * str, const size_t str_len, const FrequencyHolder::Map & emotional_dict) diff --git a/src/Functions/GCDLCMImpl.h b/src/Functions/GCDLCMImpl.h index df531363c31..094c248497b 100644 --- a/src/Functions/GCDLCMImpl.h +++ b/src/Functions/GCDLCMImpl.h @@ -26,7 +26,7 @@ struct GCDLCMImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger::Type(a), typename NumberTraits::ToInteger::Type(b)); throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger::Type(b), typename NumberTraits::ToInteger::Type(a)); diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp index eb7ef4abe56..91861e8bbd2 100644 --- a/src/Functions/GregorianDate.cpp +++ b/src/Functions/GregorianDate.cpp @@ -20,12 +20,12 @@ namespace ErrorCodes namespace { - inline constexpr bool is_leap_year(int32_t year) + constexpr bool is_leap_year(int32_t year) { return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0)); } - inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) + constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) { switch (month) { @@ -49,7 +49,7 @@ namespace /** Integer division truncated toward negative infinity. */ template - inline constexpr I div(I x, J y) + constexpr I div(I x, J y) { const auto y_cast = static_cast(y); if (x > 0 && y_cast < 0) @@ -63,7 +63,7 @@ namespace /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. 
*/ template - inline constexpr I mod(I x, J y) + constexpr I mod(I x, J y) { const auto y_cast = static_cast(y); const auto r = x % y_cast; @@ -76,7 +76,7 @@ namespace /** Like std::min(), but the type of operands may differ. */ template - inline constexpr I min(I x, J y) + constexpr I min(I x, J y) { const auto y_cast = static_cast(y); return x < y_cast ? x : y_cast; diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index c4851718da6..0c57fd7f0b5 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -124,7 +124,7 @@ public: bool hasEmptyBound() const { return has_empty_bound; } - inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const + bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const { Point point(x, y); diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index 896e9d8ca48..b52ccd3cce0 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -53,7 +53,7 @@ public: {} template - inline auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const + auto NO_SANITIZE_UNDEFINED execute(const DateTime64 & t, Args && ... args) const { /// Type conversion from float to integer may be required. /// We are Ok with implementation specific result for out of range and denormals conversion. @@ -90,14 +90,14 @@ public: template requires(!std::same_as) - inline auto execute(const T & t, Args &&... args) const + auto execute(const T & t, Args &&... args) const { return wrapped_transform.execute(t, std::forward(args)...); } template - inline auto NO_SANITIZE_UNDEFINED executeExtendedResult(const DateTime64 & t, Args && ... args) const + auto NO_SANITIZE_UNDEFINED executeExtendedResult(const DateTime64 & t, Args && ... args) const { /// Type conversion from float to integer may be required. /// We are Ok with implementation specific result for out of range and denormals conversion. @@ -131,7 +131,7 @@ public: template requires (!std::same_as) - inline auto executeExtendedResult(const T & t, Args && ... args) const + auto executeExtendedResult(const T & t, Args && ... args) const { return wrapped_transform.executeExtendedResult(t, std::forward(args)...); } diff --git a/src/Functions/abs.cpp b/src/Functions/abs.cpp index 0cd313caf1e..9ac2363f765 100644 --- a/src/Functions/abs.cpp +++ b/src/Functions/abs.cpp @@ -12,7 +12,7 @@ struct AbsImpl using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfAbs::Type>; static constexpr bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { if constexpr (is_decimal) return a < A(0) ? 
A(-a) : a; diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 395f96bbffb..fa9b3dc92dd 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -322,7 +322,7 @@ private: } template - static inline void invokeCheckNullMaps( + static void invokeCheckNullMaps( const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & str_offsets, const ColumnString::Chars & values, OffsetT item_offsets, @@ -339,7 +339,7 @@ private: } public: - static inline void process( + static void process( const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets, const ColumnString::Chars & item_values, Offset item_offsets, PaddedPODArray & result, @@ -348,7 +348,7 @@ public: invokeCheckNullMaps(data, offsets, string_offsets, item_values, item_offsets, result, data_map, item_map); } - static inline void process( + static void process( const ColumnString::Chars & data, const ColumnArray::Offsets & offsets, const ColumnString::Offsets & string_offsets, const ColumnString::Chars & item_values, const ColumnString::Offsets & item_offsets, PaddedPODArray & result, @@ -467,10 +467,10 @@ private: NullMaps maps; ResultColumnPtr result { ResultColumnType::create() }; - inline void moveResult() { result_column = std::move(result); } + void moveResult() { result_column = std::move(result); } }; - static inline bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) + static bool allowArguments(const DataTypePtr & inner_type, const DataTypePtr & arg) { auto inner_type_decayed = removeNullable(removeLowCardinality(inner_type)); auto arg_decayed = removeNullable(removeLowCardinality(arg)); @@ -633,7 +633,7 @@ private: * (s1, s1, s2, ...), (s2, s1, s2, ...), (s3, s1, s2, ...) */ template - static inline ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) + static ColumnPtr executeIntegral(const ColumnsWithTypeAndName & arguments) { const ColumnArray * const left = checkAndGetColumn(arguments[0].column.get()); @@ -658,14 +658,14 @@ private: } template - static inline bool executeIntegral(ExecutionData& data) + static bool executeIntegral(ExecutionData& data) { return (executeIntegralExpanded(data) || ...); } /// Invoke executeIntegralImpl with such parameters: (A, other1), (A, other2), ... 
template - static inline bool executeIntegralExpanded(ExecutionData& data) + static bool executeIntegralExpanded(ExecutionData& data) { return (executeIntegralImpl(data) || ...); } diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index e87eff6add1..ca1e8f21aee 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -25,19 +25,19 @@ struct L1Norm struct ConstParams {}; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return result + fabs(value); } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return result + other_result; } template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return result; } @@ -50,19 +50,19 @@ struct L2Norm struct ConstParams {}; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return result + value * value; } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return result + other_result; } template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return sqrt(result); } @@ -73,7 +73,7 @@ struct L2SquaredNorm : L2Norm static constexpr auto name = "L2Squared"; template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return result; } @@ -91,19 +91,19 @@ struct LpNorm }; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams & params) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams & params) { return result + static_cast(std::pow(fabs(value), params.power)); } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return result + other_result; } template - inline static ResultType finalize(ResultType result, const ConstParams & params) + static ResultType finalize(ResultType result, const ConstParams & params) { return static_cast(std::pow(result, params.inverted_power)); } @@ -116,19 +116,19 @@ struct LinfNorm struct ConstParams {}; template - inline static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) + static ResultType accumulate(ResultType result, ResultType value, const ConstParams &) { return fmax(result, fabs(value)); } template - inline static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) + static ResultType combine(ResultType result, ResultType other_result, const ConstParams &) { return fmax(result, other_result); } template - inline static ResultType finalize(ResultType result, const ConstParams &) + static ResultType finalize(ResultType result, const ConstParams &) { return result; } diff --git 
a/src/Functions/bitAnd.cpp b/src/Functions/bitAnd.cpp index 8efc5181919..c6ab9023142 100644 --- a/src/Functions/bitAnd.cpp +++ b/src/Functions/bitAnd.cpp @@ -20,7 +20,7 @@ struct BitAndImpl static constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) & static_cast(b); } @@ -28,7 +28,7 @@ struct BitAndImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitAndImpl expected an integral type"); diff --git a/src/Functions/bitBoolMaskAnd.cpp b/src/Functions/bitBoolMaskAnd.cpp index 11c0c1d1b7d..bd89b6eb69a 100644 --- a/src/Functions/bitBoolMaskAnd.cpp +++ b/src/Functions/bitBoolMaskAnd.cpp @@ -25,7 +25,7 @@ struct BitBoolMaskAndImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) + static Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) { // Should be a logical error, but this function is callable from SQL. // Need to investigate this. diff --git a/src/Functions/bitBoolMaskOr.cpp b/src/Functions/bitBoolMaskOr.cpp index 7940bf3e2ca..1ddf2d258f8 100644 --- a/src/Functions/bitBoolMaskOr.cpp +++ b/src/Functions/bitBoolMaskOr.cpp @@ -25,7 +25,7 @@ struct BitBoolMaskOrImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) + static Result apply([[maybe_unused]] A left, [[maybe_unused]] B right) { if constexpr (!std::is_same_v || !std::is_same_v) // Should be a logical error, but this function is callable from SQL. diff --git a/src/Functions/bitCount.cpp b/src/Functions/bitCount.cpp index f1a3ac897c1..68555b1386c 100644 --- a/src/Functions/bitCount.cpp +++ b/src/Functions/bitCount.cpp @@ -13,7 +13,7 @@ struct BitCountImpl using ResultType = std::conditional_t<(sizeof(A) * 8 >= 256), UInt16, UInt8>; static constexpr bool allow_string_or_fixed_string = true; - static inline ResultType apply(A a) + static ResultType apply(A a) { /// We count bits in the value representation in memory. For example, we support floats. /// We need to avoid sign-extension when converting signed numbers to larger type. So, uint8_t(-1) has 8 bits. diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index f00f38b61af..f8a1a95ae14 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -19,7 +19,7 @@ struct BitHammingDistanceImpl static constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { /// Note: it's unspecified if signed integers should be promoted with sign-extension or with zero-fill. /// This behavior can change in the future. 
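The BitCountImpl comment in the hunk above notes that bits are counted over the value's in-memory representation, so signed inputs must not be sign-extended before counting (uint8_t(-1) has exactly 8 set bits). A minimal standalone sketch of that idea, separate from the patch, with illustrative names and assuming a C++20 <bit> header:

#include <bit>
#include <cstdint>
#include <cstdio>

// Count set bits of a signed 8-bit value without sign-extension: go through
// the unsigned type of the same width, so int8_t(-1) yields exactly 8 bits.
static int popcountOfRepresentation(int8_t a)
{
    return std::popcount(static_cast<uint8_t>(a));
}

// Count set bits of a float's 32-bit IEEE-754 representation.
static int popcountOfRepresentation(float f)
{
    return std::popcount(std::bit_cast<uint32_t>(f));
}

int main()
{
    std::printf("%d\n", popcountOfRepresentation(int8_t{-1})); // 8
    std::printf("%d\n", popcountOfRepresentation(1.0f));       // 7 (1.0f is 0x3F800000)
    return 0;
}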
diff --git a/src/Functions/bitNot.cpp b/src/Functions/bitNot.cpp index 62ebdc7c52a..44dc77bb7bb 100644 --- a/src/Functions/bitNot.cpp +++ b/src/Functions/bitNot.cpp @@ -19,7 +19,7 @@ struct BitNotImpl using ResultType = typename NumberTraits::ResultOfBitNot::Type; static constexpr bool allow_string_or_fixed_string = true; - static inline ResultType NO_SANITIZE_UNDEFINED apply(A a) + static ResultType NO_SANITIZE_UNDEFINED apply(A a) { return ~static_cast(a); } @@ -27,7 +27,7 @@ struct BitNotImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitNotImpl expected an integral type"); diff --git a/src/Functions/bitOr.cpp b/src/Functions/bitOr.cpp index 9e19fc55219..22ce15d892d 100644 --- a/src/Functions/bitOr.cpp +++ b/src/Functions/bitOr.cpp @@ -19,7 +19,7 @@ struct BitOrImpl static constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) | static_cast(b); } @@ -27,7 +27,7 @@ struct BitOrImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitOrImpl expected an integral type"); diff --git a/src/Functions/bitRotateLeft.cpp b/src/Functions/bitRotateLeft.cpp index c72466b8d49..2fe2c4e0f1d 100644 --- a/src/Functions/bitRotateLeft.cpp +++ b/src/Functions/bitRotateLeft.cpp @@ -20,7 +20,7 @@ struct BitRotateLeftImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Bit rotate is not implemented for big integers"); @@ -32,7 +32,7 @@ struct BitRotateLeftImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitRotateLeftImpl expected an integral type"); diff --git a/src/Functions/bitRotateRight.cpp b/src/Functions/bitRotateRight.cpp index 045758f9a31..a2f0fe12324 100644 --- a/src/Functions/bitRotateRight.cpp +++ b/src/Functions/bitRotateRight.cpp @@ -20,7 +20,7 @@ struct BitRotateRightImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Bit rotate is not implemented for big integers"); @@ -32,7 +32,7 @@ struct BitRotateRightImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value 
* left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitRotateRightImpl expected an integral type"); diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index 7b3748edb5c..c366a1ecb44 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -20,7 +20,7 @@ struct BitShiftLeftImpl static const constexpr bool allow_string_integer = true; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); @@ -145,7 +145,7 @@ struct BitShiftLeftImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitShiftLeftImpl expected an integral type"); diff --git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 21a0f7584aa..1c37cd3bf4c 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -21,7 +21,7 @@ struct BitShiftRightImpl static const constexpr bool allow_string_integer = true; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); @@ -31,7 +31,7 @@ struct BitShiftRightImpl return static_cast(a) >> static_cast(b); } - static inline NO_SANITIZE_UNDEFINED void bitShiftRightForBytes(const UInt8 * op_pointer, const UInt8 * begin, UInt8 * out, const size_t shift_right_bits) + static NO_SANITIZE_UNDEFINED void bitShiftRightForBytes(const UInt8 * op_pointer, const UInt8 * begin, UInt8 * out, const size_t shift_right_bits) { while (op_pointer > begin) { @@ -123,7 +123,7 @@ struct BitShiftRightImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitShiftRightImpl expected an integral type"); diff --git a/src/Functions/bitSwapLastTwo.cpp b/src/Functions/bitSwapLastTwo.cpp index d8957598c62..4ff436d5708 100644 --- a/src/Functions/bitSwapLastTwo.cpp +++ b/src/Functions/bitSwapLastTwo.cpp @@ -21,7 +21,7 @@ struct BitSwapLastTwoImpl using ResultType = UInt8; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) + static ResultType NO_SANITIZE_UNDEFINED apply([[maybe_unused]] A a) { if constexpr (!std::is_same_v) // Should be a logical error, but this function is callable from SQL. 
@@ -35,7 +35,7 @@ struct BitSwapLastTwoImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; -static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) +static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "__bitSwapLastTwo expected an integral type"); diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index 4c9c6aa2dfb..78ec9c8b773 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -21,7 +21,7 @@ struct BitTestImpl static const constexpr bool allow_string_integer = false; template - NO_SANITIZE_UNDEFINED static inline Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + NO_SANITIZE_UNDEFINED static Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); diff --git a/src/Functions/bitTestAll.cpp b/src/Functions/bitTestAll.cpp index a2dcef3eb96..92f63bfa262 100644 --- a/src/Functions/bitTestAll.cpp +++ b/src/Functions/bitTestAll.cpp @@ -9,7 +9,7 @@ namespace struct BitTestAllImpl { template - static inline UInt8 apply(A a, B b) { return (a & b) == b; } + static UInt8 apply(A a, B b) { return (a & b) == b; } }; struct NameBitTestAll { static constexpr auto name = "bitTestAll"; }; diff --git a/src/Functions/bitTestAny.cpp b/src/Functions/bitTestAny.cpp index 6b20d6c184c..c8f445d524e 100644 --- a/src/Functions/bitTestAny.cpp +++ b/src/Functions/bitTestAny.cpp @@ -9,7 +9,7 @@ namespace struct BitTestAnyImpl { template - static inline UInt8 apply(A a, B b) { return (a & b) != 0; } + static UInt8 apply(A a, B b) { return (a & b) != 0; } }; struct NameBitTestAny { static constexpr auto name = "bitTestAny"; }; diff --git a/src/Functions/bitWrapperFunc.cpp b/src/Functions/bitWrapperFunc.cpp index 99c06172c30..d243a6724a8 100644 --- a/src/Functions/bitWrapperFunc.cpp +++ b/src/Functions/bitWrapperFunc.cpp @@ -21,7 +21,7 @@ struct BitWrapperFuncImpl using ResultType = UInt8; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]]) + static ResultType NO_SANITIZE_UNDEFINED apply(A a [[maybe_unused]]) { // Should be a logical error, but this function is callable from SQL. // Need to investigate this. 
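The hunks in this patch remove `inline` only where the keyword is already implied by the language: a member function defined inside the class body is implicitly inline, and a constexpr function is implicitly inline as well, so dropping the keyword changes neither linkage nor ODR behaviour. A minimal standalone sketch of those two rules, separate from the diff and using hypothetical names:

#include <cstdint>

struct Example
{
    // Implicitly inline: defined within the class definition, so an explicit
    // "inline" here would be redundant.
    static uint64_t apply(uint64_t a, uint64_t b) { return a & b; }
};

// Implicitly inline: constexpr functions are inline by definition.
constexpr bool is_leap_year_example(int32_t year)
{
    return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0));
}

int main()
{
    // Both calls behave identically with or without an explicit "inline".
    return (Example::apply(6, 3) == 2 && is_leap_year_example(2000)) ? 0 : 1;
}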
diff --git a/src/Functions/bitXor.cpp b/src/Functions/bitXor.cpp index 78c4c64d06e..43004c6f676 100644 --- a/src/Functions/bitXor.cpp +++ b/src/Functions/bitXor.cpp @@ -19,7 +19,7 @@ struct BitXorImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) ^ static_cast(b); } @@ -27,7 +27,7 @@ struct BitXorImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "BitXorImpl expected an integral type"); diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 4d7a4f0b53d..c06dfe15dc4 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -214,7 +214,7 @@ private: template struct QuarterWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToQuarterImpl::execute(source, timezone), buffer); } @@ -223,7 +223,7 @@ private: template struct MonthWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { const auto month = ToMonthImpl::execute(source, timezone); static constexpr std::string_view month_names[] = @@ -249,7 +249,7 @@ private: template struct WeekWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToISOWeekImpl::execute(source, timezone), buffer); } @@ -258,7 +258,7 @@ private: template struct DayOfYearWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToDayOfYearImpl::execute(source, timezone), buffer); } @@ -267,7 +267,7 @@ private: template struct DayWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToDayOfMonthImpl::execute(source, timezone), buffer); } @@ -276,7 +276,7 @@ private: template struct WeekDayWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { const auto day = ToDayOfWeekImpl::execute(source, 0, timezone); static constexpr std::string_view day_names[] = @@ -297,7 +297,7 @@ private: template struct HourWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToHourImpl::execute(source, timezone), buffer); } @@ -306,7 +306,7 @@ private: template struct MinuteWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToMinuteImpl::execute(source, timezone), buffer); } @@ -315,7 +315,7 @@ private: template struct 
SecondWriter { - static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) + static void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { writeText(ToSecondImpl::execute(source, timezone), buffer); } diff --git a/src/Functions/divide.cpp b/src/Functions/divide.cpp index ca552256cd1..7c67245c382 100644 --- a/src/Functions/divide.cpp +++ b/src/Functions/divide.cpp @@ -16,7 +16,7 @@ struct DivideFloatingImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) + static NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { return static_cast(a) / b; } @@ -24,7 +24,7 @@ struct DivideFloatingImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (left->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "DivideFloatingImpl expected a floating-point type"); diff --git a/src/Functions/divideDecimal.cpp b/src/Functions/divideDecimal.cpp index 1d0db232062..c8d2c5edc8a 100644 --- a/src/Functions/divideDecimal.cpp +++ b/src/Functions/divideDecimal.cpp @@ -18,7 +18,7 @@ struct DivideDecimalsImpl static constexpr auto name = "divideDecimal"; template - static inline Decimal256 + static Decimal256 execute(FirstType a, SecondType b, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) { if (b.value == 0) diff --git a/src/Functions/factorial.cpp b/src/Functions/factorial.cpp index b814e8198e6..7ff9126c004 100644 --- a/src/Functions/factorial.cpp +++ b/src/Functions/factorial.cpp @@ -19,7 +19,7 @@ struct FactorialImpl static const constexpr bool allow_decimal = false; static const constexpr bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { if constexpr (std::is_floating_point_v || is_over_big_int) throw Exception( diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 1c12317f510..1bd71f19f76 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -94,13 +94,13 @@ struct Impl } } - static inline NO_SANITIZE_UNDEFINED size_t toIndex(T x) + static NO_SANITIZE_UNDEFINED size_t toIndex(T x) { /// Implementation specific behaviour on overflow or infinite value. 
return static_cast(x); } - static inline T degDiff(T f) + static T degDiff(T f) { f = std::abs(f); if (f > 180) @@ -108,7 +108,7 @@ struct Impl return f; } - inline T fastCos(T x) + T fastCos(T x) { T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0)); size_t i = toIndex(y); @@ -117,7 +117,7 @@ struct Impl return cos_lut[i] + (cos_lut[i + 1] - cos_lut[i]) * y; } - inline T fastSin(T x) + T fastSin(T x) { T y = std::abs(x) * (T(COS_LUT_SIZE) / T(PI) / T(2.0)); size_t i = toIndex(y); @@ -128,7 +128,7 @@ struct Impl /// fast implementation of asin(sqrt(x)) /// max error in floats 0.00369%, in doubles 0.00072% - inline T fastAsinSqrt(T x) + T fastAsinSqrt(T x) { if (x < T(0.122)) { diff --git a/src/Functions/greatest.cpp b/src/Functions/greatest.cpp index 93fd7e24853..87a48c887b4 100644 --- a/src/Functions/greatest.cpp +++ b/src/Functions/greatest.cpp @@ -15,7 +15,7 @@ struct GreatestBaseImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { return static_cast(a) > static_cast(b) ? static_cast(a) : static_cast(b); @@ -24,7 +24,7 @@ struct GreatestBaseImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) { @@ -46,7 +46,7 @@ struct GreatestSpecialImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { static_assert(std::is_same_v, "ResultType != Result"); return accurate::greaterOp(a, b) ? static_cast(a) : static_cast(b); diff --git a/src/Functions/h3GetUnidirectionalEdge.cpp b/src/Functions/h3GetUnidirectionalEdge.cpp index 4e41cdbfef6..9e253e87104 100644 --- a/src/Functions/h3GetUnidirectionalEdge.cpp +++ b/src/Functions/h3GetUnidirectionalEdge.cpp @@ -108,7 +108,7 @@ public: /// suppress asan errors generated by the following: /// 'NEW_ADJUSTMENT_III' defined in '../contrib/h3/src/h3lib/lib/algos.c:142:24 /// 'NEW_DIGIT_III' defined in '../contrib/h3/src/h3lib/lib/algos.c:121:24 - __attribute__((no_sanitize_address)) static inline UInt64 getUnidirectionalEdge(const UInt64 origin, const UInt64 dest) + __attribute__((no_sanitize_address)) static UInt64 getUnidirectionalEdge(const UInt64 origin, const UInt64 dest) { const UInt64 res = cellsToDirectedEdge(origin, dest); return res; diff --git a/src/Functions/initialQueryID.cpp b/src/Functions/initialQueryID.cpp index 469f37cf614..9c9390d4e50 100644 --- a/src/Functions/initialQueryID.cpp +++ b/src/Functions/initialQueryID.cpp @@ -19,16 +19,16 @@ public: explicit FunctionInitialQueryID(const String & initial_query_id_) : initial_query_id(initial_query_id_) {} - inline String getName() const override { return name; } + String getName() const override { return name; } - inline size_t getNumberOfArguments() const override { return 0; } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); } - inline bool isDeterministic() const override { return false; } + bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/intDiv.cpp 
b/src/Functions/intDiv.cpp index 38939556fa5..6b5bb00eacd 100644 --- a/src/Functions/intDiv.cpp +++ b/src/Functions/intDiv.cpp @@ -80,7 +80,7 @@ struct DivideIntegralByConstantImpl private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); diff --git a/src/Functions/intDivOrZero.cpp b/src/Functions/intDivOrZero.cpp index 96ff6ea80fc..f32eac17127 100644 --- a/src/Functions/intDivOrZero.cpp +++ b/src/Functions/intDivOrZero.cpp @@ -13,7 +13,7 @@ struct DivideIntegralOrZeroImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if (unlikely(divisionLeadsToFPE(a, b))) return 0; diff --git a/src/Functions/intExp10.cpp b/src/Functions/intExp10.cpp index 6944c4701bc..733f9d55702 100644 --- a/src/Functions/intExp10.cpp +++ b/src/Functions/intExp10.cpp @@ -19,7 +19,7 @@ struct IntExp10Impl using ResultType = UInt64; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType apply([[maybe_unused]] A a) + static ResultType apply([[maybe_unused]] A a) { if constexpr (is_big_int_v || std::is_same_v) throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "IntExp10 is not implemented for big integers"); diff --git a/src/Functions/intExp2.cpp b/src/Functions/intExp2.cpp index 4e5cc60a731..7e016a0dbd2 100644 --- a/src/Functions/intExp2.cpp +++ b/src/Functions/intExp2.cpp @@ -20,7 +20,7 @@ struct IntExp2Impl using ResultType = UInt64; static constexpr bool allow_string_or_fixed_string = false; - static inline ResultType apply([[maybe_unused]] A a) + static ResultType apply([[maybe_unused]] A a) { if constexpr (is_big_int_v) throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "intExp2 not implemented for big integers"); @@ -31,7 +31,7 @@ struct IntExp2Impl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) throw Exception(ErrorCodes::LOGICAL_ERROR, "IntExp2Impl expected an integral type"); diff --git a/src/Functions/isValidUTF8.cpp b/src/Functions/isValidUTF8.cpp index e7aba672356..d5f5e6a8986 100644 --- a/src/Functions/isValidUTF8.cpp +++ b/src/Functions/isValidUTF8.cpp @@ -65,9 +65,9 @@ SOFTWARE. 
*/ #ifndef __SSE4_1__ - static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return DB::UTF8::isValidUTF8(data, len); } + static UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return DB::UTF8::isValidUTF8(data, len); } #else - static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) + static UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { /* * Map high nibble of "First Byte" to legal character length minus 1 diff --git a/src/Functions/jumpConsistentHash.cpp b/src/Functions/jumpConsistentHash.cpp index ffc21eb5cea..fbac5d4fdd5 100644 --- a/src/Functions/jumpConsistentHash.cpp +++ b/src/Functions/jumpConsistentHash.cpp @@ -29,7 +29,7 @@ struct JumpConsistentHashImpl using BucketsType = ResultType; static constexpr auto max_buckets = static_cast(std::numeric_limits::max()); - static inline ResultType apply(UInt64 hash, BucketsType n) + static ResultType apply(UInt64 hash, BucketsType n) { return JumpConsistentHash(hash, n); } diff --git a/src/Functions/kostikConsistentHash.cpp b/src/Functions/kostikConsistentHash.cpp index 47a9a928976..42004ed40d9 100644 --- a/src/Functions/kostikConsistentHash.cpp +++ b/src/Functions/kostikConsistentHash.cpp @@ -17,7 +17,7 @@ struct KostikConsistentHashImpl using BucketsType = ResultType; static constexpr auto max_buckets = 32768; - static inline ResultType apply(UInt64 hash, BucketsType n) + static ResultType apply(UInt64 hash, BucketsType n) { return ConsistentHashing(hash, n); } diff --git a/src/Functions/least.cpp b/src/Functions/least.cpp index f5680d4d468..babb8378d80 100644 --- a/src/Functions/least.cpp +++ b/src/Functions/least.cpp @@ -15,7 +15,7 @@ struct LeastBaseImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { /** gcc 4.9.2 successfully vectorizes a loop from this function. */ return static_cast(a) < static_cast(b) ? static_cast(a) : static_cast(b); @@ -24,7 +24,7 @@ struct LeastBaseImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) { @@ -46,7 +46,7 @@ struct LeastSpecialImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { static_assert(std::is_same_v, "ResultType != Result"); return accurate::lessOp(a, b) ? static_cast(a) : static_cast(b); diff --git a/src/Functions/minus.cpp b/src/Functions/minus.cpp index 04877a42b18..f3b9b8a7bcb 100644 --- a/src/Functions/minus.cpp +++ b/src/Functions/minus.cpp @@ -13,7 +13,7 @@ struct MinusImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { if constexpr (is_big_int_v || is_big_int_v) { @@ -28,7 +28,7 @@ struct MinusImpl /// Apply operation and check overflow. It's used for Deciamal operations. @returns true if overflowed, false otherwise. 
template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { return common::subOverflow(static_cast(a), b, c); } @@ -36,7 +36,7 @@ struct MinusImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? b.CreateSub(left, right) : b.CreateFSub(left, right); } diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index cbc2ec2cd0a..ebc1c4f5275 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -105,7 +105,7 @@ struct ModuloByConstantImpl private: template - static inline void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) + static void apply(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t i) { if constexpr (op_case == OpCase::Vector) c[i] = Op::template apply(a[i], b[i]); diff --git a/src/Functions/moduloOrZero.cpp b/src/Functions/moduloOrZero.cpp index 3551ae74c5f..cd7873b3b9e 100644 --- a/src/Functions/moduloOrZero.cpp +++ b/src/Functions/moduloOrZero.cpp @@ -15,7 +15,7 @@ struct ModuloOrZeroImpl static const constexpr bool allow_string_integer = false; template - static inline Result apply(A a, B b) + static Result apply(A a, B b) { if constexpr (std::is_floating_point_v) { diff --git a/src/Functions/multiply.cpp b/src/Functions/multiply.cpp index 4dc8cd10f31..67b6fff6b58 100644 --- a/src/Functions/multiply.cpp +++ b/src/Functions/multiply.cpp @@ -14,7 +14,7 @@ struct MultiplyImpl static const constexpr bool allow_string_integer = false; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { if constexpr (is_big_int_v || is_big_int_v) { @@ -29,7 +29,7 @@ struct MultiplyImpl /// Apply operation and check overflow. It's used for Decimal operations. @returns true if overflowed, false otherwise. template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { if constexpr (std::is_same_v || std::is_same_v) { @@ -43,7 +43,7 @@ struct MultiplyImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? 
b.CreateMul(left, right) : b.CreateFMul(left, right); } diff --git a/src/Functions/multiplyDecimal.cpp b/src/Functions/multiplyDecimal.cpp index ed6487c6683..7e30a893d72 100644 --- a/src/Functions/multiplyDecimal.cpp +++ b/src/Functions/multiplyDecimal.cpp @@ -17,7 +17,7 @@ struct MultiplyDecimalsImpl static constexpr auto name = "multiplyDecimal"; template - static inline Decimal256 + static Decimal256 execute(FirstType a, SecondType b, UInt16 scale_a, UInt16 scale_b, UInt16 result_scale) { if (a.value == 0 || b.value == 0) diff --git a/src/Functions/negate.cpp b/src/Functions/negate.cpp index bd47780dea8..2c9b461274d 100644 --- a/src/Functions/negate.cpp +++ b/src/Functions/negate.cpp @@ -11,7 +11,7 @@ struct NegateImpl using ResultType = std::conditional_t, A, typename NumberTraits::ResultOfNegate::Type>; static constexpr const bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { return -static_cast(a); } @@ -19,7 +19,7 @@ struct NegateImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { return arg->getType()->isIntegerTy() ? b.CreateNeg(arg) : b.CreateFNeg(arg); } diff --git a/src/Functions/plus.cpp b/src/Functions/plus.cpp index cd9cf6cec5c..ffb0fe2ade7 100644 --- a/src/Functions/plus.cpp +++ b/src/Functions/plus.cpp @@ -14,7 +14,7 @@ struct PlusImpl static const constexpr bool is_commutative = true; template - static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) + static NO_SANITIZE_UNDEFINED Result apply(A a, B b) { /// Next everywhere, static_cast - so that there is no wrong result in expressions of the form Int64 c = UInt32(a) * Int32(-1). if constexpr (is_big_int_v || is_big_int_v) @@ -30,7 +30,7 @@ struct PlusImpl /// Apply operation and check overflow. It's used for Deciamal operations. @returns true if overflowed, false otherwise. template - static inline bool apply(A a, B b, Result & c) + static bool apply(A a, B b, Result & c) { return common::addOverflow(static_cast(a), b, c); } @@ -38,7 +38,7 @@ struct PlusImpl #if USE_EMBEDDED_COMPILER static constexpr bool compilable = true; - static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) + static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { return left->getType()->isIntegerTy() ? 
b.CreateAdd(left, right) : b.CreateFAdd(left, right); } diff --git a/src/Functions/queryID.cpp b/src/Functions/queryID.cpp index 704206e1de5..5d0ac719797 100644 --- a/src/Functions/queryID.cpp +++ b/src/Functions/queryID.cpp @@ -19,16 +19,16 @@ public: explicit FunctionQueryID(const String & query_id_) : query_id(query_id_) {} - inline String getName() const override { return name; } + String getName() const override { return name; } - inline size_t getNumberOfArguments() const override { return 0; } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { return std::make_shared(); } - inline bool isDeterministic() const override { return false; } + bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 84597f4eadc..7f2fe646062 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -22,14 +22,14 @@ namespace struct RepeatImpl { /// Safety threshold against DoS. - static inline void checkRepeatTime(UInt64 repeat_time) + static void checkRepeatTime(UInt64 repeat_time) { static constexpr UInt64 max_repeat_times = 1'000'000; if (repeat_time > max_repeat_times) throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too many times to repeat ({}), maximum is: {}", repeat_time, max_repeat_times); } - static inline void checkStringSize(UInt64 size) + static void checkStringSize(UInt64 size) { static constexpr UInt64 max_string_size = 1 << 30; if (size > max_string_size) diff --git a/src/Functions/roundAge.cpp b/src/Functions/roundAge.cpp index cca92c19b0c..38eda9f3383 100644 --- a/src/Functions/roundAge.cpp +++ b/src/Functions/roundAge.cpp @@ -12,7 +12,7 @@ struct RoundAgeImpl using ResultType = UInt8; static constexpr const bool allow_string_or_fixed_string = false; - static inline ResultType apply(A x) + static ResultType apply(A x) { return x < 1 ? 0 : (x < 18 ? 17 diff --git a/src/Functions/roundDuration.cpp b/src/Functions/roundDuration.cpp index 918f0b3425d..963080ba0d2 100644 --- a/src/Functions/roundDuration.cpp +++ b/src/Functions/roundDuration.cpp @@ -12,7 +12,7 @@ struct RoundDurationImpl using ResultType = UInt16; static constexpr bool allow_string_or_fixed_string = false; - static inline ResultType apply(A x) + static ResultType apply(A x) { return x < 1 ? 0 : (x < 10 ? 1 diff --git a/src/Functions/roundToExp2.cpp b/src/Functions/roundToExp2.cpp index 607c67b742e..eb0df8884c5 100644 --- a/src/Functions/roundToExp2.cpp +++ b/src/Functions/roundToExp2.cpp @@ -65,7 +65,7 @@ struct RoundToExp2Impl using ResultType = T; static constexpr const bool allow_string_or_fixed_string = false; - static inline T apply(T x) + static T apply(T x) { return roundDownToPowerOfTwo(x); } diff --git a/src/Functions/sign.cpp b/src/Functions/sign.cpp index 6c849760eed..3dd2ac8e3aa 100644 --- a/src/Functions/sign.cpp +++ b/src/Functions/sign.cpp @@ -11,7 +11,7 @@ struct SignImpl using ResultType = Int8; static constexpr bool allow_string_or_fixed_string = false; - static inline NO_SANITIZE_UNDEFINED ResultType apply(A a) + static NO_SANITIZE_UNDEFINED ResultType apply(A a) { if constexpr (is_decimal || std::is_floating_point_v) return a < A(0) ? -1 : a == A(0) ? 
0 : 1; diff --git a/src/Functions/space.cpp b/src/Functions/space.cpp index 4cfa629aa33..83183c991bc 100644 --- a/src/Functions/space.cpp +++ b/src/Functions/space.cpp @@ -27,7 +27,7 @@ private: static constexpr auto space = ' '; /// Safety threshold against DoS. - static inline void checkRepeatTime(size_t repeat_time) + static void checkRepeatTime(size_t repeat_time) { static constexpr auto max_repeat_times = 1'000'000uz; if (repeat_time > max_repeat_times) diff --git a/src/Functions/tokenExtractors.cpp b/src/Functions/tokenExtractors.cpp index a29d759d2ca..e7dcb5cced3 100644 --- a/src/Functions/tokenExtractors.cpp +++ b/src/Functions/tokenExtractors.cpp @@ -116,7 +116,7 @@ public: private: template - inline void executeImpl( + void executeImpl( const ExtractorType & extractor, StringColumnType & input_data_column, ResultStringColumnType & result_data_column, diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h index e98f00270e2..62fe011c0b6 100644 --- a/src/IO/BufferBase.h +++ b/src/IO/BufferBase.h @@ -37,13 +37,13 @@ public: { Buffer(Position begin_pos_, Position end_pos_) : begin_pos(begin_pos_), end_pos(end_pos_) {} - inline Position begin() const { return begin_pos; } - inline Position end() const { return end_pos; } - inline size_t size() const { return size_t(end_pos - begin_pos); } - inline void resize(size_t size) { end_pos = begin_pos + size; } - inline bool empty() const { return size() == 0; } + Position begin() const { return begin_pos; } + Position end() const { return end_pos; } + size_t size() const { return size_t(end_pos - begin_pos); } + void resize(size_t size) { end_pos = begin_pos + size; } + bool empty() const { return size() == 0; } - inline void swap(Buffer & other) noexcept + void swap(Buffer & other) noexcept { std::swap(begin_pos, other.begin_pos); std::swap(end_pos, other.end_pos); @@ -71,21 +71,21 @@ public: } /// get buffer - inline Buffer & internalBuffer() { return internal_buffer; } + Buffer & internalBuffer() { return internal_buffer; } /// get the part of the buffer from which you can read / write data - inline Buffer & buffer() { return working_buffer; } + Buffer & buffer() { return working_buffer; } /// get (for reading and modifying) the position in the buffer - inline Position & position() { return pos; } + Position & position() { return pos; } /// offset in bytes of the cursor from the beginning of the buffer - inline size_t offset() const { return size_t(pos - working_buffer.begin()); } + size_t offset() const { return size_t(pos - working_buffer.begin()); } /// How many bytes are available for read/write - inline size_t available() const { return size_t(working_buffer.end() - pos); } + size_t available() const { return size_t(working_buffer.end() - pos); } - inline void swap(BufferBase & other) noexcept + void swap(BufferBase & other) noexcept { internal_buffer.swap(other.internal_buffer); working_buffer.swap(other.working_buffer); diff --git a/src/IO/HTTPHeaderEntries.h b/src/IO/HTTPHeaderEntries.h index 5862f1ead15..36b2ccc4ba5 100644 --- a/src/IO/HTTPHeaderEntries.h +++ b/src/IO/HTTPHeaderEntries.h @@ -10,7 +10,7 @@ struct HTTPHeaderEntry std::string value; HTTPHeaderEntry(const std::string & name_, const std::string & value_) : name(name_), value(value_) {} - inline bool operator==(const HTTPHeaderEntry & other) const { return name == other.name && value == other.value; } + bool operator==(const HTTPHeaderEntry & other) const { return name == other.name && value == other.value; } }; using HTTPHeaderEntries = std::vector; diff --git 
a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index 73e52f2c503..eba614d9d0a 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -37,7 +37,7 @@ public: Status readBlock(size_t * avail_in, const char ** next_in, size_t * avail_out, char ** next_out); - inline void reset() + void reset() { buffer_length = 0; block_length = -1; @@ -73,7 +73,7 @@ class HadoopSnappyReadBuffer : public CompressedReadBufferWrapper public: using Status = HadoopSnappyDecoder::Status; - inline static String statusToString(Status status) + static String statusToString(Status status) { switch (status) { diff --git a/src/IO/IReadableWriteBuffer.h b/src/IO/IReadableWriteBuffer.h index dda5fc07c8e..db379fef969 100644 --- a/src/IO/IReadableWriteBuffer.h +++ b/src/IO/IReadableWriteBuffer.h @@ -8,7 +8,7 @@ namespace DB struct IReadableWriteBuffer { /// At the first time returns getReadBufferImpl(). Next calls return nullptr. - inline std::unique_ptr tryGetReadBuffer() + std::unique_ptr tryGetReadBuffer() { if (!can_reread) return nullptr; diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index 2ee209ffd6c..e831956956f 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -83,9 +83,9 @@ private: bool peekNext(); - inline bool useSubbufferOnly() const { return !peeked_size; } - inline bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf->buffer().begin(); } - inline bool checkpointInOwnMemory() const { return checkpoint_in_own_memory; } + bool useSubbufferOnly() const { return !peeked_size; } + bool currentlyReadFromOwnMemory() const { return working_buffer.begin() != sub_buf->buffer().begin(); } + bool checkpointInOwnMemory() const { return checkpoint_in_own_memory; } void checkStateCorrect() const; diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index 056e25a5fbe..73f5335411f 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -85,7 +85,7 @@ public: } - inline void nextIfAtEnd() + void nextIfAtEnd() { if (!hasPendingData()) next(); diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index 424cf65caf2..3b03356a8fb 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -169,7 +169,7 @@ using DeleteObjectsRequest = ExtendedRequest; class ComposeObjectRequest : public ExtendedRequest { public: - inline const char * GetServiceRequestName() const override { return "ComposeObject"; } + const char * GetServiceRequestName() const override { return "ComposeObject"; } AWS_S3_API Aws::String SerializePayload() const override; diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index 1ceb938e454..ef4e0058ec3 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -41,7 +41,7 @@ public: * If direct write is performed into [position(), buffer().end()) and its length is not enough, * you need to fill it first (i.g with write call), after it the capacity is regained. */ - inline void next() + void next() { if (!offset()) return; @@ -69,7 +69,7 @@ public: /// Calling finalize() in the destructor of derived classes is a bad practice. 
virtual ~WriteBuffer(); - inline void nextIfAtEnd() + void nextIfAtEnd() { if (!hasPendingData()) next(); @@ -96,7 +96,7 @@ public: } } - inline void write(char x) + void write(char x) { if (finalized) throw Exception{ErrorCodes::LOGICAL_ERROR, "Cannot write to finalized buffer"}; diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.h b/src/IO/ZstdDeflatingAppendableWriteBuffer.h index d9c4f32d6da..34cdf03df25 100644 --- a/src/IO/ZstdDeflatingAppendableWriteBuffer.h +++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.h @@ -27,7 +27,7 @@ class ZstdDeflatingAppendableWriteBuffer : public BufferWithOwnMemory; /// Frame end block. If we read non-empty file and see no such flag we should add it. - static inline constexpr ZSTDLastBlock ZSTD_CORRECT_TERMINATION_LAST_BLOCK = {0x01, 0x00, 0x00}; + static constexpr ZSTDLastBlock ZSTD_CORRECT_TERMINATION_LAST_BLOCK = {0x01, 0x00, 0x00}; ZstdDeflatingAppendableWriteBuffer( std::unique_ptr out_, diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index 5a8a5bfb184..0b0460b26c8 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -133,10 +133,10 @@ struct DDLTaskBase virtual void createSyncedNodeIfNeed(const ZooKeeperPtr & /*zookeeper*/) {} - inline String getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; } - inline String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; } - inline String getShardNodePath() const { return fs::path(entry_path) / "shards" / getShardID(); } - inline String getSyncedNodePath() const { return fs::path(entry_path) / "synced" / host_id_str; } + String getActiveNodePath() const { return fs::path(entry_path) / "active" / host_id_str; } + String getFinishedNodePath() const { return fs::path(entry_path) / "finished" / host_id_str; } + String getShardNodePath() const { return fs::path(entry_path) / "shards" / getShardID(); } + String getSyncedNodePath() const { return fs::path(entry_path) / "synced" / host_id_str; } static String getLogEntryName(UInt32 log_entry_number); static UInt32 getLogEntryNumber(const String & log_entry_name); diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 5caa034e0e9..37125d9900c 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -284,7 +284,7 @@ private: static constexpr UInt64 bits_for_first_level = 4; using UUIDToStorageMap = std::array; - static inline size_t getFirstLevelIdx(const UUID & uuid) + static size_t getFirstLevelIdx(const UUID & uuid) { return UUIDHelpers::getHighBytes(uuid) >> (64 - bits_for_first_level); } diff --git a/src/Interpreters/JIT/CHJIT.cpp b/src/Interpreters/JIT/CHJIT.cpp index 046d0b4fc10..21c773ee1d7 100644 --- a/src/Interpreters/JIT/CHJIT.cpp +++ b/src/Interpreters/JIT/CHJIT.cpp @@ -119,9 +119,9 @@ public: return result; } - inline size_t getAllocatedSize() const { return allocated_size; } + size_t getAllocatedSize() const { return allocated_size; } - inline size_t getPageSize() const { return page_size; } + size_t getPageSize() const { return page_size; } ~PageArena() { @@ -177,10 +177,10 @@ private: { } - inline void * base() const { return pages_base; } - inline size_t pagesSize() const { return pages_size; } - inline size_t pageSize() const { return page_size; } - inline size_t blockSize() const { return pages_size * page_size; } + void * base() const { return pages_base; } + size_t pagesSize() const { return pages_size; } + size_t pageSize() const { return page_size; } + size_t 
blockSize() const { return pages_size * page_size; } private: void * pages_base; @@ -298,7 +298,7 @@ public: return true; } - inline size_t allocatedSize() const + size_t allocatedSize() const { size_t data_size = rw_page_arena.getAllocatedSize() + ro_page_arena.getAllocatedSize(); size_t code_size = ex_page_arena.getAllocatedSize(); diff --git a/src/Interpreters/JIT/CHJIT.h b/src/Interpreters/JIT/CHJIT.h index fc883802426..89d446fd3b3 100644 --- a/src/Interpreters/JIT/CHJIT.h +++ b/src/Interpreters/JIT/CHJIT.h @@ -85,7 +85,7 @@ public: /** Total compiled code size for module that are currently valid. */ - inline size_t getCompiledCodeSize() const { return compiled_code_size.load(std::memory_order_relaxed); } + size_t getCompiledCodeSize() const { return compiled_code_size.load(std::memory_order_relaxed); } private: diff --git a/src/Interpreters/JIT/CompileDAG.h b/src/Interpreters/JIT/CompileDAG.h index 13ec763b6fc..8db4ac5e110 100644 --- a/src/Interpreters/JIT/CompileDAG.h +++ b/src/Interpreters/JIT/CompileDAG.h @@ -65,17 +65,17 @@ public: nodes.emplace_back(std::move(node)); } - inline size_t getNodesCount() const { return nodes.size(); } - inline size_t getInputNodesCount() const { return input_nodes_count; } + size_t getNodesCount() const { return nodes.size(); } + size_t getInputNodesCount() const { return input_nodes_count; } - inline Node & operator[](size_t index) { return nodes[index]; } - inline const Node & operator[](size_t index) const { return nodes[index]; } + Node & operator[](size_t index) { return nodes[index]; } + const Node & operator[](size_t index) const { return nodes[index]; } - inline Node & front() { return nodes.front(); } - inline const Node & front() const { return nodes.front(); } + Node & front() { return nodes.front(); } + const Node & front() const { return nodes.front(); } - inline Node & back() { return nodes.back(); } - inline const Node & back() const { return nodes.back(); } + Node & back() { return nodes.back(); } + const Node & back() const { return nodes.back(); } private: std::vector nodes; diff --git a/src/Interpreters/JoinUtils.h b/src/Interpreters/JoinUtils.h index ff48f34d82c..f15ee2c2fb2 100644 --- a/src/Interpreters/JoinUtils.h +++ b/src/Interpreters/JoinUtils.h @@ -49,7 +49,7 @@ public: return nullptr; } - inline bool isRowFiltered(size_t row) const + bool isRowFiltered(size_t row) const { return !assert_cast(*column).getData()[row]; } diff --git a/src/Interpreters/examples/hash_map_string_3.cpp b/src/Interpreters/examples/hash_map_string_3.cpp index 57e36bed545..44ee3542bd9 100644 --- a/src/Interpreters/examples/hash_map_string_3.cpp +++ b/src/Interpreters/examples/hash_map_string_3.cpp @@ -96,7 +96,7 @@ inline bool operator==(StringRef_CompareAlwaysTrue, StringRef_CompareAlwaysTrue) struct FastHash64 { - static inline uint64_t mix(uint64_t h) + static uint64_t mix(uint64_t h) { h ^= h >> 23; h *= 0x2127599bf4325c37ULL; diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index ab16aaa56ad..58f78e5af42 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -80,7 +80,7 @@ public: bool allowVariableNumberOfColumns() const override { return format_settings.custom.allow_variable_number_of_columns; } bool checkForSuffixImpl(bool check_eof); - inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf, true); } + void skipSpaces() { if (ignore_spaces) 
skipWhitespaceIfAny(*buf, true); } EscapingRule getEscapingRule() const override { return format_settings.custom.escaping_rule; } diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 38870473289..9a7bc03ea78 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -84,7 +84,7 @@ public: void readPrefix(); void skipField(EscapingRule escaping_rule); - inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); } + void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); } template ReturnType tryReadPrefixOrSuffix(size_t & input_part_beg, size_t input_part_end); diff --git a/src/Processors/Port.h b/src/Processors/Port.h index f3c7bbb5fee..2d39f2dd6be 100644 --- a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -38,7 +38,7 @@ public: UInt64 version = 0; UInt64 prev_version = 0; - void inline ALWAYS_INLINE update() + void ALWAYS_INLINE update() { if (version == prev_version && update_list) update_list->push_back(id); @@ -46,7 +46,7 @@ public: ++version; } - void inline ALWAYS_INLINE trigger() { prev_version = version; } + void ALWAYS_INLINE trigger() { prev_version = version; } }; protected: @@ -249,7 +249,7 @@ public: } protected: - void inline ALWAYS_INLINE updateVersion() + void ALWAYS_INLINE updateVersion() { if (likely(update_info)) update_info->update(); diff --git a/src/Server/HTTPHandler.h b/src/Server/HTTPHandler.h index ae4cf034276..a96402247a2 100644 --- a/src/Server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -77,12 +77,12 @@ private: bool exception_is_written = false; std::function exception_writer; - inline bool hasDelayed() const + bool hasDelayed() const { return out_maybe_delayed_and_compressed != out_maybe_compressed.get(); } - inline void finalize() + void finalize() { if (finalized) return; @@ -94,7 +94,7 @@ private: out->finalize(); } - inline bool isFinalized() const + bool isFinalized() const { return finalized; } diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index a5dea2f63db..4c8c7974005 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -70,7 +70,7 @@ public: void initOnce(ContextPtr context, const String & root_dir_, size_t limit_size_, size_t bytes_read_before_flush_); - inline bool isInitialized() const { return initialized; } + bool isInitialized() const { return initialized; } std::pair, std::unique_ptr> createReader(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer, bool is_random_accessed); diff --git a/src/Storages/Cache/RemoteCacheController.h b/src/Storages/Cache/RemoteCacheController.h index 782a6b89519..22b3d64b1db 100644 --- a/src/Storages/Cache/RemoteCacheController.h +++ b/src/Storages/Cache/RemoteCacheController.h @@ -45,41 +45,41 @@ public: */ void waitMoreData(size_t start_offset_, size_t end_offset_); - inline size_t size() const { return current_offset; } + size_t size() const { return current_offset; } - inline const std::filesystem::path & getLocalPath() { return local_path; } - inline String getRemotePath() const { return file_metadata_ptr->remote_path; } + const std::filesystem::path & getLocalPath() { return local_path; } + String getRemotePath() const { return file_metadata_ptr->remote_path; } - inline UInt64 getLastModificationTimestamp() const { return file_metadata_ptr->last_modification_timestamp; } + 
UInt64 getLastModificationTimestamp() const { return file_metadata_ptr->last_modification_timestamp; } bool isModified(IRemoteFileMetadataPtr file_metadata_); - inline void markInvalid() + void markInvalid() { std::lock_guard lock(mutex); valid = false; } - inline bool isValid() + bool isValid() { std::lock_guard lock(mutex); return valid; } - inline bool isEnable() + bool isEnable() { std::lock_guard lock(mutex); return is_enable; } - inline void disable() + void disable() { std::lock_guard lock(mutex); is_enable = false; } - inline void enable() + void enable() { std::lock_guard lock(mutex); is_enable = true; } IRemoteFileMetadataPtr getFileMetadata() { return file_metadata_ptr; } - inline size_t getFileSize() const { return file_metadata_ptr->file_size; } + size_t getFileSize() const { return file_metadata_ptr->file_size; } void startBackgroundDownload(std::unique_ptr in_readbuffer_, BackgroundSchedulePool & thread_pool); diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 536214e159f..20d005c8038 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -65,8 +65,8 @@ public: {ORC_INPUT_FORMAT, FileFormat::ORC}, }; - static inline bool isFormatClass(const String & format_class) { return VALID_HDFS_FORMATS.contains(format_class); } - static inline FileFormat toFileFormat(const String & format_class) + static bool isFormatClass(const String & format_class) { return VALID_HDFS_FORMATS.contains(format_class); } + static FileFormat toFileFormat(const String & format_class) { if (isFormatClass(format_class)) { diff --git a/src/Storages/Kafka/KafkaConsumer.h b/src/Storages/Kafka/KafkaConsumer.h index f160d1c0855..a3bc97779b3 100644 --- a/src/Storages/Kafka/KafkaConsumer.h +++ b/src/Storages/Kafka/KafkaConsumer.h @@ -82,17 +82,17 @@ public: auto pollTimeout() const { return poll_timeout; } - inline bool hasMorePolledMessages() const + bool hasMorePolledMessages() const { return (stalled_status == NOT_STALLED) && (current != messages.end()); } - inline bool polledDataUnusable() const + bool polledDataUnusable() const { return (stalled_status != NOT_STALLED) && (stalled_status != NO_MESSAGES_RETURNED); } - inline bool isStalled() const { return stalled_status != NOT_STALLED; } + bool isStalled() const { return stalled_status != NOT_STALLED; } void storeLastReadMessageOffset(); void resetToLastCommitted(const char * msg); diff --git a/src/Storages/MergeTree/BackgroundProcessList.h b/src/Storages/MergeTree/BackgroundProcessList.h index c9a4887cca3..bf29aaf32d0 100644 --- a/src/Storages/MergeTree/BackgroundProcessList.h +++ b/src/Storages/MergeTree/BackgroundProcessList.h @@ -87,7 +87,7 @@ public: virtual void onEntryCreate(const Entry & /* entry */) {} virtual void onEntryDestroy(const Entry & /* entry */) {} - virtual inline ~BackgroundProcessList() = default; + virtual ~BackgroundProcessList() = default; }; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index c380f99060e..c63f811363a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -456,23 +456,23 @@ public: /// File with compression codec name which was used to compress part columns /// by default. Some columns may have their own compression codecs, but /// default will be stored in this file. 
- static inline constexpr auto DEFAULT_COMPRESSION_CODEC_FILE_NAME = "default_compression_codec.txt"; + static constexpr auto DEFAULT_COMPRESSION_CODEC_FILE_NAME = "default_compression_codec.txt"; /// "delete-on-destroy.txt" is deprecated. It is no longer being created, only is removed. - static inline constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED = "delete-on-destroy.txt"; + static constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED = "delete-on-destroy.txt"; - static inline constexpr auto UUID_FILE_NAME = "uuid.txt"; + static constexpr auto UUID_FILE_NAME = "uuid.txt"; /// File that contains information about kinds of serialization of columns /// and information that helps to choose kind of serialization later during merging /// (number of rows, number of rows with default values, etc). - static inline constexpr auto SERIALIZATION_FILE_NAME = "serialization.json"; + static constexpr auto SERIALIZATION_FILE_NAME = "serialization.json"; /// Version used for transactions. - static inline constexpr auto TXN_VERSION_METADATA_FILE_NAME = "txn_version.txt"; + static constexpr auto TXN_VERSION_METADATA_FILE_NAME = "txn_version.txt"; - static inline constexpr auto METADATA_VERSION_FILE_NAME = "metadata_version.txt"; + static constexpr auto METADATA_VERSION_FILE_NAME = "metadata_version.txt"; /// One of part files which is used to check how many references (I'd like /// to say hardlinks, but it will confuse even more) we have for the part @@ -484,7 +484,7 @@ public: /// it was mutation without any change for source part. In this case we /// really don't need to remove data from remote FS and need only decrement /// reference counter locally. - static inline constexpr auto FILE_FOR_REFERENCES_CHECK = "checksums.txt"; + static constexpr auto FILE_FOR_REFERENCES_CHECK = "checksums.txt"; /// Checks that all TTLs (table min/max, column ttls, so on) for part /// calculated. Part without calculated TTL may exist if TTL was added after diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index b19c42c8db8..c1514416301 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -41,13 +41,13 @@ struct MergeTreeBlockSizePredictor void update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay = calculateDecay()); /// Return current block size (after update()) - inline size_t getBlockSize() const + size_t getBlockSize() const { return block_size_bytes; } /// Predicts what number of rows should be read to exhaust byte quota per column - inline size_t estimateNumRowsForMaxSizeColumn(size_t bytes_quota) const + size_t estimateNumRowsForMaxSizeColumn(size_t bytes_quota) const { double max_size_per_row = std::max(std::max(max_size_per_row_fixed, 1), max_size_per_row_dynamic); return (bytes_quota > block_size_rows * max_size_per_row) @@ -56,14 +56,14 @@ struct MergeTreeBlockSizePredictor } /// Predicts what number of rows should be read to exhaust byte quota per block - inline size_t estimateNumRows(size_t bytes_quota) const + size_t estimateNumRows(size_t bytes_quota) const { return (bytes_quota > block_size_bytes) ? 
static_cast((bytes_quota - block_size_bytes) / std::max(1, static_cast(bytes_per_row_current))) : 0; } - inline void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = calculateDecay()) + void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = calculateDecay()) { double alpha = std::pow(1. - decay, rows_was_read); double current_ration = rows_was_filtered / std::max(1.0, static_cast(rows_was_read)); diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h index 85006c3ffde..87445c99ade 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h @@ -64,8 +64,8 @@ public: std::string describe() const; }; -constexpr inline auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } -constexpr inline auto getAdaptiveMrkSizeWide() { return sizeof(UInt64) * 3; } +constexpr auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } +constexpr auto getAdaptiveMrkSizeWide() { return sizeof(UInt64) * 3; } inline size_t getAdaptiveMrkSizeCompact(size_t columns_num); } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 9d086e1dc37..f96206ce657 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -307,7 +307,7 @@ public: /// Get best replica having this partition on a same type remote disk String getSharedDataReplica(const IMergeTreeDataPart & part, const DataSourceDescription & data_source_description) const; - inline const String & getReplicaName() const { return replica_name; } + const String & getReplicaName() const { return replica_name; } /// Restores table metadata if ZooKeeper lost it. /// Used only on restarted readonly replicas (not checked). All active (Active) parts are moved to detached/ diff --git a/src/Storages/UVLoop.h b/src/Storages/UVLoop.h index dd1d64973d1..907a3fc0b13 100644 --- a/src/Storages/UVLoop.h +++ b/src/Storages/UVLoop.h @@ -57,9 +57,9 @@ public: } } - inline uv_loop_t * getLoop() { return loop_ptr.get(); } + uv_loop_t * getLoop() { return loop_ptr.get(); } - inline const uv_loop_t * getLoop() const { return loop_ptr.get(); } + const uv_loop_t * getLoop() const { return loop_ptr.get(); } private: std::unique_ptr loop_ptr; diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 1946d8e8905..ed7f80e5df9 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -39,7 +39,7 @@ class Context; class ITableFunction : public std::enable_shared_from_this { public: - static inline std::string getDatabaseName() { return "_table_function"; } + static std::string getDatabaseName() { return "_table_function"; } /// Get the main function name. 
virtual std::string getName() const = 0; From 75d163da12b8c6b5671d40f33eaa12e0409f2566 Mon Sep 17 00:00:00 2001 From: avogar Date: Sun, 19 May 2024 12:17:01 +0000 Subject: [PATCH 183/392] Fix tests --- .../03159_dynamic_type_all_types.reference | 12 ++---------- .../0_stateless/03159_dynamic_type_all_types.sql | 8 ++------ 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference index abecca893f9..72c5b90dbba 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.reference +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.reference @@ -109,10 +109,6 @@ MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)] Map(Dynamic, Dynamic) {'11':'v1','22':'1'} Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] -Object(\'json\') {"1":"2"} -Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string @@ -258,10 +254,6 @@ MultiPolygon [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)] Map(Dynamic, Dynamic) {'11':'v1','22':'1'} Nested(x UInt32, y String) [(1,'aa'),(2,'bb')] Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String)) [(1,(2,['aa','bb']),[(3,'cc'),(4,'dd')]),(5,(6,['ee','ff']),[(7,'gg'),(8,'hh')])] -Object(\'json\') {"1":"2"} -Object(Nullable(\'json\')) {"k1":1,"k2":2,"1":null,"2":null,"2020-10-10":null} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":null,"2":null,"2020-10-10":"foo"} -Object(Nullable(\'json\')) {"k1":null,"k2":null,"1":2,"2":3,"2020-10-10":null} Point (1.23,4.5600000000000005) Ring [(1.23,4.5600000000000005),(2.34,5.67)] String string @@ -296,5 +288,5 @@ UInt256 1 UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639934 UInt256 115792089237316195423570985008687907853269984665640564039457584007913129639935 -50 -50 +48 +48 diff --git a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql index 64fab07ed4f..d302205ca23 100644 --- a/tests/queries/0_stateless/03159_dynamic_type_all_types.sql +++ b/tests/queries/0_stateless/03159_dynamic_type_all_types.sql @@ -49,10 +49,6 @@ INSERT INTO t VALUES ('1'::Bool), (0::Bool); -- Dates: use Date and Date32 for days, and DateTime and DateTime64 for instances in time INSERT INTO t VALUES ('2022-01-01'::Date), ('2022-01-01'::Date32), ('2022-01-01 01:01:01'::DateTime), ('2022-01-01 01:01:01.011'::DateTime64); --- JSON -INSERT INTO t VALUES ('{"1":"2"}'::JSON); -INSERT INTO t FORMAT JSONEachRow {"d" : {"k1" : 1, "k2" : 2}} {"d" : {"1" : 2, "2" : 3}} {"d" : {"2020-10-10" : "foo"}}; - -- UUID INSERT INTO t VALUES ('dededdb6-7835-4ce4-8d11-b5de6f2820e9'::UUID); INSERT INTO t VALUES ('00000000-0000-0000-0000-000000000000'::UUID); @@ -86,13 +82,13 @@ INSERT INTO t VALUES (interval '1' day), (interval '2' month), (interval '3' yea INSERT INTO t VALUES ([(1, 'aa'), (2, 'bb')]::Nested(x UInt32, y String)); INSERT INTO t VALUES ([(1, (2, ['aa', 'bb']), [(3, 'cc'), (4, 'dd')]), (5, (6, 
['ee', 'ff']), [(7, 'gg'), (8, 'hh')])]::Nested(x UInt32, y Tuple(y1 UInt32, y2 Array(String)), z Nested(z1 UInt32, z2 String))); -SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d, toString(d); +SELECT dynamicType(d), d FROM t ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; CREATE TABLE t2 (d Dynamic(max_types=255)) ENGINE = Memory; INSERT INTO t2 SELECT * FROM t; SELECT ''; -SELECT dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d, toString(d); +SELECT dynamicType(d), d FROM t2 ORDER BY substring(dynamicType(d),1,1), length(dynamicType(d)), d; SELECT ''; SELECT uniqExact(dynamicType(d)) t_ FROM t; From bb0fcc929695701ccde2ca49298e50792636fa1c Mon Sep 17 00:00:00 2001 From: pufit Date: Sun, 19 May 2024 08:33:37 -0400 Subject: [PATCH 184/392] better tests --- ...te_view_with_sql_security_option.reference | 2 + ...84_create_view_with_sql_security_option.sh | 78 +++++++++---------- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index 931cf8ac19c..0589fdeef04 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -24,6 +24,8 @@ OK 2 OK OK +OK +100 100 ===== TestGrants ===== OK diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index f1da343da36..f32aee44bee 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -159,6 +159,45 @@ ${CLICKHOUSE_CLIENT} --query "REVOKE SELECT ON $db.test_table FROM $user1" (( $(${CLICKHOUSE_CLIENT} --user $user2 --query "SELECT * FROM $db.test_mv_4" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" (( $(${CLICKHOUSE_CLIENT} --query "INSERT INTO $db.test_table VALUES ('foo'), ('bar');" 2>&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +${CLICKHOUSE_CLIENT} --multiquery <&1 | grep -c "Not enough privileges") >= 1 )) && echo "OK" || echo "UNEXPECTED" +${CLICKHOUSE_CLIENT} --query "GRANT INSERT ON $db.source TO $user2" +${CLICKHOUSE_CLIENT} --user $user2 --query "INSERT INTO source SELECT * FROM generateRandom() LIMIT 100" + +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination1" +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination2" echo "===== TestGrants =====" ${CLICKHOUSE_CLIENT} --query "GRANT CREATE ON *.* TO $user1" @@ -192,45 +231,6 @@ ${CLICKHOUSE_CLIENT} --user $user1 --query " ${CLICKHOUSE_CLIENT} --query "GRANT SET DEFINER ON $user2 TO $user1" -${CLICKHOUSE_CLIENT} --multiquery < Date: Sun, 19 May 2024 12:51:14 +0000 Subject: [PATCH 185/392] Restore the warning --- .clang-tidy | 2 ++ src/Common/CurrentThread.h | 2 +- src/Common/findExtreme.cpp | 4 ++-- src/Functions/ExtractString.h | 6 +++--- .../FunctionsLanguageClassification.cpp | 2 +- .../FunctionsProgrammingClassification.cpp | 2 +- src/Functions/FunctionsStringHash.cpp | 20 +++++++++---------- src/Functions/FunctionsStringSimilarity.cpp | 6 +++--- .../FunctionsTonalityClassification.cpp | 2 +- src/Functions/PolygonUtils.h | 2 +- src/Processors/Port.h | 6 +++--- 11 files changed, 28 insertions(+), 26 
deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 66417c41c46..7e8f604467b 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -118,6 +118,8 @@ Checks: [ '-readability-magic-numbers', '-readability-named-parameter', '-readability-redundant-declaration', + '-readability-redundant-inline-specifier', # generally useful but incompatible with __attribute((always_inline))__ (aka. ALWAYS_INLINE). + # it has an effect only if combined with `inline`: https://godbolt.org/z/Eefd74qdM '-readability-simplify-boolean-expr', '-readability-suspicious-call-argument', '-readability-uppercase-literal-suffix', diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index 8dade8c6fd5..e1eb926c951 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -62,7 +62,7 @@ public: static void updatePerformanceCountersIfNeeded(); static ProfileEvents::Counters & getProfileEvents(); - static MemoryTracker * getMemoryTracker() + ALWAYS_INLINE inline static MemoryTracker * getMemoryTracker() { if (!current_thread) [[unlikely]] return nullptr; diff --git a/src/Common/findExtreme.cpp b/src/Common/findExtreme.cpp index a99b1f2dd3d..ce3bbb86d7c 100644 --- a/src/Common/findExtreme.cpp +++ b/src/Common/findExtreme.cpp @@ -11,13 +11,13 @@ namespace DB template struct MinComparator { - static ALWAYS_INLINE const T & cmp(const T & a, const T & b) { return std::min(a, b); } + static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::min(a, b); } }; template struct MaxComparator { - static ALWAYS_INLINE const T & cmp(const T & a, const T & b) { return std::max(a, b); } + static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::max(a, b); } }; MULTITARGET_FUNCTION_AVX2_SSE42( diff --git a/src/Functions/ExtractString.h b/src/Functions/ExtractString.h index 5b8fa41958a..aa0e1b04835 100644 --- a/src/Functions/ExtractString.h +++ b/src/Functions/ExtractString.h @@ -20,7 +20,7 @@ namespace DB // includes extracting ASCII ngram, UTF8 ngram, ASCII word and UTF8 word struct ExtractStringImpl { - static ALWAYS_INLINE const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) + static ALWAYS_INLINE inline const UInt8 * readOneWord(const UInt8 *& pos, const UInt8 * end) { // jump separators while (pos < end && isUTF8Sep(*pos)) @@ -35,10 +35,10 @@ struct ExtractStringImpl } // we use ASCII non-alphanum character as UTF8 separator - static ALWAYS_INLINE bool isUTF8Sep(const UInt8 c) { return c < 128 && !isAlphaNumericASCII(c); } + static ALWAYS_INLINE inline bool isUTF8Sep(const UInt8 c) { return c < 128 && !isAlphaNumericASCII(c); } // read one UTF8 character - static ALWAYS_INLINE void readOneUTF8Code(const UInt8 *& pos, const UInt8 * end) + static ALWAYS_INLINE inline void readOneUTF8Code(const UInt8 *& pos, const UInt8 * end) { size_t length = UTF8::seqLength(*pos); diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp index 94391606762..55485d41ce0 100644 --- a/src/Functions/FunctionsLanguageClassification.cpp +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -31,7 +31,7 @@ extern const int SUPPORT_IS_DISABLED; struct FunctionDetectLanguageImpl { - static ALWAYS_INLINE std::string_view codeISO(std::string_view code_string) + static ALWAYS_INLINE inline std::string_view codeISO(std::string_view code_string) { if (code_string.ends_with("-Latn")) code_string.remove_suffix(code_string.size() - 5); diff --git a/src/Functions/FunctionsProgrammingClassification.cpp 
b/src/Functions/FunctionsProgrammingClassification.cpp index 8e9eff50aab..a93e1d9a87d 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -21,7 +21,7 @@ namespace DB struct FunctionDetectProgrammingLanguageImpl { /// Calculate total weight - static ALWAYS_INLINE Float64 stateMachine( + static ALWAYS_INLINE inline Float64 stateMachine( const FrequencyHolder::Map & standard, const std::unordered_map & model) { diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index cd33564caf9..0bf6e39e651 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -99,7 +99,7 @@ struct Hash } template - static ALWAYS_INLINE UInt64 shingleHash(UInt64 crc, const UInt8 * start, size_t size) + static ALWAYS_INLINE inline UInt64 shingleHash(UInt64 crc, const UInt8 * start, size_t size) { if (size & 1) { @@ -153,7 +153,7 @@ struct Hash } template - static ALWAYS_INLINE UInt64 shingleHash(const std::vector & shingle, size_t offset = 0) + static ALWAYS_INLINE inline UInt64 shingleHash(const std::vector & shingle, size_t offset = 0) { UInt64 crc = -1ULL; @@ -177,14 +177,14 @@ struct SimHashImpl static constexpr size_t min_word_size = 4; /// Update fingerprint according to hash_value bits. - static ALWAYS_INLINE void updateFingerVector(Int64 * finger_vec, UInt64 hash_value) + static ALWAYS_INLINE inline void updateFingerVector(Int64 * finger_vec, UInt64 hash_value) { for (size_t i = 0; i < 64; ++i) finger_vec[i] += (hash_value & (1ULL << i)) ? 1 : -1; } /// Return a 64 bit value according to finger_vec. - static ALWAYS_INLINE UInt64 getSimHash(const Int64 * finger_vec) + static ALWAYS_INLINE inline UInt64 getSimHash(const Int64 * finger_vec) { UInt64 res = 0; @@ -200,7 +200,7 @@ struct SimHashImpl // for each ngram, calculate a 64 bit hash value, and update the vector according the hash value // finally return a 64 bit value(UInt64), i'th bit is 1 means vector[i] > 0, otherwise, vector[i] < 0 - static ALWAYS_INLINE UInt64 ngramHashASCII(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE inline UInt64 ngramHashASCII(const UInt8 * data, size_t size, size_t shingle_size) { if (size < shingle_size) return Hash::shingleHash(-1ULL, data, size); @@ -217,7 +217,7 @@ struct SimHashImpl return getSimHash(finger_vec); } - static ALWAYS_INLINE UInt64 ngramHashUTF8(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE inline UInt64 ngramHashUTF8(const UInt8 * data, size_t size, size_t shingle_size) { const UInt8 * start = data; const UInt8 * end = data + size; @@ -259,7 +259,7 @@ struct SimHashImpl // 2. 
next, we extract one word each time, and calculate a new hash value of the new word,then use the latest N hash // values to calculate the next word shingle hash value - static ALWAYS_INLINE UInt64 wordShingleHash(const UInt8 * data, size_t size, size_t shingle_size) + static ALWAYS_INLINE inline UInt64 wordShingleHash(const UInt8 * data, size_t size, size_t shingle_size) { const UInt8 * start = data; const UInt8 * end = data + size; @@ -400,7 +400,7 @@ struct MinHashImpl using MaxHeap = Heap>; using MinHeap = Heap>; - static ALWAYS_INLINE void ngramHashASCII( + static ALWAYS_INLINE inline void ngramHashASCII( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, @@ -429,7 +429,7 @@ struct MinHashImpl } } - static ALWAYS_INLINE void ngramHashUTF8( + static ALWAYS_INLINE inline void ngramHashUTF8( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, @@ -472,7 +472,7 @@ struct MinHashImpl // MinHash word shingle hash value calculate function: String ->Tuple(UInt64, UInt64) // for each word shingle, we calculate a hash value, but in fact, we just maintain the // K minimum and K maximum hash value - static ALWAYS_INLINE void wordShingleHash( + static ALWAYS_INLINE inline void wordShingleHash( MinHeap & min_heap, MaxHeap & max_heap, const UInt8 * data, diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index 5224c76d7b0..7b3f2337c89 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -85,7 +85,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE void unrollLowering(Container & cont, const std::index_sequence &) + static ALWAYS_INLINE inline void unrollLowering(Container & cont, const std::index_sequence &) { ((cont[Offset + I] = std::tolower(cont[Offset + I])), ...); } @@ -195,7 +195,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE size_t calculateNeedleStats( + static ALWAYS_INLINE inline size_t calculateNeedleStats( const char * data, const size_t size, NgramCount * ngram_stats, @@ -228,7 +228,7 @@ struct NgramDistanceImpl } template - static ALWAYS_INLINE UInt64 calculateHaystackStatsAndMetric( + static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric( const char * data, const size_t size, NgramCount * ngram_stats, diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index a8cc09186f6..3de38d99c88 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -18,7 +18,7 @@ namespace DB */ struct FunctionDetectTonalityImpl { - static ALWAYS_INLINE Float32 detectTonality( + static ALWAYS_INLINE inline Float32 detectTonality( const UInt8 * str, const size_t str_len, const FrequencyHolder::Map & emotional_dict) diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 0c57fd7f0b5..4ab146b085f 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -124,7 +124,7 @@ public: bool hasEmptyBound() const { return has_empty_bound; } - bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const + bool ALWAYS_INLINE inline contains(CoordinateType x, CoordinateType y) const { Point point(x, y); diff --git a/src/Processors/Port.h b/src/Processors/Port.h index 2d39f2dd6be..f3c7bbb5fee 100644 --- a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -38,7 +38,7 @@ public: UInt64 version = 0; UInt64 prev_version = 0; - void ALWAYS_INLINE update() + void inline ALWAYS_INLINE 
update() { if (version == prev_version && update_list) update_list->push_back(id); @@ -46,7 +46,7 @@ public: ++version; } - void ALWAYS_INLINE trigger() { prev_version = version; } + void inline ALWAYS_INLINE trigger() { prev_version = version; } }; protected: @@ -249,7 +249,7 @@ public: } protected: - void ALWAYS_INLINE updateVersion() + void inline ALWAYS_INLINE updateVersion() { if (likely(update_info)) update_info->update(); From 639f7f166f6ba1f4c078b30e66fd40605b9866f5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 12:53:17 +0000 Subject: [PATCH 186/392] Fix typo --- .clang-tidy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 7e8f604467b..7dafaeb9e3f 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -119,7 +119,7 @@ Checks: [ '-readability-named-parameter', '-readability-redundant-declaration', '-readability-redundant-inline-specifier', # generally useful but incompatible with __attribute((always_inline))__ (aka. ALWAYS_INLINE). - # it has an effect only if combined with `inline`: https://godbolt.org/z/Eefd74qdM + # ALWAYS_INLINE has an effect only if combined with `inline`: https://godbolt.org/z/Eefd74qdM '-readability-simplify-boolean-expr', '-readability-suspicious-call-argument', '-readability-uppercase-literal-suffix', From ff392b0aeb668d34049dfaee0966fba91186227c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 13:00:30 +0000 Subject: [PATCH 187/392] Minor corrections --- src/Common/CurrentThread.h | 2 +- src/Functions/PolygonUtils.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/CurrentThread.h b/src/Common/CurrentThread.h index e1eb926c951..53b61ba315f 100644 --- a/src/Common/CurrentThread.h +++ b/src/Common/CurrentThread.h @@ -62,7 +62,7 @@ public: static void updatePerformanceCountersIfNeeded(); static ProfileEvents::Counters & getProfileEvents(); - ALWAYS_INLINE inline static MemoryTracker * getMemoryTracker() + inline ALWAYS_INLINE static MemoryTracker * getMemoryTracker() { if (!current_thread) [[unlikely]] return nullptr; diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 4ab146b085f..c4851718da6 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -124,7 +124,7 @@ public: bool hasEmptyBound() const { return has_empty_bound; } - bool ALWAYS_INLINE inline contains(CoordinateType x, CoordinateType y) const + inline bool ALWAYS_INLINE contains(CoordinateType x, CoordinateType y) const { Point point(x, y); From f143ae6969c77b5ebe44ec4865251caaa18db7fa Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 14:31:21 +0000 Subject: [PATCH 188/392] Fix build --- src/Coordination/KeeperServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b07c90b8660..736a01443ce 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -990,7 +990,7 @@ KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate( raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); return Accepted; } - chassert(false); + std::unreachable(); } ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) From 513900cb524d7b3e96cfbe8b8b56d9b0b0eb6070 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 19 May 2024 15:44:19 +0000 Subject: [PATCH 189/392] assume columns from projection are aggregates --- 
src/Planner/PlannerExpressionAnalysis.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index e7d553af944..399bbfc67cf 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -454,6 +454,13 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); std::unordered_set aggregation_keys; + + auto projection_expression_dag = std::make_shared(); + for (const auto & node : query_node.getProjection()) + actions_visitor.visit(projection_expression_dag, node); + for (const auto & node : projection_expression_dag->getNodes()) + aggregation_keys.insert(node.result_name); + if (aggregation_analysis_result_optional) aggregation_keys.insert(aggregation_analysis_result_optional->aggregation_keys.begin(), aggregation_analysis_result_optional->aggregation_keys.end()); From 1293a0f79572213f2cd90f5a6f09fbe39d8dbf9e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 18:47:58 +0000 Subject: [PATCH 190/392] Cosmetics, pt. I --- src/Functions/generateSnowflakeID.cpp | 95 +++++++++++++-------------- 1 file changed, 45 insertions(+), 50 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 1decda0ab46..28fc2eb6b05 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -18,8 +18,7 @@ namespace ErrorCodes namespace { -/* - Snowflake ID +/* Snowflake ID https://en.wikipedia.org/wiki/Snowflake_ID 0 1 2 3 @@ -30,35 +29,34 @@ namespace | | machine_id | machine_seq_num | ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ -- The first 41 (+ 1 top zero bit) bits is timestamp in Unix time milliseconds -- The middle 10 bits are the machine ID. -- The last 12 bits decode to number of ids processed by the machine at the given millisecond. 
+- The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970) +- The middle 10 bits are the machine ID +- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes */ -constexpr auto timestamp_size = 41; -constexpr auto machine_id_size = 10; -constexpr auto machine_seq_num_size = 12; +constexpr auto timestamp_bits_count = 41; +constexpr auto machine_id_bits_count = 10; +constexpr auto machine_seq_num_bits_count = 12; -constexpr int64_t timestamp_mask = ((1LL << timestamp_size) - 1) << (machine_id_size + machine_seq_num_size); -constexpr int64_t machine_id_mask = ((1LL << machine_id_size) - 1) << machine_seq_num_size; -constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_size) - 1; +constexpr int64_t timestamp_mask = ((1LL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count); +constexpr int64_t machine_id_mask = ((1LL << machine_id_bits_count) - 1) << machine_seq_num_bits_count; +constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_bits_count) - 1; constexpr int64_t max_machine_seq_num = machine_seq_num_mask; Int64 getMachineID() { - auto serverUUID = ServerUUID::get(); - - // hash serverUUID into 64 bits - Int64 h = UUIDHelpers::getHighBytes(serverUUID); - Int64 l = UUIDHelpers::getLowBytes(serverUUID); - return ((h * 11) ^ (l * 17)) & machine_id_mask; + UUID server_uuid = ServerUUID::get(); + /// hash into 64 bits + UInt64 hi = UUIDHelpers::getHighBytes(server_uuid); + UInt64 lo = UUIDHelpers::getLowBytes(server_uuid); + return ((hi * 11) ^ (lo * 17)) & machine_id_mask; } Int64 getTimestamp() { - const auto tm_point = std::chrono::system_clock::now(); - return std::chrono::duration_cast( - tm_point.time_since_epoch()).count() & ((1LL << timestamp_size) - 1); + auto now = std::chrono::system_clock::now(); + auto ticks_since_epoch = std::chrono::duration_cast(now.time_since_epoch()).count(); + return ticks_since_epoch & ((1LL << timestamp_bits_count) - 1); } } @@ -66,16 +64,11 @@ Int64 getTimestamp() class FunctionSnowflakeID : public IFunction { private: - mutable std::atomic lowest_available_snowflake_id{0}; - // 1 atomic value because we don't want to use mutex + mutable std::atomic lowest_available_snowflake_id = 0; /// atomic to avoid a mutex public: static constexpr auto name = "generateSnowflakeID"; - - static FunctionPtr create(ContextPtr /*context*/) - { - return std::make_shared(); - } + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -95,31 +88,34 @@ public: return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override { auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - Int64 size64 = static_cast(input_rows_count); + vec_to.resize(input_rows_count); if (input_rows_count == 0) { return col_res; } - Int64 machine_id = getMachineID(); + const Int64 machine_id = getMachineID(); Int64 current_timestamp = getTimestamp(); Int64 current_machine_seq_num; - Int64 available_id, next_available_id; + Int64 available_snowflake_id, next_available_snowflake_id; + + const Int64 size64 = static_cast(input_rows_count); + do { - available_id = lowest_available_snowflake_id.load(); - Int64 available_timestamp = (available_id & timestamp_mask) >> 
(machine_id_size + machine_seq_num_size); - Int64 available_machine_seq_num = available_id & machine_seq_num_mask; + available_snowflake_id = lowest_available_snowflake_id.load(); + const Int64 available_timestamp = (available_snowflake_id & timestamp_mask) >> (machine_id_bits_count + machine_seq_num_bits_count); + const Int64 available_machine_seq_num = available_snowflake_id & machine_seq_num_mask; if (current_timestamp > available_timestamp) { + /// handle overflow current_machine_seq_num = 0; } else @@ -128,24 +124,23 @@ public: current_machine_seq_num = available_machine_seq_num; } - // calculate new `lowest_available_snowflake_id` + /// calculate new lowest_available_snowflake_id + const Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1); Int64 new_timestamp; - Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1); - if (size64 >= seq_nums_in_current_timestamp_left) { + if (size64 >= seq_nums_in_current_timestamp_left) new_timestamp = current_timestamp + 1 + (size64 - seq_nums_in_current_timestamp_left) / max_machine_seq_num; - } else { + else new_timestamp = current_timestamp; - } - Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask; - next_available_id = (new_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | new_machine_seq_num; + const Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask; + next_available_snowflake_id = (new_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | new_machine_seq_num; } - while (!lowest_available_snowflake_id.compare_exchange_strong(available_id, next_available_id)); - // failed CAS => another thread updated `lowest_available_snowflake_id` - // successful CAS => we have our range of exclusive values + while (!lowest_available_snowflake_id.compare_exchange_strong(available_snowflake_id, next_available_snowflake_id)); + /// failed CAS => another thread updated `lowest_available_snowflake_id` + /// successful CAS => we have our range of exclusive values - for (Int64 & el : vec_to) + for (Int64 & to_row : vec_to) { - el = (current_timestamp << (machine_id_size + machine_seq_num_size)) | machine_id | current_machine_seq_num; + to_row = (current_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | current_machine_seq_num; if (current_machine_seq_num++ == max_machine_seq_num) { current_machine_seq_num = 0; @@ -163,10 +158,10 @@ REGISTER_FUNCTION(GenerateSnowflakeID) factory.registerFunction(FunctionDocumentation { .description=R"( -Generates Snowflake ID -- unique identificators contains: -- The first 41 (+ 1 top zero bit) bits is timestamp in Unix time milliseconds -- The middle 10 bits are the machine ID. -- The last 12 bits decode to number of ids processed by the machine at the given millisecond. +Generates a SnowflakeID -- unique identificators contains: +- The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970) +- The middle 10 bits are the machine ID +- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes In case the number of ids processed overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function guarantees strict monotony on 1 machine and differences in values obtained on different machines. 
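[Editor's note] The comment block rewritten in the patch above describes the 41/10/12 bit split of a Snowflake ID. As a stand-alone illustration (not part of the patch; all names below are invented for the sketch), the following program packs and unpacks an ID under that split. Unlike the patch, the machine id here is passed unshifted and shifted inside the helper:

#include <cstdint>
#include <cstdio>

constexpr int machine_id_bits_count = 10;
constexpr int machine_seq_num_bits_count = 12;

// Pack a millisecond timestamp, a machine id and a per-millisecond counter
// into one signed 64-bit value: | 0 | 41 bits ts | 10 bits machine | 12 bits seq |.
constexpr int64_t packSnowflakeID(int64_t timestamp_ms, int64_t machine_id, int64_t seq_num)
{
    return (timestamp_ms << (machine_id_bits_count + machine_seq_num_bits_count))
         | (machine_id << machine_seq_num_bits_count)
         | seq_num;
}

int main()
{
    const int64_t id = packSnowflakeID(1716200000000 /* ms since epoch */, 42, 7);

    const int64_t seq     = id & ((1LL << machine_seq_num_bits_count) - 1);
    const int64_t machine = (id >> machine_seq_num_bits_count) & ((1LL << machine_id_bits_count) - 1);
    const int64_t ts      = id >> (machine_id_bits_count + machine_seq_num_bits_count);

    std::printf("ts=%lld machine=%lld seq=%lld\n",
                (long long) ts, (long long) machine, (long long) seq);
    return 0;
}

In the patch itself the machine id is kept pre-shifted (machine_id_mask already includes the 12-bit shift), so there it is OR-ed in directly.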
From 08a3c16a5aca95c73cc0ea1aaf2d57edb6acaef2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 18:53:51 +0000 Subject: [PATCH 191/392] Cosmetics, pt. II --- src/Functions/generateSnowflakeID.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 28fc2eb6b05..d70b8349cd8 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -105,7 +105,7 @@ public: Int64 available_snowflake_id, next_available_snowflake_id; - const Int64 size64 = static_cast(input_rows_count); + const Int64 input_rows_count_signed = static_cast(input_rows_count); do { @@ -127,11 +127,11 @@ public: /// calculate new lowest_available_snowflake_id const Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1); Int64 new_timestamp; - if (size64 >= seq_nums_in_current_timestamp_left) - new_timestamp = current_timestamp + 1 + (size64 - seq_nums_in_current_timestamp_left) / max_machine_seq_num; + if (input_rows_count_signed >= seq_nums_in_current_timestamp_left) + new_timestamp = current_timestamp + 1 + (input_rows_count_signed - seq_nums_in_current_timestamp_left) / max_machine_seq_num; else new_timestamp = current_timestamp; - const Int64 new_machine_seq_num = (current_machine_seq_num + size64) & machine_seq_num_mask; + const Int64 new_machine_seq_num = (current_machine_seq_num + input_rows_count_signed) & machine_seq_num_mask; next_available_snowflake_id = (new_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | new_machine_seq_num; } while (!lowest_available_snowflake_id.compare_exchange_strong(available_snowflake_id, next_available_snowflake_id)); From e8d66bf4d79d4ee1f3b18a4ccb1865f3f7ce7294 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 19 May 2024 19:16:24 +0000 Subject: [PATCH 192/392] Cosmetics, pt. 
III --- src/Functions/serial.cpp | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/Functions/serial.cpp b/src/Functions/serial.cpp index 3da2f4ce218..de3036ad242 100644 --- a/src/Functions/serial.cpp +++ b/src/Functions/serial.cpp @@ -17,16 +17,16 @@ namespace ErrorCodes class FunctionSerial : public IFunction { private: - mutable zkutil::ZooKeeperPtr zk{nullptr}; + mutable zkutil::ZooKeeperPtr zk; ContextPtr context; public: static constexpr auto name = "serial"; - explicit FunctionSerial(ContextPtr ctx) : context(ctx) + explicit FunctionSerial(ContextPtr context_) : context(context_) { - if (ctx->hasZooKeeper()) { - zk = ctx->getZooKeeper(); + if (context->hasZooKeeper()) { + zk = context->getZooKeeper(); } } @@ -37,7 +37,6 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } - bool isStateful() const override { return true; } bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const override { return false; } @@ -74,14 +73,14 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - size_t size = input_rows_count; - vec_to.resize(size); + + vec_to.resize(input_rows_count); const auto & serial_path = "/serials/" + arguments[0].column->getDataAt(0).toString(); - // CAS in ZooKeeper - // `get` value and version, `trySet` new with version check - // I didn't get how to do it with `multi` + /// CAS in ZooKeeper + /// `get` value and version, `trySet` new with version check + /// I didn't get how to do it with `multi` Int64 counter; std::string counter_path = serial_path + "/counter"; @@ -93,10 +92,10 @@ public: Coordination::Stat stat; while (true) { - std::string counter_string = zk->get(counter_path, &stat); + const String counter_string = zk->get(counter_path, &stat); counter = std::stoll(counter_string); - std::string updated_counter = std::to_string(counter + input_rows_count); - Coordination::Error err = zk->trySet(counter_path, updated_counter); + String updated_counter = std::to_string(counter + input_rows_count); + const Coordination::Error err = zk->trySet(counter_path, updated_counter); if (err == Coordination::Error::ZOK) { // CAS is done @@ -111,7 +110,7 @@ public: } // Make a result - for (auto& val : vec_to) + for (auto & val : vec_to) { val = counter; ++counter; @@ -137,16 +136,16 @@ The server should be configured with a ZooKeeper. 
}, .returned_value = "Sequential numbers of type Int64 starting from the previous counter value", .examples{ - {"first call", "SELECT serial('name')", R"( -┌─serial('name')─┐ + {"first call", "SELECT serial('id1')", R"( +┌─serial('id1')──┐ │ 1 │ └────────────────┘)"}, - {"second call", "SELECT serial('name')", R"( -┌─serial('name')─┐ + {"second call", "SELECT serial('id1')", R"( +┌─serial('id1')──┐ │ 2 │ └────────────────┘)"}, - {"column call", "SELECT *, serial('name') FROM test_table", R"( -┌─CounterID─┬─UserID─┬─ver─┬─serial('name')─┐ + {"column call", "SELECT *, serial('id1') FROM test_table", R"( +┌─CounterID─┬─UserID─┬─ver─┬─serial('id1')──┐ │ 1 │ 3 │ 3 │ 3 │ │ 1 │ 1 │ 1 │ 4 │ │ 1 │ 2 │ 2 │ 5 │ From 5d848aa32f1127098895cc29ad3200b5b325768a Mon Sep 17 00:00:00 2001 From: copperybean Date: Sun, 19 May 2024 23:20:40 +0800 Subject: [PATCH 193/392] update comment of method visitNullableBySteps, try to suppress clang-18 tidy warnings Change-Id: I3119c44dc764caed0dc471f52ac5e634c75c7b50 --- .../Impl/Parquet/ParquetDataValuesReader.cpp | 14 +++++++++++--- .../Formats/Impl/Parquet/ParquetDataValuesReader.h | 13 +++++++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp index 65f569ec264..b8e4db8700c 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.cpp @@ -14,6 +14,17 @@ namespace ErrorCodes extern const int PARQUET_EXCEPTION; } +RleValuesReader::RleValuesReader( + std::unique_ptr bit_reader_, Int32 bit_width_) + : bit_reader(std::move(bit_reader_)), bit_width(bit_width_) +{ + if (unlikely(bit_width >= 64)) + { + // e.g. in GetValue_ in bit_stream_utils.h, uint64 type is used to read bit values + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "unsupported bit width {}", bit_width); + } +} + void RleValuesReader::nextGroup() { // refer to: @@ -29,9 +40,6 @@ void RleValuesReader::nextGroup() { cur_group_size *= 8; cur_packed_bit_values.resize(cur_group_size); - - // try to suppress clang tidy warnings by assertion - assert(bit_width < 64); bit_reader->GetBatch(bit_width, cur_packed_bit_values.data(), cur_group_size); } else diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 0f916ff862d..75adb55df7e 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -18,8 +18,7 @@ namespace DB class RleValuesReader { public: - RleValuesReader(std::unique_ptr bit_reader_, Int32 bit_width_) - : bit_reader(std::move(bit_reader_)), bit_width(bit_width_) {} + RleValuesReader(std::unique_ptr bit_reader_, Int32 bit_width_); /** * @brief Used when the bit_width is 0, so all elements have same value. @@ -71,12 +70,14 @@ public: * @tparam IndividualNullVisitor A callback with signature: void(size_t cursor), used to process null value * @tparam SteppedValidVisitor A callback with signature: * void(size_t cursor, const std::vector & valid_index_steps) - * for n valid elements with null value interleaved in a BitPacked group, + * valid_index_steps records the gap size between two valid elements, * i-th item in valid_index_steps describes how many elements there are * from i-th valid element (include) to (i+1)-th valid element (exclude). 
* - * take following BitPacked group with 2 valid elements for example: - * null valid null null valid null + * take following BitPacked group values for example, and assuming max_def_level is 1: + * [1, 0, 1, 1, 0, 1 ] + * null valid null null valid null + * the second line shows the corresponding validation state, * then the valid_index_steps has values [1, 3, 2]. * Please note that the the sum of valid_index_steps is same as elements number in this group. * @@ -117,7 +118,7 @@ private: std::vector cur_packed_bit_values; std::vector valid_index_steps; - Int32 bit_width; + const Int32 bit_width; UInt32 cur_group_size = 0; UInt32 cur_group_cursor = 0; From ad5f6f27dff104f6229819be27fba3732226603e Mon Sep 17 00:00:00 2001 From: copperybean Date: Mon, 20 May 2024 16:28:21 +0800 Subject: [PATCH 194/392] fix reader type, update comment Change-Id: Iefec91bca223eedaabe302b7891808c6d94eed9d --- .../Impl/Parquet/ParquetDataValuesReader.h | 1 + .../Impl/Parquet/ParquetRecordReader.cpp | 29 ++++++++++++++----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h index 75adb55df7e..fbccb612b3c 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h +++ b/src/Processors/Formats/Impl/Parquet/ParquetDataValuesReader.h @@ -80,6 +80,7 @@ public: * the second line shows the corresponding validation state, * then the valid_index_steps has values [1, 3, 2]. * Please note that the the sum of valid_index_steps is same as elements number in this group. + * TODO the definition of valid_index_steps should be updated when supporting nested types * * @tparam RepeatedVisitor A callback with signature: void(bool is_valid, UInt32 cursor, UInt32 count) */ diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 0b797dd66ad..69da40b47e6 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -27,6 +27,7 @@ namespace DB namespace ErrorCodes { + extern const int NOT_IMPLEMENTED; extern const int PARQUET_EXCEPTION; } @@ -225,7 +226,7 @@ std::unique_ptr ColReaderFactory::fromInt32INT(const parque { switch (int_type.bit_width()) { - case sizeof(Int32): + case 32: { if (int_type.is_signed()) return makeLeafReader(); @@ -241,7 +242,7 @@ std::unique_ptr ColReaderFactory::fromInt64INT(const parque { switch (int_type.bit_width()) { - case sizeof(Int64): + case 64: { if (int_type.is_signed()) return makeLeafReader(); @@ -312,16 +313,28 @@ ParquetRecordReader::ParquetRecordReader( { log = &Poco::Logger::get("ParquetRecordReader"); + std::unordered_map parquet_columns; + auto root = file_reader->metadata()->schema()->group_node(); + for (int i = 0; i < root->field_count(); ++i) + { + auto & node = root->field(i); + parquet_columns.emplace(node->name(), node); + } + parquet_col_indice.reserve(header.columns()); column_readers.reserve(header.columns()); for (const auto & col_with_name : header) { - auto idx = file_reader->metadata()->schema()->ColumnIndex(col_with_name.name); - if (idx < 0) - { - auto msg = PreformattedMessage::create("can not find column with name: {}", col_with_name.name); - throw Exception(std::move(msg), ErrorCodes::PARQUET_EXCEPTION); - } + auto it = parquet_columns.find(col_with_name.name); + if (it == parquet_columns.end()) + throw Exception(ErrorCodes::PARQUET_EXCEPTION, "no column with '{}' 
in parquet file", col_with_name.name); + + auto node = it->second; + if (!node->is_primitive()) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "arrays and maps are not implemented in native parquet reader"); + + auto idx = file_reader->metadata()->schema()->ColumnIndex(*node); + chassert(idx >= 0); parquet_col_indice.push_back(idx); } if (reader_properties.pre_buffer()) From 84459052b6cddd9a5e1ca4bcd00e5edfc6e49f12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 20 May 2024 21:27:24 +0200 Subject: [PATCH 195/392] Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView --- src/Interpreters/InterpreterCreateQuery.cpp | 7 +++++++ .../0_stateless/03161_create_table_as_mv.reference | 0 .../0_stateless/03161_create_table_as_mv.sql | 14 ++++++++++++++ 3 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/03161_create_table_as_mv.reference create mode 100644 tests/queries/0_stateless/03161_create_table_as_mv.sql diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 519cbde588f..711693f71b1 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -977,6 +977,13 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (as_create.is_ordinary_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a View", qualified_name); + if (as_create.is_materialized_view && as_create.to_table_id) + throw Exception( + ErrorCodes::INCORRECT_QUERY, + "Cannot CREATE a table AS {}, it is a Materialized View without storage. Use \"AS `{}`\" instead", + qualified_name, + as_create.to_table_id.getQualifiedName()); + if (as_create.is_live_view) throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot CREATE a table AS {}, it is a Live View", qualified_name); diff --git a/tests/queries/0_stateless/03161_create_table_as_mv.reference b/tests/queries/0_stateless/03161_create_table_as_mv.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03161_create_table_as_mv.sql b/tests/queries/0_stateless/03161_create_table_as_mv.sql new file mode 100644 index 00000000000..e80659ac923 --- /dev/null +++ b/tests/queries/0_stateless/03161_create_table_as_mv.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS base_table; +DROP TABLE IF EXISTS target_table; +DROP TABLE IF EXISTS mv_from_base_to_target; +DROP TABLE IF EXISTS mv_with_storage; +DROP TABLE IF EXISTS other_table_1; +DROP TABLE IF EXISTS other_table_2; + +CREATE TABLE base_table (date DateTime, id String, cost Float64) ENGINE = MergeTree() ORDER BY date; +CREATE TABLE target_table (id String, total AggregateFunction(sum, Float64)) ENGINE = MergeTree() ORDER BY id; +CREATE MATERIALIZED VIEW mv_from_base_to_target TO target_table AS Select id, sumState(cost) FROM base_table GROUP BY id; +CREATE MATERIALIZED VIEW mv_with_storage ENGINE=MergeTree() ORDER BY id AS Select id, sumState(cost) FROM base_table GROUP BY id; + +CREATE TABLE other_table_1 AS mv_with_storage; +CREATE TABLE other_table_2 AS mv_from_base_to_target; -- { serverError INCORRECT_QUERY } From d66f0d6420e2d7972ce7eeb95188d394ed5a575f Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 21 May 2024 10:36:13 +0200 Subject: [PATCH 196/392] Properly fallback when native copy fails --- src/Backups/BackupIO_S3.cpp | 9 +- src/Disks/ObjectStorages/IObjectStorage.h | 12 ++- .../ObjectStorages/S3/S3ObjectStorage.cpp | 5 + src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 1 + 
.../ObjectStorages/Web/WebObjectStorage.h | 2 + src/IO/S3/copyS3File.cpp | 95 ++++++++++++------- src/IO/S3/copyS3File.h | 5 +- 7 files changed, 89 insertions(+), 40 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 15860363615..eb6773b196e 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -195,7 +195,8 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s blob_storage_log, object_attributes, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupReaderS3"), - /* for_disk_s3= */ true); + /* for_disk_s3= */ true, + destination_disk->getObjectStorage()->getS3StorageClient()); return file_size; }; @@ -252,7 +253,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src { LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName()); copyS3File( - client, + src_disk->getObjectStorage()->getS3StorageClient(), /* src_bucket */ blob_path[1], /* src_key= */ blob_path[0], start_pos, @@ -263,7 +264,9 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src read_settings, blob_storage_log, {}, - threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3")); + threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3"), + /*for_disk_s3=*/false, + client); return; /// copied! } } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index eae31af9d44..5ec318a1ca4 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -31,6 +30,10 @@ #include #endif +#if USE_AWS_S3 +#include +#endif + namespace DB { @@ -244,6 +247,13 @@ public: } #endif +#if USE_AWS_S3 + virtual std::shared_ptr getS3StorageClient() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This function is only implemented for S3ObjectStorage"); + } +#endif + private: mutable std::mutex throttlers_mutex; diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 043e5b8ef8c..223f9d34a44 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -573,6 +573,11 @@ ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & p return key_generator->generate(path, /* is_directory */ false); } +std::shared_ptr S3ObjectStorage::getS3StorageClient() +{ + return client.get(); +} + } #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 5eaab4b585c..b9fd2cbf4b2 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -162,6 +162,7 @@ public: bool isReadOnly() const override { return s3_settings.get()->read_only; } + std::shared_ptr getS3StorageClient() override; private: void setNewSettings(std::unique_ptr && s3_settings_); diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h index b8ab510a6fb..d57da588601 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h @@ -3,6 +3,8 @@ #include "config.h" #include + +#include #include namespace Poco diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 549d0a569c6..46cadcef68c 100644 --- a/src/IO/S3/copyS3File.cpp +++ 
b/src/IO/S3/copyS3File.cpp @@ -652,7 +652,8 @@ namespace const std::optional> & object_metadata_, ThreadPoolCallbackRunnerUnsafe schedule_, bool for_disk_s3_, - BlobStorageLogWriterPtr blob_storage_log_) + BlobStorageLogWriterPtr blob_storage_log_, + std::function fallback_method_) : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, getLogger("copyS3File")) , src_bucket(src_bucket_) , src_key(src_key_) @@ -660,6 +661,7 @@ namespace , size(src_size_) , supports_multipart_copy(client_ptr_->supportsMultiPartCopy()) , read_settings(read_settings_) + , fallback_method(std::move(fallback_method_)) { } @@ -682,14 +684,7 @@ namespace size_t size; bool supports_multipart_copy; const ReadSettings read_settings; - - CreateReadBuffer getSourceObjectReadBuffer() - { - return [&] - { - return std::make_unique(client_ptr, src_bucket, src_key, "", request_settings, read_settings); - }; - } + std::function fallback_method; void performSingleOperationCopy() { @@ -754,18 +749,7 @@ namespace dest_bucket, dest_key, size); - copyDataToS3File( - getSourceObjectReadBuffer(), - offset, - size, - client_ptr, - dest_bucket, - dest_key, - request_settings, - blob_storage_log, - object_metadata, - schedule, - for_disk_s3); + fallback_method(); break; } else @@ -859,13 +843,24 @@ void copyDataToS3File( ThreadPoolCallbackRunnerUnsafe schedule, bool for_disk_s3) { - CopyDataToFileHelper helper{create_read_buffer, offset, size, dest_s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3, blob_storage_log}; + CopyDataToFileHelper helper{ + create_read_buffer, + offset, + size, + dest_s3_client, + dest_bucket, + dest_key, + settings, + object_metadata, + schedule, + for_disk_s3, + blob_storage_log}; helper.performCopy(); } void copyS3File( - const std::shared_ptr & s3_client, + const std::shared_ptr & src_s3_client, const String & src_bucket, const String & src_key, size_t src_offset, @@ -877,21 +872,53 @@ void copyS3File( BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata, ThreadPoolCallbackRunnerUnsafe schedule, - bool for_disk_s3) + bool for_disk_s3, + std::shared_ptr dest_s3_client) { - if (settings.allow_native_copy) + if (!dest_s3_client) + dest_s3_client = src_s3_client; + + std::function fallback_method = [&] { - CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, read_settings, object_metadata, schedule, for_disk_s3, blob_storage_log}; - helper.performCopy(); - } - else + auto create_read_buffer + = [&] { return std::make_unique(src_s3_client, src_bucket, src_key, "", settings, read_settings); }; + + copyDataToS3File( + create_read_buffer, + src_offset, + src_size, + dest_s3_client, + dest_bucket, + dest_key, + settings, + blob_storage_log, + object_metadata, + schedule, + for_disk_s3); + }; + + if (!settings.allow_native_copy) { - auto create_read_buffer = [&] - { - return std::make_unique(s3_client, src_bucket, src_key, "", settings, read_settings); - }; - copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, blob_storage_log, object_metadata, schedule, for_disk_s3); + fallback_method(); + return; } + + CopyFileHelper helper{ + src_s3_client, + src_bucket, + src_key, + src_offset, + src_size, + dest_bucket, + dest_key, + settings, + read_settings, + object_metadata, + schedule, + for_disk_s3, + blob_storage_log, + std::move(fallback_method)}; + helper.performCopy(); } } 
diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index d5da4d260b1..cb1960cc368 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -31,7 +31,7 @@ using CreateReadBuffer = std::function()>; /// /// read_settings - is used for throttling in case of native copy is not possible void copyS3File( - const std::shared_ptr & s3_client, + const std::shared_ptr & src_s3_client, const String & src_bucket, const String & src_key, size_t src_offset, @@ -43,7 +43,8 @@ void copyS3File( BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata = std::nullopt, ThreadPoolCallbackRunnerUnsafe schedule_ = {}, - bool for_disk_s3 = false); + bool for_disk_s3 = false, + std::shared_ptr dest_s3_client = nullptr); /// Copies data from any seekable source to S3. /// The same functionality can be done by using the function copyData() and the class WriteBufferFromS3 From b253ca36084ec50e8d06dfe50cb3561cd915a602 Mon Sep 17 00:00:00 2001 From: copperybean Date: Mon, 20 May 2024 23:12:07 +0800 Subject: [PATCH 197/392] fix clang-tidy warnings Change-Id: Iff9f5f894e815b184ac35f61b4cac87908c612b5 --- contrib/arrow | 2 +- src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/arrow b/contrib/arrow index 8f36d71d185..5cfccd8ea65 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 8f36d71d18587f1f315ec832f424183cb6519cbb +Subproject commit 5cfccd8ea65f33d4517e7409815d761c7650b45d diff --git a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp index 69da40b47e6..a7e51f88b3c 100644 --- a/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp +++ b/src/Processors/Formats/Impl/Parquet/ParquetRecordReader.cpp @@ -314,10 +314,10 @@ ParquetRecordReader::ParquetRecordReader( log = &Poco::Logger::get("ParquetRecordReader"); std::unordered_map parquet_columns; - auto root = file_reader->metadata()->schema()->group_node(); + const auto * root = file_reader->metadata()->schema()->group_node(); for (int i = 0; i < root->field_count(); ++i) { - auto & node = root->field(i); + const auto & node = root->field(i); parquet_columns.emplace(node->name(), node); } @@ -329,7 +329,7 @@ ParquetRecordReader::ParquetRecordReader( if (it == parquet_columns.end()) throw Exception(ErrorCodes::PARQUET_EXCEPTION, "no column with '{}' in parquet file", col_with_name.name); - auto node = it->second; + const auto & node = it->second; if (!node->is_primitive()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "arrays and maps are not implemented in native parquet reader"); From e1caea6ab51d032fcba5e4356d7a4b5869e2eb9c Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 11:29:07 +0200 Subject: [PATCH 198/392] Split attached table count into attached tables, views and dictionaries --- programs/server/Server.cpp | 2 ++ src/Common/CurrentMetrics.cpp | 2 ++ src/Core/ServerSettings.h | 2 ++ src/Databases/DatabaseLazy.cpp | 24 +++++++++++++++++++++-- src/Databases/DatabasesCommon.cpp | 22 +++++++++++++++++++-- src/Interpreters/Context.cpp | 20 +++++++++++++++++++ src/Interpreters/Context.h | 2 ++ tests/config/config.d/max_num_to_warn.xml | 2 ++ 8 files changed, 72 insertions(+), 4 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9c9476d1aa7..223bc1f77e7 100644 --- a/programs/server/Server.cpp +++ 
b/programs/server/Server.cpp @@ -1476,6 +1476,8 @@ try global_context->setMaxTableSizeToDrop(new_server_settings.max_table_size_to_drop); global_context->setMaxPartitionSizeToDrop(new_server_settings.max_partition_size_to_drop); global_context->setMaxTableNumToWarn(new_server_settings.max_table_num_to_warn); + global_context->setMaxViewNumToWarn(new_server_settings.max_view_num_to_warn); + global_context->setMaxDictionaryNumToWarn(new_server_settings.max_dictionary_num_to_warn); global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn); global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn); diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 21b4d114d79..b557edc3e12 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -224,6 +224,8 @@ M(PartsActive, "Active data part, used by current and upcoming SELECTs.") \ M(AttachedDatabase, "Active database, used by current and upcoming SELECTs.") \ M(AttachedTable, "Active table, used by current and upcoming SELECTs.") \ + M(AttachedView, "Active view, used by current and upcoming SELECTs.") \ + M(AttachedDictionary, "Active dictionary, used by current and upcoming SELECTs.") \ M(PartsOutdated, "Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.") \ M(PartsDeleting, "Not active data part with identity refcounter, it is deleting right now by a cleaner.") \ M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \ diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 524d6ec07c2..af96ca3a557 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -97,6 +97,8 @@ namespace DB M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ M(UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_view_num_to_warn, 5000lu, "If number of views is greater than this value, server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_dictionary_num_to_warn, 5000lu, "If number of dictionaries is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. 
Zero means unlimited.", 0) \ diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index fb1b3ee626b..ca985b5a7c8 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -10,6 +10,7 @@ #include #include #include +#include "Common/CurrentMetrics.h" #include #include @@ -24,6 +25,8 @@ namespace fs = std::filesystem; namespace CurrentMetrics { extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; } @@ -184,7 +187,16 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name)); it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::Metric metric; + if (table->isView()) { + metric = CurrentMetrics::AttachedView; + } else if (table->isDictionary()) { + metric = CurrentMetrics::AttachedDictionary; + } else { + metric = CurrentMetrics::AttachedTable; + } + CurrentMetrics::add(metric, 1); + } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -200,7 +212,15 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::Metric metric; + if (res->isView()) { + metric = CurrentMetrics::AttachedView; + } else if (res->isDictionary()) { + metric = CurrentMetrics::AttachedDictionary; + } else { + metric = CurrentMetrics::AttachedTable; + } + CurrentMetrics::sub(metric, 1); } return res; } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index fc75f8e44b9..ab7f2fff5aa 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -18,6 +18,8 @@ namespace CurrentMetrics { extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; } @@ -263,7 +265,15 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; tables.erase(it); res->is_detached = true; - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::Metric metric; + if (res->isView()) { + metric = CurrentMetrics::AttachedView; + } else if (res->isDictionary()) { + metric = CurrentMetrics::AttachedDictionary; + } else { + metric = CurrentMetrics::AttachedTable; + } + CurrentMetrics::sub(metric, 1); auto table_id = res->getStorageID(); if (table_id.hasUUID()) @@ -304,7 +314,15 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. 
table->is_detached = false; - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::Metric metric; + if (table->isView()) { + metric = CurrentMetrics::AttachedView; + } else if (table->isDictionary()) { + metric = CurrentMetrics::AttachedDictionary; + } else { + metric = CurrentMetrics::AttachedTable; + } + CurrentMetrics::add(metric, 1); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1bd9601dd7e..4c5df8ef4ea 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -160,6 +160,8 @@ namespace CurrentMetrics extern const Metric TablesLoaderForegroundThreadsScheduled; extern const Metric IOWriterThreadsScheduled; extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; extern const Metric AttachedDatabase; extern const Metric PartsActive; } @@ -359,6 +361,8 @@ struct ContextSharedPart : boost::noncopyable /// No lock required for format_schema_path modified only during initialization std::atomic_size_t max_database_num_to_warn = 1000lu; std::atomic_size_t max_table_num_to_warn = 5000lu; + std::atomic_size_t max_view_num_to_warn = 5000lu; + std::atomic_size_t max_dictionary_num_to_warn = 5000lu; std::atomic_size_t max_part_num_to_warn = 100000lu; String format_schema_path; /// Path to a directory that contains schema files used by input formats. String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types. @@ -935,6 +939,10 @@ Strings Context::getWarnings() const common_warnings = shared->warnings; if (CurrentMetrics::get(CurrentMetrics::AttachedTable) > static_cast(shared->max_table_num_to_warn)) common_warnings.emplace_back(fmt::format("The number of attached tables is more than {}", shared->max_table_num_to_warn)); + if (CurrentMetrics::get(CurrentMetrics::AttachedView) > static_cast(shared->max_view_num_to_warn)) + common_warnings.emplace_back(fmt::format("The number of attached views is more than {}", shared->max_view_num_to_warn)); + if (CurrentMetrics::get(CurrentMetrics::AttachedDictionary) > static_cast(shared->max_dictionary_num_to_warn)) + common_warnings.emplace_back(fmt::format("The number of attached dictionaries is more than {}", shared->max_dictionary_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::AttachedDatabase) > static_cast(shared->max_database_num_to_warn)) common_warnings.emplace_back(fmt::format("The number of attached databases is more than {}", shared->max_database_num_to_warn)); if (CurrentMetrics::get(CurrentMetrics::PartsActive) > static_cast(shared->max_part_num_to_warn)) @@ -3711,6 +3719,18 @@ void Context::setMaxTableNumToWarn(size_t max_table_to_warn) shared->max_table_num_to_warn= max_table_to_warn; } +void Context::setMaxViewNumToWarn(size_t max_view_to_warn) +{ + SharedLockGuard lock(shared->mutex); + shared->max_view_num_to_warn= max_view_to_warn; +} + +void Context::setMaxDictionaryNumToWarn(size_t max_dictionary_to_warn) +{ + SharedLockGuard lock(shared->mutex); + shared->max_dictionary_num_to_warn= max_dictionary_to_warn; +} + void Context::setMaxDatabaseNumToWarn(size_t max_database_to_warn) { SharedLockGuard lock(shared->mutex); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 7f663773e52..814534f7035 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -861,6 +861,8 @@ public: const HTTPHeaderFilter & getHTTPHeaderFilter() const; void setMaxTableNumToWarn(size_t 
max_table_to_warn); + void setMaxViewNumToWarn(size_t max_view_to_warn); + void setMaxDictionaryNumToWarn(size_t max_dictionary_to_warn); void setMaxDatabaseNumToWarn(size_t max_database_to_warn); void setMaxPartNumToWarn(size_t max_part_to_warn); /// The port that the server listens for executing SQL queries. diff --git a/tests/config/config.d/max_num_to_warn.xml b/tests/config/config.d/max_num_to_warn.xml index 776c270823d..1f55e6fd674 100644 --- a/tests/config/config.d/max_num_to_warn.xml +++ b/tests/config/config.d/max_num_to_warn.xml @@ -1,5 +1,7 @@ 5 + 5 + 5 2 10 From 311d6d6baa32ad0bdee1c58813c6d551aaeb53e0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 21 May 2024 09:38:36 +0000 Subject: [PATCH 199/392] Fix: 02124_insert_deduplication_token_multiple_blocks_replica --- .../02124_insert_deduplication_token_multiple_blocks_replica.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh b/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh index 1c776263f78..0c95abb9867 100755 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks_replica.sh @@ -9,6 +9,8 @@ INSERT_BLOCK_SETTINGS="max_insert_block_size=1&min_insert_block_size_rows=0&min_ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS block_dedup_token_replica SYNC" $CLICKHOUSE_CLIENT --query="CREATE TABLE block_dedup_token_replica (id Int32) ENGINE=ReplicatedMergeTree('/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/{table}', '{replica}') ORDER BY id" +# Need to stop merges due to randomization of old_parts_lifetime setting, so all initial parts are guaranteed to exist when we check them +$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES block_dedup_token_replica" $CLICKHOUSE_CLIENT --query="SELECT 'insert 2 blocks with dedup token, 1 row per block'" DEDUP_TOKEN='dedup1' From e1fef7ecd77da0b1eaed4b0dbc7a73b36cd228ac Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 21 May 2024 12:54:46 +0200 Subject: [PATCH 200/392] Group const fields --- src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp | 4 ++-- src/Storages/MergeTree/IMergeTreeDataPartWriter.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index e01572715d6..b3e33e94073 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -56,14 +56,14 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeIndexGranularity & index_granularity_) : data_part_name(data_part_name_) , serializations(serializations_) - , data_part_storage(data_part_storage_) , index_granularity_info(index_granularity_info_) , storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) , columns_list(columns_list_) , settings(settings_) - , index_granularity(index_granularity_) , with_final_mark(settings.can_use_adaptive_granularity) + , data_part_storage(data_part_storage_) + , index_granularity(index_granularity_) { } diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 3245a23339b..d2bf03483c9 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -50,19 +50,19 @@ protected: IDataPartStorage & getDataPartStorage() { 
return *data_part_storage; } - /// Serializations for every columns and subcolumns by their names. const String data_part_name; + /// Serializations for every columns and subcolumns by their names. const SerializationByName serializations; - MutableDataPartStoragePtr data_part_storage; const MergeTreeIndexGranularityInfo index_granularity_info; const MergeTreeSettingsPtr storage_settings; const StorageMetadataPtr metadata_snapshot; const NamesAndTypesList columns_list; const MergeTreeWriterSettings settings; - MergeTreeIndexGranularity index_granularity; const bool with_final_mark; + MutableDataPartStoragePtr data_part_storage; MutableColumns index_columns; + MergeTreeIndexGranularity index_granularity; }; using MergeTreeDataPartWriterPtr = std::unique_ptr; From b80d878b4c7d20d6ba7ec0e820e01ae68f498c58 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 21 May 2024 13:21:53 +0200 Subject: [PATCH 201/392] Add test --- .../Cached/CachedObjectStorage.h | 7 + src/IO/S3/copyS3File.cpp | 14 +- tests/integration/helpers/cluster.py | 1 + .../configs/disk_s3_restricted_user.xml | 22 +++ .../test_backup_restore_s3/test.py | 132 ++++++++++++++++++ 5 files changed, 171 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_backup_restore_s3/configs/disk_s3_restricted_user.xml diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 961c2709efc..fbb9a7e731e 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -126,6 +126,13 @@ public: } #endif +#if USE_AWS_S3 + std::shared_ptr getS3StorageClient() override + { + return object_storage->getS3StorageClient(); + } +#endif + private: FileCacheKey getCacheKey(const std::string & path) const; diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 46cadcef68c..218bdf78907 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -739,16 +739,20 @@ namespace if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || outcome.GetError().GetExceptionName() == "InvalidRequest" || outcome.GetError().GetExceptionName() == "InvalidArgument" || + outcome.GetError().GetExceptionName() == "AccessDenied" || (outcome.GetError().GetExceptionName() == "InternalError" && outcome.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::GATEWAY_TIMEOUT && outcome.GetError().GetMessage().contains("use the Rewrite method in the JSON API"))) { - if (!supports_multipart_copy) + if (!supports_multipart_copy || outcome.GetError().GetExceptionName() == "AccessDenied") { - LOG_INFO(log, "Multipart upload using copy is not supported, will try regular upload for Bucket: {}, Key: {}, Object size: {}", - dest_bucket, - dest_key, - size); + LOG_INFO( + log, + "Multipart upload using copy is not supported, will try regular upload for Bucket: {}, Key: {}, Object size: " + "{}", + dest_bucket, + dest_key, + size); fallback_method(); break; } diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index c2bea3060aa..41c162217d2 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -513,6 +513,7 @@ class ClickHouseCluster: self.minio_redirect_host = "proxy1" self.minio_redirect_ip = None self.minio_redirect_port = 8080 + self.minio_docker_id = self.get_instance_docker_id(self.minio_host) self.spark_session = None diff --git a/tests/integration/test_backup_restore_s3/configs/disk_s3_restricted_user.xml 
b/tests/integration/test_backup_restore_s3/configs/disk_s3_restricted_user.xml new file mode 100644 index 00000000000..323e986f966
--- /dev/null
+++ b/tests/integration/test_backup_restore_s3/configs/disk_s3_restricted_user.xml
@@ -0,0 +1,22 @@
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <disk_s3_restricted_user>
+                <type>s3</type>
+                <endpoint>http://minio1:9001/root/data/disks/disk_s3_restricted_user/</endpoint>
+                <access_key_id>miniorestricted1</access_key_id>
+                <secret_access_key>minio123</secret_access_key>
+            </disk_s3_restricted_user>
+        </disks>
+
+        <policies>
+            <policy_s3_restricted_user>
+                <volumes>
+                    <main>
+                        <disk>disk_s3_restricted_user</disk>
+                    </main>
+                </volumes>
+            </policy_s3_restricted_user>
+        </policies>
+    </storage_configuration>
+</clickhouse>
diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 05424887736..4ad2c133694 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -3,8 +3,11 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV import uuid +import os +CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") + cluster = ClickHouseCluster(__file__) node = cluster.add_instance( "node", @@ -20,13 +23,122 @@ node = cluster.add_instance( ], with_minio=True, with_zookeeper=True, + stay_alive=True, ) +def setup_minio_users(): + for user, bucket in [("miniorestricted1", "root"), ("miniorestricted2", "root2")]: + print( + cluster.exec_in_container( + cluster.minio_docker_id, + [ + "mc", + "alias", + "set", + "root", + "http://minio1:9001", + "minio", + "minio123", + ], + ) + ) + policy = f""" +{{ + "Version": "2012-10-17", + "Statement": [ + {{ + "Effect": "Allow", + "Principal": {{ + "AWS": [ + "*" + ] + }}, + "Action": [ + "s3:GetBucketLocation", + "s3:ListBucket", + "s3:ListBucketMultipartUploads" + ], + "Resource": [ + "arn:aws:s3:::{bucket}" + ] + }}, + {{ + "Effect": "Allow", + "Principal": {{ + "AWS": [ + "*" + ] + }}, + "Action": [ + "s3:AbortMultipartUpload", + "s3:DeleteObject", + "s3:GetObject", + "s3:ListMultipartUploadParts", + "s3:PutObject" + ], + "Resource": [ + "arn:aws:s3:::{bucket}/*" + ] + }} + ] +}}""" + + cluster.exec_in_container( + cluster.minio_docker_id, + ["bash", "-c", f"cat >/tmp/{bucket}_policy.json < Date: Tue, 21 May 2024 13:28:20 +0200 Subject: [PATCH 202/392] Cleanups --- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 - src/Storages/MergeTree/MergeTreeDataPartCompact.cpp | 7 ++++--- src/Storages/MergeTree/MergeTreeDataPartCompact.h | 1 - src/Storages/MergeTree/MergeTreeDataPartWide.cpp | 9 ++++++--- src/Storages/MergeTree/MergeTreeDataPartWide.h | 1 - src/Storages/MergeTree/MergedBlockOutputStream.cpp | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 091a7ceb5bd..f4889d64179 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -104,7 +104,6 @@ public: const ValueSizeMap & avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0; -// TODO: remove? 
virtual bool isStoredOnDisk() const = 0; virtual bool isStoredOnRemoteDisk() const = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 373ad6c23ea..fb1c2fe35ed 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -74,9 +74,10 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( //// { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); //// return std::make_unique( - data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, - marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, + indices_to_recalc, stats_to_recalc_, marks_file_extension_, + default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index ca88edba7b3..1fb84424774 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -40,7 +40,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; -// TODO: remove? bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 34a3f30c4ba..74cab30064a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -69,9 +69,12 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { - return std::make_unique(data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, - marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); + return std::make_unique( + data_part_name_, logger_name_, serializations_, data_part_storage_, + index_granularity_info_, storage_settings_, columns_list, + metadata_snapshot, indices_to_recalc, stats_to_recalc_, + marks_file_extension_, + default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index e3cb3f04335..7465e08b7c4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -35,7 +35,6 @@ public: const ValueSizeMap & avg_value_size_hints, const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; -// TODO: remove? 
bool isStoredOnDisk() const override { return true; } bool isStoredOnRemoteDisk() const override; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index e0fb4f703a0..0fe3ee30a0d 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -40,7 +40,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( /* rewrite_primary_key = */ true, blocks_are_granules_size); -// TODO: looks like isStoredOnDisk() is always true for MergeTreeDataPart + /// TODO: looks like isStoredOnDisk() is always true for MergeTreeDataPart if (data_part->isStoredOnDisk()) data_part_storage->createDirectories(); From 1e273f10e2056f25be2a616e8fa911a00dbb948e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 21 May 2024 11:36:57 +0000 Subject: [PATCH 203/392] Automatic style fix --- tests/integration/test_backup_restore_s3/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 4ad2c133694..a76b32bce39 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -134,6 +134,7 @@ def setup_minio_users(): ) node.start_clickhouse() + @pytest.fixture(scope="module", autouse=True) def start_cluster(): try: From 8fc1abf2ab06485d0c4c63d6a0a2484189f71f84 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 15:51:24 +0200 Subject: [PATCH 204/392] Add documentation of new settings --- .../settings.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 28831404a1f..4d239309886 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -561,6 +561,25 @@ Default value: 5000 400 ``` +## max\_view\_num\_to\_warn {#max-view-num-to-warn} +If the number of attached views exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. +Default value: 5000 + +**Example** + +``` xml +400 +``` + +## max\_dictionary\_num\_to\_warn {#max-dictionary-num-to-warn} +If the number of attached dictionaries exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. +Default value: 5000 + +**Example** + +``` xml +400 +``` ## max\_part\_num\_to\_warn {#max-part-num-to-warn} If the number of active parts exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. 
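The view and dictionary thresholds documented above are backed by the `AttachedView` and `AttachedDictionary` current metrics introduced earlier in this series (the part threshold relies on `PartsActive`); once a counter crosses its `*_num_to_warn` setting, a message is added to `system.warnings`. A minimal sketch of how the behaviour can be checked on a running server, using only the tables and settings referenced in these patches:

```sql
-- Current number of attached objects of each kind.
SELECT metric, value
FROM system.metrics
WHERE metric IN ('AttachedTable', 'AttachedView', 'AttachedDictionary');

-- Warnings produced once max_table_num_to_warn, max_view_num_to_warn or
-- max_dictionary_num_to_warn is exceeded.
SELECT message FROM system.warnings;
```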
From 681de0145888b4dd30d75fd9b1fabe5e2e084b10 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 16:00:51 +0200 Subject: [PATCH 205/392] Extract common counter logic to method --- src/Databases/DatabaseLazy.cpp | 31 ++++++++++++------------------- src/Databases/DatabasesCommon.cpp | 31 +++++++++++++------------------ 2 files changed, 25 insertions(+), 37 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index ca985b5a7c8..a27e69c7e63 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -174,6 +174,16 @@ bool DatabaseLazy::empty() const return tables_cache.empty(); } +static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) { + if (storage->isView()) { + return CurrentMetrics::AttachedView; + } else if (storage->isDictionary()) { + return CurrentMetrics::AttachedDictionary; + } else { + return CurrentMetrics::AttachedTable; + } +} + void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &) { LOG_DEBUG(log, "Attach table {}.", backQuote(table_name)); @@ -187,16 +197,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name)); it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); - CurrentMetrics::Metric metric; - if (table->isView()) { - metric = CurrentMetrics::AttachedView; - } else if (table->isDictionary()) { - metric = CurrentMetrics::AttachedDictionary; - } else { - metric = CurrentMetrics::AttachedTable; - } - CurrentMetrics::add(metric, 1); - + CurrentMetrics::add(get_attached_count_metric_for_storage(table), 1); } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -212,15 +213,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - CurrentMetrics::Metric metric; - if (res->isView()) { - metric = CurrentMetrics::AttachedView; - } else if (res->isDictionary()) { - metric = CurrentMetrics::AttachedDictionary; - } else { - metric = CurrentMetrics::AttachedTable; - } - CurrentMetrics::sub(metric, 1); + CurrentMetrics::sub(get_attached_count_metric_for_storage(res), 1); } return res; } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index ab7f2fff5aa..03a8feb845f 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -254,6 +254,17 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(ContextPtr /* context_ */, con return detachTableUnlocked(table_name); } + +static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) { + if (storage->isView()) { + return CurrentMetrics::AttachedView; + } else if (storage->isDictionary()) { + return CurrentMetrics::AttachedDictionary; + } else { + return CurrentMetrics::AttachedTable; + } +} + StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_name) { StoragePtr res; @@ -265,15 +276,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; tables.erase(it); res->is_detached = true; - 
CurrentMetrics::Metric metric; - if (res->isView()) { - metric = CurrentMetrics::AttachedView; - } else if (res->isDictionary()) { - metric = CurrentMetrics::AttachedDictionary; - } else { - metric = CurrentMetrics::AttachedTable; - } - CurrentMetrics::sub(metric, 1); + CurrentMetrics::sub(get_attached_count_metric_for_storage(res), 1); auto table_id = res->getStorageID(); if (table_id.hasUUID()) @@ -314,15 +317,7 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. table->is_detached = false; - CurrentMetrics::Metric metric; - if (table->isView()) { - metric = CurrentMetrics::AttachedView; - } else if (table->isDictionary()) { - metric = CurrentMetrics::AttachedDictionary; - } else { - metric = CurrentMetrics::AttachedTable; - } - CurrentMetrics::add(metric, 1); + CurrentMetrics::add(get_attached_count_metric_for_storage(table), 1); } void DatabaseWithOwnTablesBase::shutdown() From 98b89323c8239ce71153f88f6232806993b1a411 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 21 May 2024 16:14:48 +0200 Subject: [PATCH 206/392] Pass virtual columns descriptions to writer --- .../MergeTree/IMergeTreeDataPartWriter.cpp | 16 ++++++++++------ .../MergeTree/IMergeTreeDataPartWriter.h | 4 ++++ .../MergeTree/MergeTreeDataPartCompact.cpp | 3 ++- src/Storages/MergeTree/MergeTreeDataPartWide.cpp | 3 ++- .../MergeTree/MergeTreeDataPartWriterCompact.cpp | 3 ++- .../MergeTree/MergeTreeDataPartWriterCompact.h | 1 + .../MergeTree/MergeTreeDataPartWriterOnDisk.cpp | 3 ++- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 1 + .../MergeTree/MergeTreeDataPartWriterWide.cpp | 3 ++- .../MergeTree/MergeTreeDataPartWriterWide.h | 1 + .../MergeTree/MergedBlockOutputStream.cpp | 3 ++- .../MergeTree/MergedColumnOnlyOutputStream.cpp | 1 + 12 files changed, 30 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index b3e33e94073..27da53de9b0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -52,6 +52,7 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) : data_part_name(data_part_name_) @@ -59,6 +60,7 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( , index_granularity_info(index_granularity_info_) , storage_settings(storage_settings_) , metadata_snapshot(metadata_snapshot_) + , virtual_columns(virtual_columns_) , columns_list(columns_list_) , settings(settings_) , with_final_mark(settings.can_use_adaptive_granularity) @@ -95,10 +97,9 @@ ASTPtr IMergeTreeDataPartWriter::getCodecDescOrDefault(const String & column_nam if (const auto * column_desc = columns.tryGet(column_name)) return get_codec_or_default(*column_desc); -///// TODO: is this needed? 
-// if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) -// return get_codec_or_default(*virtual_desc); -// + if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name)) + return get_codec_or_default(*virtual_desc); + return default_codec->getFullCodecDesc(); } @@ -115,6 +116,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -131,6 +133,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -149,6 +152,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -158,11 +162,11 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( { if (part_type == MergeTreeDataPartType::Compact) return createMergeTreeDataPartCompactWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); else if (part_type == MergeTreeDataPartType::Wide) return createMergeTreeDataPartWideWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, indices_to_recalc, stats_to_recalc_, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown part type: {}", part_type.toString()); diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index d2bf03483c9..5dcc7ddc599 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -29,6 +30,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_ = {}); @@ -56,6 +58,7 @@ protected: const MergeTreeIndexGranularityInfo index_granularity_info; const MergeTreeSettingsPtr storage_settings; const StorageMetadataPtr metadata_snapshot; + const VirtualsDescriptionPtr virtual_columns; const NamesAndTypesList columns_list; const MergeTreeWriterSettings 
settings; const bool with_final_mark; @@ -77,6 +80,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index fb1c2fe35ed..332b7d04f7f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -56,6 +56,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -75,7 +76,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( //// return std::make_unique( data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, + index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 74cab30064a..d4630d3dd3f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -62,6 +62,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -72,7 +73,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( return std::make_unique( data_part_name_, logger_name_, serializations_, data_part_storage_, index_granularity_info_, storage_settings_, columns_list, - metadata_snapshot, indices_to_recalc, stats_to_recalc_, + metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 3f08d8eea21..328e3118ba9 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -18,6 +18,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc, const String & marks_file_extension_, @@ -27,7 +28,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( : MergeTreeDataPartWriterOnDisk( data_part_name_, logger_name_, serializations_, data_part_storage_, index_granularity_info_, storage_settings_, - columns_list_, 
metadata_snapshot_, + columns_list_, metadata_snapshot_, virtual_columns_, indices_to_recalc_, stats_to_recalc, marks_file_extension_, default_codec_, settings_, index_granularity_) , plain_file(getDataPartStorage().writeFile( diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 03804ff4966..f62f060fde2 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -19,6 +19,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 25eb83a82c0..30f01c1acd6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -148,6 +148,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const MergeTreeIndices & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -156,7 +157,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const MergeTreeIndexGranularity & index_granularity_) : IMergeTreeDataPartWriter( data_part_name_, serializations_, data_part_storage_, index_granularity_info_, - storage_settings_, columns_list_, metadata_snapshot_, settings_, index_granularity_) + storage_settings_, columns_list_, metadata_snapshot_, virtual_columns_, settings_, index_granularity_) , skip_indices(indices_to_recalc_) , stats(stats_to_recalc_) , marks_file_extension(marks_file_extension_) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index e17724fa1d0..a60fcd43a58 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -109,6 +109,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index a57bf7d2037..001f09b81b3 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -84,6 +84,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -93,7 +94,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( : MergeTreeDataPartWriterOnDisk( data_part_name_, logger_name_, serializations_, data_part_storage_, index_granularity_info_, storage_settings_, - 
columns_list_, metadata_snapshot_, + columns_list_, metadata_snapshot_, virtual_columns_, indices_to_recalc_, stats_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 5789213c910..8dc488788c6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -29,6 +29,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, + const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 0fe3ee30a0d..5ef967d930a 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -55,7 +55,8 @@ MergedBlockOutputStream::MergedBlockOutputStream( data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), data_part_storage, data_part->index_granularity_info, storage_settings, - columns_list, metadata_snapshot, skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity); + columns_list, metadata_snapshot, data_part->storage.getVirtualsPtr(), + skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity); } /// If data is pre-sorted. diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 1c75d81eca5..1d1783b1b43 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -39,6 +39,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( storage_settings, header.getNamesAndTypesList(), metadata_snapshot_, + data_part->storage.getVirtualsPtr(), indices_to_recalc, stats_to_recalc_, data_part->getMarksFileExtension(), From 372acbd3fcbb06d9cd650b785b99da346d6ce5c9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 21 May 2024 14:15:14 +0000 Subject: [PATCH 207/392] Refactor aliases a bit. 
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 314 +++++++++++------- .../02341_analyzer_aliases_basics.reference | 1 + .../02341_analyzer_aliases_basics.sql | 2 + .../0_stateless/02343_analyzer_lambdas.sql | 8 + 4 files changed, 204 insertions(+), 121 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 10f2290b34f..e50ad7911a0 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -471,7 +471,6 @@ struct TableExpressionData return buffer.str(); } }; - class ExpressionsStack { public: @@ -586,6 +585,82 @@ private: std::unordered_map alias_name_to_expressions; }; +struct ScopeAliases +{ + /// Alias name to query expression node + std::unordered_map alias_name_to_expression_node_before_group_by; + std::unordered_map alias_name_to_expression_node_after_group_by; + + std::unordered_map * alias_name_to_expression_node = nullptr; + + /// Alias name to lambda node + std::unordered_map alias_name_to_lambda_node; + + /// Alias name to table expression node + std::unordered_map alias_name_to_table_expression_node; + + /// Expressions like `x as y` where we can't say whether it's a function, expression or table. + std::unordered_map transitive_aliases; + + /// Nodes with duplicated aliases + std::unordered_set nodes_with_duplicated_aliases; + std::vector cloned_nodes_with_duplicated_aliases; + + std::unordered_map & getAliasMap(IdentifierLookupContext lookup_context) + { + switch (lookup_context) + { + case IdentifierLookupContext::EXPRESSION: return *alias_name_to_expression_node; + case IdentifierLookupContext::FUNCTION: return alias_name_to_lambda_node; + case IdentifierLookupContext::TABLE_EXPRESSION: return alias_name_to_table_expression_node; + } + + __builtin_unreachable(); + } + + enum class FindOption + { + FIRST_NAME, + FULL_NAME, + }; + + const std::string & getKey(const Identifier & identifier, FindOption find_option) + { + switch (find_option) + { + case FindOption::FIRST_NAME: return identifier.front(); + case FindOption::FULL_NAME: return identifier.getFullName(); + } + + __builtin_unreachable(); + } + + QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) + { + auto & alias_map = getAliasMap(lookup.lookup_context); + const std::string * key = &getKey(lookup.identifier, find_option); + + auto it = alias_map.find(*key); + while (it == alias_map.end()) + { + auto jt = transitive_aliases.find(*key); + if (jt == transitive_aliases.end()) + return {}; + + key = &(getKey(jt->second, find_option)); + it = alias_map.find(*key); + } + + return &it->second; + } + + const QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) const + { + return const_cast(this)->find(lookup, find_option); + } +}; + + /** Projection names is name of query tree node that is used in projection part of query node. 
* Example: SELECT id FROM test_table; * `id` is projection name of column node @@ -731,7 +806,7 @@ struct IdentifierResolveScope else if (parent_scope) join_use_nulls = parent_scope->join_use_nulls; - alias_name_to_expression_node = &alias_name_to_expression_node_before_group_by; + aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_before_group_by; } QueryTreeNodePtr scope_node; @@ -746,17 +821,7 @@ struct IdentifierResolveScope /// Argument can be expression like constant, column, function or table expression std::unordered_map expression_argument_name_to_node; - /// Alias name to query expression node - std::unordered_map alias_name_to_expression_node_before_group_by; - std::unordered_map alias_name_to_expression_node_after_group_by; - - std::unordered_map * alias_name_to_expression_node = nullptr; - - /// Alias name to lambda node - std::unordered_map alias_name_to_lambda_node; - - /// Alias name to table expression node - std::unordered_map alias_name_to_table_expression_node; + ScopeAliases aliases; /// Table column name to column node. Valid only during table ALIAS columns resolve. ColumnNameToColumnNodeMap column_name_to_column_node; @@ -767,10 +832,6 @@ struct IdentifierResolveScope /// Window name to window node std::unordered_map window_name_to_window_node; - /// Nodes with duplicated aliases - std::unordered_set nodes_with_duplicated_aliases; - std::vector cloned_nodes_with_duplicated_aliases; - /// Current scope expression in resolve process stack ExpressionsStack expressions_in_resolve_process_stack; @@ -889,7 +950,7 @@ struct IdentifierResolveScope bool had_aggregate_function = expressions_in_resolve_process_stack.hasAggregateFunction(); expressions_in_resolve_process_stack.push(node); if (group_by_use_nulls && had_aggregate_function != expressions_in_resolve_process_stack.hasAggregateFunction()) - alias_name_to_expression_node = &alias_name_to_expression_node_before_group_by; + aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_before_group_by; } void popExpressionNode() @@ -897,7 +958,7 @@ struct IdentifierResolveScope bool had_aggregate_function = expressions_in_resolve_process_stack.hasAggregateFunction(); expressions_in_resolve_process_stack.pop(); if (group_by_use_nulls && had_aggregate_function != expressions_in_resolve_process_stack.hasAggregateFunction()) - alias_name_to_expression_node = &alias_name_to_expression_node_after_group_by; + aliases.alias_name_to_expression_node = &aliases.alias_name_to_expression_node_after_group_by; } /// Dump identifier resolve scope @@ -916,16 +977,16 @@ struct IdentifierResolveScope for (const auto & [alias_name, node] : expression_argument_name_to_node) buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n'; - buffer << "Alias name to expression node table size " << alias_name_to_expression_node->size() << '\n'; - for (const auto & [alias_name, node] : *alias_name_to_expression_node) + buffer << "Alias name to expression node table size " << aliases.alias_name_to_expression_node->size() << '\n'; + for (const auto & [alias_name, node] : *aliases.alias_name_to_expression_node) buffer << "Alias name " << alias_name << " expression node " << node->dumpTree() << '\n'; - buffer << "Alias name to function node table size " << alias_name_to_lambda_node.size() << '\n'; - for (const auto & [alias_name, node] : alias_name_to_lambda_node) + buffer << "Alias name to function node table size " << aliases.alias_name_to_lambda_node.size() << '\n'; + for 
(const auto & [alias_name, node] : aliases.alias_name_to_lambda_node) buffer << "Alias name " << alias_name << " lambda node " << node->formatASTForErrorMessage() << '\n'; - buffer << "Alias name to table expression node table size " << alias_name_to_table_expression_node.size() << '\n'; - for (const auto & [alias_name, node] : alias_name_to_table_expression_node) + buffer << "Alias name to table expression node table size " << aliases.alias_name_to_table_expression_node.size() << '\n'; + for (const auto & [alias_name, node] : aliases.alias_name_to_table_expression_node) buffer << "Alias name " << alias_name << " node " << node->formatASTForErrorMessage() << '\n'; buffer << "CTE name to query node table size " << cte_name_to_query_node.size() << '\n'; @@ -936,8 +997,8 @@ struct IdentifierResolveScope for (const auto & [window_name, node] : window_name_to_window_node) buffer << "CTE name " << window_name << " node " << node->formatASTForErrorMessage() << '\n'; - buffer << "Nodes with duplicated aliases size " << nodes_with_duplicated_aliases.size() << '\n'; - for (const auto & node : nodes_with_duplicated_aliases) + buffer << "Nodes with duplicated aliases size " << aliases.nodes_with_duplicated_aliases.size() << '\n'; + for (const auto & node : aliases.nodes_with_duplicated_aliases) buffer << "Alias name " << node->getAlias() << " node " << node->formatASTForErrorMessage() << '\n'; buffer << "Expression resolve process stack " << '\n'; @@ -996,8 +1057,8 @@ struct IdentifierResolveScope class QueryExpressionsAliasVisitor : public InDepthQueryTreeVisitor { public: - explicit QueryExpressionsAliasVisitor(IdentifierResolveScope & scope_) - : scope(scope_) + explicit QueryExpressionsAliasVisitor(ScopeAliases & aliases_) + : aliases(aliases_) {} void visitImpl(QueryTreeNodePtr & node) @@ -1034,10 +1095,10 @@ public: private: void addDuplicatingAlias(const QueryTreeNodePtr & node) { - scope.nodes_with_duplicated_aliases.emplace(node); + aliases.nodes_with_duplicated_aliases.emplace(node); auto cloned_node = node->clone(); - scope.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); - scope.nodes_with_duplicated_aliases.emplace(cloned_node); + aliases.cloned_nodes_with_duplicated_aliases.emplace_back(cloned_node); + aliases.nodes_with_duplicated_aliases.emplace(cloned_node); } void updateAliasesIfNeeded(const QueryTreeNodePtr & node, bool is_lambda_node) @@ -1053,25 +1114,29 @@ private: if (is_lambda_node) { - if (scope.alias_name_to_expression_node->contains(alias)) + if (aliases.alias_name_to_expression_node->contains(alias)) addDuplicatingAlias(node); - auto [_, inserted] = scope.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); + auto [_, inserted] = aliases.alias_name_to_lambda_node.insert(std::make_pair(alias, node)); if (!inserted) addDuplicatingAlias(node); return; } - if (scope.alias_name_to_lambda_node.contains(alias)) - addDuplicatingAlias(node); + if (aliases.alias_name_to_lambda_node.contains(alias)) + addDuplicatingAlias(node); - auto [_, inserted] = scope.alias_name_to_expression_node->insert(std::make_pair(alias, node)); + auto [_, inserted] = aliases.alias_name_to_expression_node->insert(std::make_pair(alias, node)); if (!inserted) - addDuplicatingAlias(node); + addDuplicatingAlias(node); + + /// If node is identifier put it into transitive aliases map. 
+ if (const auto * identifier = typeid_cast(node.get())) + aliases.transitive_aliases.insert(std::make_pair(alias, identifier->getIdentifier())); } - IdentifierResolveScope & scope; + ScopeAliases & aliases; }; class TableExpressionsAliasVisitor : public InDepthQueryTreeVisitor @@ -1118,7 +1183,7 @@ private: return; const auto & node_alias = node->getAlias(); - auto [_, inserted] = scope.alias_name_to_table_expression_node.emplace(node_alias, node); + auto [_, inserted] = scope.aliases.alias_name_to_table_expression_node.emplace(node_alias, node); if (!inserted) throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, "Multiple table expressions with same alias {}. In scope {}", @@ -1189,7 +1254,7 @@ public: } case QueryTreeNodeType::TABLE_FUNCTION: { - QueryExpressionsAliasVisitor expressions_alias_visitor(scope); + QueryExpressionsAliasVisitor expressions_alias_visitor(scope.aliases); resolveTableFunction(node, scope, expressions_alias_visitor, false /*nested_table_function*/); break; } @@ -1864,7 +1929,7 @@ void QueryAnalyzer::collectScopeValidIdentifiersForTypoCorrection( if (allow_expression_identifiers) { - for (const auto & [name, expression] : *scope.alias_name_to_expression_node) + for (const auto & [name, expression] : *scope.aliases.alias_name_to_expression_node) { assert(expression); auto expression_identifier = Identifier(name); @@ -1894,13 +1959,13 @@ void QueryAnalyzer::collectScopeValidIdentifiersForTypoCorrection( { if (allow_function_identifiers) { - for (const auto & [name, _] : *scope.alias_name_to_expression_node) + for (const auto & [name, _] : *scope.aliases.alias_name_to_expression_node) valid_identifiers_result.insert(Identifier(name)); } if (allow_table_expression_identifiers) { - for (const auto & [name, _] : scope.alias_name_to_table_expression_node) + for (const auto & [name, _] : scope.aliases.alias_name_to_table_expression_node) valid_identifiers_result.insert(Identifier(name)); } } @@ -2789,21 +2854,22 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromExpressionArguments(cons bool QueryAnalyzer::tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope) { - const auto & identifier_bind_part = identifier_lookup.identifier.front(); + //const auto & identifier_bind_part = identifier_lookup.identifier.front(); + return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr; - auto get_alias_name_to_node_map = [&]() -> const std::unordered_map & - { - if (identifier_lookup.isExpressionLookup()) - return *scope.alias_name_to_expression_node; - else if (identifier_lookup.isFunctionLookup()) - return scope.alias_name_to_lambda_node; + // auto get_alias_name_to_node_map = [&]() -> const std::unordered_map & + // { + // if (identifier_lookup.isExpressionLookup()) + // return *scope.alias_name_to_expression_node; + // else if (identifier_lookup.isFunctionLookup()) + // return scope.alias_name_to_lambda_node; - return scope.alias_name_to_table_expression_node; - }; + // return scope.alias_name_to_table_expression_node; + // }; - const auto & alias_name_to_node_map = get_alias_name_to_node_map(); + // const auto & alias_name_to_node_map = get_alias_name_to_node_map(); - return alias_name_to_node_map.contains(identifier_bind_part); + // return alias_name_to_node_map.contains(identifier_bind_part); } /** Resolve identifier from scope aliases. 
@@ -2853,23 +2919,29 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier { const auto & identifier_bind_part = identifier_lookup.identifier.front(); - auto get_alias_name_to_node_map = [&]() -> std::unordered_map & - { - if (identifier_lookup.isExpressionLookup()) - return *scope.alias_name_to_expression_node; - else if (identifier_lookup.isFunctionLookup()) - return scope.alias_name_to_lambda_node; + // auto get_alias_name_to_node_map = [&]() -> std::unordered_map & + // { + // if (identifier_lookup.isExpressionLookup()) + // return *scope.alias_name_to_expression_node; + // else if (identifier_lookup.isFunctionLookup()) + // return scope.alias_name_to_lambda_node; - return scope.alias_name_to_table_expression_node; - }; + // return scope.alias_name_to_table_expression_node; + // }; - auto & alias_name_to_node_map = get_alias_name_to_node_map(); - auto it = alias_name_to_node_map.find(identifier_bind_part); + // auto & alias_name_to_node_map = get_alias_name_to_node_map(); + // auto it = alias_name_to_node_map.find(identifier_bind_part); - if (it == alias_name_to_node_map.end()) + // if (it == alias_name_to_node_map.end()) + // return {}; + + auto it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); + if (it == nullptr) return {}; - if (!it->second) + QueryTreeNodePtr & alias_node = *it; + + if (!alias_node) throw Exception(ErrorCodes::LOGICAL_ERROR, "Node with alias {} is not valid. In scope {}", identifier_bind_part, @@ -2889,14 +2961,14 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier return {}; } - auto node_type = it->second->getNodeType(); + auto node_type = alias_node->getNodeType(); /// Resolve expression if necessary if (node_type == QueryTreeNodeType::IDENTIFIER) { - scope.pushExpressionNode(it->second); + scope.pushExpressionNode(alias_node); - auto & alias_identifier_node = it->second->as(); + auto & alias_identifier_node = alias_node->as(); auto identifier = alias_identifier_node.getIdentifier(); auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings); if (!lookup_result.resolved_identifier) @@ -2912,7 +2984,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier getHintsErrorMessageSuffix(hints)); } - it->second = lookup_result.resolved_identifier; + alias_node = lookup_result.resolved_identifier; /** During collection of aliases if node is identifier and has alias, we cannot say if it is * column or function node. Check QueryExpressionsAliasVisitor documentation for clarification. @@ -2922,33 +2994,31 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier * If we resolved identifier node as function, we must remove identifier node alias from * expression alias map. 
*/ - if (identifier_lookup.isExpressionLookup()) - scope.alias_name_to_lambda_node.erase(identifier_bind_part); - else if (identifier_lookup.isFunctionLookup()) - scope.alias_name_to_expression_node->erase(identifier_bind_part); + // if (identifier_lookup.isExpressionLookup()) + // scope.alises.alias_name_to_lambda_node.erase(identifier_bind_part); + // else if (identifier_lookup.isFunctionLookup()) + // scope.aliases.alias_name_to_expression_node->erase(identifier_bind_part); scope.popExpressionNode(); } else if (node_type == QueryTreeNodeType::FUNCTION) { - resolveExpressionNode(it->second, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + resolveExpressionNode(alias_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); } else if (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION) { if (identifier_resolve_settings.allow_to_resolve_subquery_during_identifier_resolution) - resolveExpressionNode(it->second, scope, false /*allow_lambda_expression*/, identifier_lookup.isTableExpressionLookup() /*allow_table_expression*/); + resolveExpressionNode(alias_node, scope, false /*allow_lambda_expression*/, identifier_lookup.isTableExpressionLookup() /*allow_table_expression*/); } - QueryTreeNodePtr result = it->second; - - if (identifier_lookup.identifier.isCompound() && result) + if (identifier_lookup.identifier.isCompound() && alias_node) { if (identifier_lookup.isExpressionLookup()) { return tryResolveIdentifierFromCompoundExpression( identifier_lookup.identifier, 1 /*identifier_bind_size*/, - it->second, + alias_node, {} /* compound_expression_source */, scope, identifier_resolve_settings.allow_to_check_join_tree /* can_be_not_found */); @@ -2963,7 +3033,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier } } - return result; + return alias_node; } /** Resolve identifier from table columns. @@ -4124,10 +4194,12 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook * SELECT id FROM ( SELECT ... ) AS subquery ARRAY JOIN [0] AS id INNER JOIN second_table USING (id) * In the example, identifier `id` should be resolved into one from USING (id) column. */ - auto alias_it = scope.alias_name_to_expression_node->find(identifier_lookup.identifier.getFullName()); - if (alias_it != scope.alias_name_to_expression_node->end() && alias_it->second->getNodeType() == QueryTreeNodeType::COLUMN) + + auto alias_it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FULL_NAME); + //auto alias_it = scope.alias_name_to_expression_node->find(identifier_lookup.identifier.getFullName()); + if (alias_it && (*alias_it)->getNodeType() == QueryTreeNodeType::COLUMN) { - const auto & column_node = alias_it->second->as(); + const auto & column_node = (*alias_it)->as(); if (column_node.getColumnSource()->getNodeType() == QueryTreeNodeType::ARRAY_JOIN) prefer_column_name_to_alias = true; } @@ -5232,7 +5304,7 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod scope.scope_node->formatASTForErrorMessage()); /// Initialize aliases in lambda scope - QueryExpressionsAliasVisitor visitor(scope); + QueryExpressionsAliasVisitor visitor(scope.aliases); visitor.visit(lambda_to_resolve.getExpression()); /** Replace lambda arguments with new arguments. @@ -5252,8 +5324,8 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod const auto & lambda_argument_name = lambda_argument_identifier ? 
lambda_argument_identifier->getIdentifier().getFullName() : lambda_argument_column->getColumnName(); - bool has_expression_node = scope.alias_name_to_expression_node->contains(lambda_argument_name); - bool has_alias_node = scope.alias_name_to_lambda_node.contains(lambda_argument_name); + bool has_expression_node = scope.aliases.alias_name_to_expression_node->contains(lambda_argument_name); + bool has_alias_node = scope.aliases.alias_name_to_lambda_node.contains(lambda_argument_name); if (has_expression_node || has_alias_node) { @@ -5929,7 +6001,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi function_names = AggregateFunctionFactory::instance().getAllRegisteredNames(); possible_function_names.insert(possible_function_names.end(), function_names.begin(), function_names.end()); - for (auto & [name, lambda_node] : scope.alias_name_to_lambda_node) + for (auto & [name, lambda_node] : scope.aliases.alias_name_to_lambda_node) { if (lambda_node->getNodeType() == QueryTreeNodeType::LAMBDA) possible_function_names.push_back(name); @@ -6263,7 +6335,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id result_projection_names.push_back(node_alias); } - bool is_duplicated_alias = scope.nodes_with_duplicated_aliases.contains(node); + bool is_duplicated_alias = scope.aliases.nodes_with_duplicated_aliases.contains(node); if (is_duplicated_alias) scope.non_cached_identifier_lookups_during_expression_resolve.insert({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION}); @@ -6287,14 +6359,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * * To resolve b we need to resolve a. */ - auto it = scope.alias_name_to_expression_node->find(node_alias); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(node_alias); + if (it != scope.aliases.alias_name_to_expression_node->end()) node = it->second; if (allow_lambda_expression) { - it = scope.alias_name_to_lambda_node.find(node_alias); - if (it != scope.alias_name_to_lambda_node.end()) + it = scope.aliases.alias_name_to_lambda_node.find(node_alias); + if (it != scope.aliases.alias_name_to_lambda_node.end()) node = it->second; } } @@ -6320,15 +6392,15 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id result_projection_names.push_back(projection_name_it->second); } - if (resolved_identifier_node && !node_alias.empty()) - scope.alias_name_to_lambda_node.erase(node_alias); + // if (resolved_identifier_node && !node_alias.empty()) + // scope.alias_name_to_lambda_node.erase(node_alias); if (!resolved_identifier_node && allow_lambda_expression) { resolved_identifier_node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::FUNCTION}, scope).resolved_identifier; - if (resolved_identifier_node && !node_alias.empty()) - scope.alias_name_to_expression_node->erase(node_alias); + // if (resolved_identifier_node && !node_alias.empty()) + // scope.alias_name_to_expression_node->erase(node_alias); } if (!resolved_identifier_node && allow_table_expression) @@ -6569,14 +6641,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id */ if (!node_alias.empty() && use_alias_table && !scope.group_by_use_nulls) { - auto it = scope.alias_name_to_expression_node->find(node_alias); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(node_alias); + if (it != 
scope.aliases.alias_name_to_expression_node->end()) it->second = node; if (allow_lambda_expression) { - it = scope.alias_name_to_lambda_node.find(node_alias); - if (it != scope.alias_name_to_lambda_node.end()) + it = scope.aliases.alias_name_to_lambda_node.find(node_alias); + if (it != scope.aliases.alias_name_to_lambda_node.end()) it->second = node; } } @@ -6949,8 +7021,8 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod resolved_identifier = resolved_identifier->clone(); /// Update alias name to table expression map - auto table_expression_it = scope.alias_name_to_table_expression_node.find(from_table_identifier_alias); - if (table_expression_it != scope.alias_name_to_table_expression_node.end()) + auto table_expression_it = scope.aliases.alias_name_to_table_expression_node.find(from_table_identifier_alias); + if (table_expression_it != scope.aliases.alias_name_to_table_expression_node.end()) table_expression_it->second = resolved_identifier; auto table_expression_modifiers = from_table_identifier.getTableExpressionModifiers(); @@ -7149,7 +7221,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table alias_column_resolve_scope.context = scope.context; /// Initialize aliases in alias column scope - QueryExpressionsAliasVisitor visitor(alias_column_resolve_scope); + QueryExpressionsAliasVisitor visitor(alias_column_resolve_scope.aliases); visitor.visit(alias_column_to_resolve->getExpression()); resolveExpressionNode(alias_column_resolve_scope.scope_node, @@ -7519,7 +7591,7 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif for (auto & array_join_expression : array_join_nodes) { auto array_join_expression_alias = array_join_expression->getAlias(); - if (!array_join_expression_alias.empty() && scope.alias_name_to_expression_node->contains(array_join_expression_alias)) + if (!array_join_expression_alias.empty() && scope.aliases.alias_name_to_expression_node->contains(array_join_expression_alias)) throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", array_join_expression->formatASTForErrorMessage(), @@ -7613,8 +7685,8 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif array_join_nodes = std::move(array_join_column_expressions); for (auto & array_join_column_expression : array_join_nodes) { - auto it = scope.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); + if (it != scope.aliases.alias_name_to_expression_node->end()) { auto & array_join_column_expression_typed = array_join_column_expression->as(); auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), @@ -7911,7 +7983,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, if (alias_name.empty()) return; - auto [it, inserted] = scope.alias_name_to_table_expression_node.emplace(alias_name, table_expression_node); + auto [it, inserted] = scope.aliases.alias_name_to_table_expression_node.emplace(alias_name, table_expression_node); if (!inserted) throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, "Duplicate aliases {} for table expressions in FROM section are not allowed. Try to register {}. 
Already registered {}.", @@ -7980,7 +8052,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of QUALIFY"); /// Initialize aliases in query node scope - QueryExpressionsAliasVisitor visitor(scope); + QueryExpressionsAliasVisitor visitor(scope.aliases); if (query_node_typed.hasWith()) visitor.visit(query_node_typed.getWithNode()); @@ -8098,7 +8170,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier table_expressions_visitor.visit(query_node_typed.getJoinTree()); initializeQueryJoinTreeNode(query_node_typed.getJoinTree(), scope); - scope.alias_name_to_table_expression_node.clear(); + scope.aliases.alias_name_to_table_expression_node.clear(); resolveQueryJoinTreeNode(query_node_typed.getJoinTree(), scope, visitor); } @@ -8148,10 +8220,10 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier /// Clone is needed cause aliases share subtrees. /// If not clone, the same (shared) subtree could be resolved again with different (Nullable) type /// See 03023_group_by_use_nulls_analyzer_crashes - for (auto & [key, node] : scope.alias_name_to_expression_node_before_group_by) - scope.alias_name_to_expression_node_after_group_by[key] = node->clone(); + for (auto & [key, node] : scope.aliases.alias_name_to_expression_node_before_group_by) + scope.aliases.alias_name_to_expression_node_after_group_by[key] = node->clone(); - scope.alias_name_to_expression_node = &scope.alias_name_to_expression_node_after_group_by; + scope.aliases.alias_name_to_expression_node = &scope.aliases.alias_name_to_expression_node_after_group_by; } if (query_node_typed.hasHaving()) @@ -8223,7 +8295,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier * After scope nodes are resolved, we can compare node with duplicate alias with * node from scope alias table. 
*/ - for (const auto & node_with_duplicated_alias : scope.cloned_nodes_with_duplicated_aliases) + for (const auto & node_with_duplicated_alias : scope.aliases.cloned_nodes_with_duplicated_aliases) { auto node = node_with_duplicated_alias; auto node_alias = node->getAlias(); @@ -8234,8 +8306,8 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier bool has_node_in_alias_table = false; - auto it = scope.alias_name_to_expression_node->find(node_alias); - if (it != scope.alias_name_to_expression_node->end()) + auto it = scope.aliases.alias_name_to_expression_node->find(node_alias); + if (it != scope.aliases.alias_name_to_expression_node->end()) { has_node_in_alias_table = true; @@ -8248,8 +8320,8 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier scope.scope_node->formatASTForErrorMessage()); } - it = scope.alias_name_to_lambda_node.find(node_alias); - if (it != scope.alias_name_to_lambda_node.end()) + it = scope.aliases.alias_name_to_lambda_node.find(node_alias); + if (it != scope.aliases.alias_name_to_lambda_node.end()) { has_node_in_alias_table = true; @@ -8294,10 +8366,10 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier /// Remove aliases from expression and lambda nodes - for (auto & [_, node] : *scope.alias_name_to_expression_node) + for (auto & [_, node] : *scope.aliases.alias_name_to_expression_node) node->removeAlias(); - for (auto & [_, node] : scope.alias_name_to_lambda_node) + for (auto & [_, node] : scope.aliases.alias_name_to_lambda_node) node->removeAlias(); query_node_typed.resolveProjectionColumns(std::move(projection_columns)); diff --git a/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference b/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference index 3733d6b6084..e39cdce92b0 100644 --- a/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference +++ b/tests/queries/0_stateless/02341_analyzer_aliases_basics.reference @@ -17,3 +17,4 @@ Alias conflict with identifier inside expression Alias setting prefer_column_name_to_alias 0 Value +/a/b/c diff --git a/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql b/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql index 52a1cd1dae8..467073fc4e8 100644 --- a/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql +++ b/tests/queries/0_stateless/02341_analyzer_aliases_basics.sql @@ -48,3 +48,5 @@ WITH id AS value SELECT value FROM test_table; SET prefer_column_name_to_alias = 0; DROP TABLE test_table; + +WITH path('clickhouse.com/a/b/c') AS x SELECT x AS path; diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.sql b/tests/queries/0_stateless/02343_analyzer_lambdas.sql index 0c257cf6f18..25928acb2c3 100644 --- a/tests/queries/0_stateless/02343_analyzer_lambdas.sql +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.sql @@ -93,3 +93,11 @@ SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda2(tuple(x), x + 1), 1 DROP TABLE test_table_tuple; DROP TABLE test_table; + +WITH x -> (lambda(x) + 1) AS lambda +SELECT lambda(1); -- {serverError UNSUPPORTED_METHOD } + +WITH + x -> (lambda1(x) + 1) AS lambda, + lambda AS lambda1 +SELECT lambda(1); -- {serverError UNSUPPORTED_METHOD } From d4430b583c4e4531ad1372fd3e40ff6bad5a414d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 21 May 2024 16:19:14 +0200 Subject: [PATCH 208/392] Create snapshot --- utils/keeper-bench/Runner.cpp | 100 +++++++++++++++++----------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff 
--git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 0050230b6ec..a625a7f157d 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -628,7 +628,11 @@ struct ZooKeeperRequestFromLogReader set_request->path = current_block->getPath(idx_in_block); set_request->data = current_block->getData(idx_in_block); if (auto version = current_block->getVersion(idx_in_block)) - set_request->version = *version; + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + set_request->version = std::numeric_limits::max(); + } request_from_log.request = set_request; break; } @@ -637,7 +641,11 @@ struct ZooKeeperRequestFromLogReader auto remove_request = std::make_shared(); remove_request->path = current_block->getPath(idx_in_block); if (auto version = current_block->getVersion(idx_in_block)) - remove_request->version = *version; + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + remove_request->version = std::numeric_limits::max(); + } request_from_log.request = remove_request; break; } @@ -647,7 +655,11 @@ struct ZooKeeperRequestFromLogReader auto check_request = std::make_shared(); check_request->path = current_block->getPath(idx_in_block); if (auto version = current_block->getVersion(idx_in_block)) - check_request->version = *version; + { + /// we just need to make sure that the request with version that need to fail, fail when replaying + if (request_from_log.expected_result == Coordination::Error::ZBADVERSION) + check_request->version = std::numeric_limits::max(); + } if (op_num == Coordination::OpNum::CheckNotExists) check_request->not_exists = true; request_from_log.request = check_request; @@ -791,10 +803,12 @@ struct SetupNodeCollector if (!request_from_log.expected_result.has_value()) return; + auto process_request = [&](const Coordination::ZooKeeperRequest & request, const auto expected_result) { const auto & path = request.getPath(); - if (processed_paths.contains(path)) + + if (nodes_created_during_replay.contains(path)) return; auto op_num = request.getOpNum(); @@ -804,64 +818,43 @@ struct SetupNodeCollector if (expected_result == Coordination::Error::ZNODEEXISTS) { addExpectedNode(path); - processed_paths.insert(path); } else if (expected_result == Coordination::Error::ZOK) { + nodes_created_during_replay.insert(path); /// we need to make sure ancestors exist auto position = path.find_last_of('/'); if (position != 0) { auto parent_path = path.substr(0, position); - if (!processed_paths.contains(parent_path)) - { - addExpectedNode(parent_path); - processed_paths.insert(parent_path); - } + addExpectedNode(parent_path); } - - processed_paths.insert(path); } } else if (op_num == Coordination::OpNum::Remove) { - if (expected_result == Coordination::Error::ZOK) - { + if (expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) addExpectedNode(path); - processed_paths.insert(path); - } } else if (op_num == Coordination::OpNum::Set) { - if (expected_result == Coordination::Error::ZOK) - { + if (expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) addExpectedNode(path); - processed_paths.insert(path); - } } else if (op_num == Coordination::OpNum::Check) { - if (expected_result == Coordination::Error::ZOK) - { + if 
(expected_result == Coordination::Error::ZOK || expected_result == Coordination::Error::ZBADVERSION) addExpectedNode(path); - processed_paths.insert(path); - } } else if (op_num == Coordination::OpNum::CheckNotExists) { - if (expected_result == Coordination::Error::ZNODEEXISTS) - { + if (expected_result == Coordination::Error::ZNODEEXISTS || expected_result == Coordination::Error::ZBADVERSION) addExpectedNode(path); - processed_paths.insert(path); - } } else if (request.isReadRequest()) { if (expected_result == Coordination::Error::ZOK) - { addExpectedNode(path); - processed_paths.insert(path); - } } }; @@ -940,7 +933,7 @@ struct SetupNodeCollector std::mutex nodes_mutex; DB::KeeperContextPtr keeper_context; Coordination::KeeperStoragePtr initial_storage; - std::unordered_set processed_paths; + std::unordered_set nodes_created_during_replay; std::optional snapshot_manager; }; @@ -979,23 +972,23 @@ void requestFromLogExecutor(std::shared_ptrtoString(), response.error, *expected_result) - << std::endl; + //if (*expected_result != response.error) + //{ + // std::cerr << fmt::format( + // "Unexpected result for {}\ngot {}, expected {}\n", request->toString(), response.error, *expected_result) + // << std::endl; - if (const auto * multi_response = dynamic_cast(&response)) - { - std::string subresponses; - for (size_t i = 0; i < multi_response->responses.size(); ++i) - { - subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); - } + // if (const auto * multi_response = dynamic_cast(&response)) + // { + // std::string subresponses; + // for (size_t i = 0; i < multi_response->responses.size(); ++i) + // { + // subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); + // } - std::cerr << "Subresponses\n" << subresponses << std::endl; - } - } + // std::cerr << "Subresponses\n" << subresponses << std::endl; + // } + //} } request_promise->set_value(); @@ -1049,7 +1042,7 @@ void Runner::runBenchmarkFromLog() std::unordered_map>> executor_id_to_queue; - SCOPE_EXIT({ + SCOPE_EXIT_SAFE({ for (const auto & [executor_id, executor_queue] : executor_id_to_queue) executor_queue->finish(); @@ -1262,8 +1255,15 @@ Runner::~Runner() if (pool) pool->wait(); - auto connection = getConnection(connection_infos[0], 0); - benchmark_context.cleanup(*connection); + try + { + auto connection = getConnection(connection_infos[0], 0); + benchmark_context.cleanup(*connection); + } + catch (...) + { + DB::tryLogCurrentException("While trying to clean nodes"); + } } namespace From 23eaa0de40d92d61e453a86dfa7c1a38b5d67b75 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 21 May 2024 14:28:19 +0000 Subject: [PATCH 209/392] Fix style. 
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index e50ad7911a0..7ecb91e7972 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -615,7 +615,7 @@ struct ScopeAliases case IdentifierLookupContext::TABLE_EXPRESSION: return alias_name_to_table_expression_node; } - __builtin_unreachable(); + UNREACHABLE(); } enum class FindOption @@ -632,7 +632,7 @@ struct ScopeAliases case FindOption::FULL_NAME: return identifier.getFullName(); } - __builtin_unreachable(); + UNREACHABLE(); } QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) From dd9bb8fe9cc2d3187906cd65e0757ae29c67f032 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 16:54:28 +0200 Subject: [PATCH 210/392] Add tests --- .../02931_max_num_to_warn.reference | 2 + .../0_stateless/02931_max_num_to_warn.sql | 43 ++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.reference b/tests/queries/0_stateless/02931_max_num_to_warn.reference index 7de998eebfa..419149b0bd2 100644 --- a/tests/queries/0_stateless/02931_max_num_to_warn.reference +++ b/tests/queries/0_stateless/02931_max_num_to_warn.reference @@ -1,3 +1,5 @@ The number of attached tables is more than 5 +The number of attached views is more than 5 +The number of attached dictionaries is more than 5 The number of attached databases is more than 2 The number of active parts is more than 10 diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.sql b/tests/queries/0_stateless/02931_max_num_to_warn.sql index 23f04816d5a..4087a536cd0 100644 --- a/tests/queries/0_stateless/02931_max_num_to_warn.sql +++ b/tests/queries/0_stateless/02931_max_num_to_warn.sql @@ -13,6 +13,41 @@ CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_9 (id CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_10 (id Int32, str String) Engine=Memory; CREATE TABLE IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_11 (id Int32, str String) Engine=Memory; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_1 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_1; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_2 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_2; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_3 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_3; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_4 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_4; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_5 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_5; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_6 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_6; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_7 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_7; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_8 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_8; +CREATE VIEW IF NOT EXISTS 
test_max_num_to_warn_02931.test_max_num_to_warn_view_9 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_9; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_10 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_10; +CREATE VIEW IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_view_11 AS SELECT * FROM test_max_num_to_warn_02931.test_max_num_to_warn_11; + +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_1 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_1'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_2 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_2'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_3 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_3'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_4 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_4'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_5 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_5'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_6 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_6'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_7 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_7'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_8 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_8'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_9 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_9'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_10 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_10'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); +CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_11 (id Int32, str String) PRIMARY KEY id +SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_11'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); + CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_1; CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_2; CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_3; @@ -37,7 +72,13 @@ INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_9 VALUES (1, 'Hello' INSERT INTO test_max_num_to_warn_02931.test_max_num_to_warn_10 VALUES (1, 'Hello'); INSERT INTO 
test_max_num_to_warn_02931.test_max_num_to_warn_11 VALUES (1, 'Hello'); -SELECT * FROM system.warnings where message in ('The number of attached tables is more than 5', 'The number of attached databases is more than 2', 'The number of active parts is more than 10'); +SELECT * FROM system.warnings where message in ( + 'The number of attached tables is more than 5', + 'The number of attached views is more than 5', + 'The number of attached dictionaries is more than 5', + 'The number of attached databases is more than 2', + 'The number of active parts is more than 10' +); DROP DATABASE IF EXISTS test_max_num_to_warn_02931; DROP DATABASE IF EXISTS test_max_num_to_warn_1; From f1f8a35bab0e9dc46aa46faa4c3be7609b77a509 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 15:03:16 +0000 Subject: [PATCH 211/392] Fix #64136 --- src/Interpreters/Cache/QueryCache.cpp | 26 ++++++++++++---- src/Interpreters/Cache/QueryCache.h | 3 +- src/Interpreters/executeQuery.cpp | 4 +-- .../02494_query_cache_use_database.reference | 2 ++ .../02494_query_cache_use_database.sql | 30 +++++++++++++++++++ 5 files changed, 56 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02494_query_cache_use_database.reference create mode 100644 tests/queries/0_stateless/02494_query_cache_use_database.sql diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index fafe50c170f..2fddbc0b044 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -177,6 +177,22 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast) return transformed_ast; } +IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database) +{ + ast = removeQueryCacheSettings(ast); + + /// Hash the AST, it must consider aliases (issue #56258) + constexpr bool ignore_aliases = false; + IAST::Hash ast_hash = ast->getTreeHash(ignore_aliases); + + /// Also hash the database specified via SQL `USE db`, otherwise identifiers in same query (AST) may mean different columns in different tables (issue #64136) + IAST::Hash cur_database_hash = CityHash_v1_0_2::CityHash128(current_database.data(), current_database.size()); + UInt64 low_combined = ast_hash.low64 ^ cur_database_hash.low64; + UInt64 high_combined = ast_hash.high64 ^ cur_database_hash.high64; + + return {low_combined, high_combined}; +} + String queryStringFromAST(ASTPtr ast) { WriteBufferFromOwnString buf; @@ -186,17 +202,15 @@ String queryStringFromAST(ASTPtr ast) } -/// Hashing of ASTs must consider aliases (issue #56258) -static constexpr bool ignore_aliases = false; - QueryCache::Key::Key( ASTPtr ast_, + String current_database, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, std::chrono::time_point expires_at_, bool is_compressed_) - : ast_hash(removeQueryCacheSettings(ast_)->getTreeHash(ignore_aliases)) + : ast_hash(calculateAstHash(ast_, current_database)) , header(header_) , user_id(user_id_) , current_user_roles(current_user_roles_) @@ -207,8 +221,8 @@ QueryCache::Key::Key( { } -QueryCache::Key::Key(ASTPtr ast_, std::optional user_id_, const std::vector & current_user_roles_) - : QueryCache::Key(ast_, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST or user name +QueryCache::Key::Key(ASTPtr ast_, String current_database, std::optional user_id_, const std::vector & current_user_roles_) + : QueryCache::Key(ast_, current_database, {}, user_id_, current_user_roles_, 
false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles { } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 814cad37f82..c234ea3d464 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -88,6 +88,7 @@ public: /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, + String current_database, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, @@ -95,7 +96,7 @@ public: bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). - Key(ASTPtr ast_, std::optional user_id_, const std::vector & current_user_roles_); + Key(ASTPtr ast_, String current_database, std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index f1f72a4ea4a..90e6406c792 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1102,7 +1102,7 @@ static std::tuple executeQueryImpl( { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getUserID(), context->getCurrentRoles()); + QueryCache::Key key(ast, context->getCurrentDatabase(), context->getUserID(), context->getCurrentRoles()); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1225,7 +1225,7 @@ static std::tuple executeQueryImpl( && (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save)) { QueryCache::Key key( - ast, res.pipeline.getHeader(), + ast, context->getCurrentDatabase(), res.pipeline.getHeader(), context->getUserID(), context->getCurrentRoles(), settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), diff --git a/tests/queries/0_stateless/02494_query_cache_use_database.reference b/tests/queries/0_stateless/02494_query_cache_use_database.reference new file mode 100644 index 00000000000..1191247b6d9 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_use_database.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/02494_query_cache_use_database.sql b/tests/queries/0_stateless/02494_query_cache_use_database.sql new file mode 100644 index 00000000000..df560f82ebb --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_use_database.sql @@ -0,0 +1,30 @@ +-- Tags: no-parallel, no-fasttest +-- Tag no-fasttest: Depends on OpenSSL +-- Tag no-parallel: Messes with internal cache + +-- Test for issue #64136 + +SYSTEM DROP QUERY CACHE; + +DROP DATABASE IF EXISTS db1; +DROP DATABASE IF EXISTS db2; + +CREATE DATABASE db1; +CREATE DATABASE db2; + +CREATE TABLE db1.tab(a UInt64, PRIMARY KEY a); +CREATE TABLE db2.tab(a UInt64, PRIMARY KEY a); + +INSERT INTO db1.tab values(1); +INSERT INTO db2.tab values(2); + +USE db1; +SELECT * FROM tab SETTINGS use_query_cache=1; + +USE db2; +SELECT * FROM tab SETTINGS use_query_cache=1; + +DROP DATABASE db1; +DROP DATABASE db2; + +SYSTEM DROP QUERY CACHE; From 3dbf32a558458b50bafb017d45b83446ef0ec2e8 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 17:03:43 +0200 Subject: [PATCH 212/392] Remove dict creation --- 
tests/queries/0_stateless/02931_max_num_to_warn.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/02931_max_num_to_warn.sql b/tests/queries/0_stateless/02931_max_num_to_warn.sql index 4087a536cd0..1c96e017646 100644 --- a/tests/queries/0_stateless/02931_max_num_to_warn.sql +++ b/tests/queries/0_stateless/02931_max_num_to_warn.sql @@ -45,8 +45,6 @@ CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_ SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_9'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_10 (id Int32, str String) PRIMARY KEY id SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_10'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); -CREATE DICTIONARY IF NOT EXISTS test_max_num_to_warn_02931.test_max_num_to_warn_dict_11 (id Int32, str String) PRIMARY KEY id -SOURCE(CLICKHOUSE(DB 'test_max_num_to_warn_02931' TABLE 'test_max_num_to_warn_11'))LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_1; CREATE DATABASE IF NOT EXISTS test_max_num_to_warn_2; From ac7da1cc388edf03fd189bd24376c4a571c7b12a Mon Sep 17 00:00:00 2001 From: Max K Date: Tue, 21 May 2024 12:57:05 +0200 Subject: [PATCH 213/392] CI: cancel running PR wf after adding to MQ --- .github/workflows/merge_queue.yml | 3 + tests/ci/ci.py | 45 ++++++++++-- tests/ci/ci_metadata.py | 112 ++++++++++++++++++++++++++++++ tests/ci/github_helper.py | 19 +++++ 4 files changed, 172 insertions(+), 7 deletions(-) create mode 100644 tests/ci/ci_metadata.py diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 1b6cc320ec4..97aa0db4cdb 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -22,6 +22,9 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get version filter: tree:0 + - name: Cancel PR workflow + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 3a616c8aad6..046550c62f8 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -44,6 +44,7 @@ from env_helper import ( REPORT_PATH, S3_BUILDS_BUCKET, TEMP_PATH, + GITHUB_RUN_ID, ) from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, Git @@ -52,6 +53,7 @@ from github_helper import GitHub from pr_info import PRInfo from report import ERROR, SUCCESS, BuildResult, JobReport from s3_helper import S3Helper +from ci_metadata import CiMetadata from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -66,12 +68,12 @@ class PendingState: class CiCache: """ CI cache is a bunch of records. Record is a file stored under special location on s3. 
- The file name has following format + The file name has a format: _[]--___.ci RECORD_TYPE: - SUCCESSFUL - for successfuly finished jobs + SUCCESSFUL - for successful jobs PENDING - for pending jobs ATTRIBUTES: @@ -991,7 +993,11 @@ def normalize_check_name(check_name: str) -> str: def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: - # FIXME: consider switching to sub_parser for configure, pre, run, post actions + parser.add_argument( + "--cancel-previous-run", + action="store_true", + help="Action that cancels previous running PR workflow if PR added into the Merge Queue", + ) parser.add_argument( "--configure", action="store_true", @@ -1000,17 +1006,19 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: parser.add_argument( "--update-gh-statuses", action="store_true", - help="Action that recreate success GH statuses for jobs that finished successfully in past and will be skipped this time", + help="Action that recreate success GH statuses for jobs that finished successfully in past and will be " + "skipped this time", ) parser.add_argument( "--pre", action="store_true", - help="Action that executes prerequesetes for the job provided in --job-name", + help="Action that executes prerequisites for the job provided in --job-name", ) parser.add_argument( "--run", action="store_true", - help="Action that executes run action for specified --job-name. run_command must be configured for a given job name.", + help="Action that executes run action for specified --job-name. run_command must be configured for a given " + "job name.", ) parser.add_argument( "--post", @@ -1088,7 +1096,8 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: "--rebuild-all-binaries", action="store_true", default=False, - help="[DEPRECATED. to be removed, once no wf use it] will create run config without skipping build jobs in any case, used in --configure action (for release branches)", + help="[DEPRECATED. 
to be removed, once no wf use it] will create run config without skipping build jobs in " + "any case, used in --configure action (for release branches)", ) parser.add_argument( "--commit-message", @@ -1902,6 +1911,15 @@ def _get_ext_check_name(check_name: str) -> str: return check_name_with_group +def _cancel_pr_wf(s3: S3Helper, pr_number: int) -> None: + run_id = CiMetadata(s3, pr_number).run_id + if not run_id: + print("ERROR: FIX IT: Run id has not been found!") + else: + print(f"Canceling PR workflow run_id: [{run_id}], pr: [{pr_number}]") + GitHub.cancel_wf(run_id) + + def main() -> int: logging.basicConfig(level=logging.INFO) exit_code = 0 @@ -1930,6 +1948,12 @@ def main() -> int: ### CONFIGURE action: start if args.configure: + if CI and pr_info.is_pr: + # store meta on s3 (now we need it only for PRs) + meta = CiMetadata(s3, pr_info.number) + meta.run_id = int(GITHUB_RUN_ID) + meta.push_meta() + ci_options = CiOptions.create_from_pr_message( args.commit_message or None, update_from_api=True ) @@ -2222,6 +2246,13 @@ def main() -> int: assert indata, "Run config must be provided via --infile" _update_gh_statuses_action(indata=indata, s3=s3) + ### CANCEL PREVIOUS WORKFLOW RUN + elif args.cancel_previous_run: + assert ( + pr_info.is_merge_queue + ), "Currently it's supposed to be used in MQ wf to cancel running PR wf if any" + _cancel_pr_wf(s3, pr_info.merged_pr) + ### print results _print_results(result, args.outfile, args.pretty) diff --git a/tests/ci/ci_metadata.py b/tests/ci/ci_metadata.py new file mode 100644 index 00000000000..5856e9a8501 --- /dev/null +++ b/tests/ci/ci_metadata.py @@ -0,0 +1,112 @@ +from pathlib import Path +from typing import Optional + +from env_helper import ( + S3_BUILDS_BUCKET, + TEMP_PATH, +) +from s3_helper import S3Helper + + +# pylint: disable=too-many-lines + + +class CiMetadata: + """ + CI Metadata class owns data like workflow run_id for a given pr, etc. 
+ Goal is to have everything we need to manage workflows on S3 and rely on GH api as little as possible + """ + + _S3_PREFIX = "CI_meta_v1" + _LOCAL_PATH = Path(TEMP_PATH) / "ci_meta" + _FILE_SUFFIX = ".cimd" + _FILENAME_RUN_ID = "run_id" + _FILE_SUFFIX + + def __init__( + self, + s3: S3Helper, + pr_number: Optional[int] = None, + sha: Optional[str] = None, + git_ref: Optional[str] = None, + ): + assert pr_number or (sha and git_ref) + + self.sha = sha + self.pr_number = pr_number + self.git_ref = git_ref + self.s3 = s3 + self.run_id = 0 + + if self.pr_number: + self.s3_path = f"{self._S3_PREFIX}/PRs/{self.pr_number}/" + else: + self.s3_path = f"{self._S3_PREFIX}/{self.git_ref}/{self.sha}/" + + self._updated = False + + if not self._LOCAL_PATH.exists(): + self._LOCAL_PATH.mkdir(parents=True, exist_ok=True) + + def fetch_meta(self): + """ + Fetches meta from s3 + """ + + # clean up + for file in self._LOCAL_PATH.glob("*" + self._FILE_SUFFIX): + file.unlink() + + _ = self.s3.download_files( + bucket=S3_BUILDS_BUCKET, + s3_path=self.s3_path, + file_suffix=self._FILE_SUFFIX, + local_directory=self._LOCAL_PATH, + ) + + meta_files = Path(self._LOCAL_PATH).rglob("*" + self._FILE_SUFFIX) + for file_name in meta_files: + path_in_str = str(file_name) + with open(path_in_str, "r", encoding="utf-8") as f: + # Read all lines in the file + lines = f.readlines() + assert len(lines) == 1 + if file_name.name == self._FILENAME_RUN_ID: + self.run_id = int(lines[0]) + + self._updated = True + return self + + def push_meta( + self, + ) -> None: + """ + Uploads meta on s3 + """ + assert self.run_id + print("Storing workflow meta on s3") + + local_file = self._LOCAL_PATH / self._FILENAME_RUN_ID + with open(local_file, "w", encoding="utf-8") as file: + file.write(f"{self.run_id}\n") + + _ = self.s3.upload_file( + bucket=S3_BUILDS_BUCKET, + file_path=local_file, + s3_path=self.s3_path + local_file.name, + ) + + +if __name__ == "__main__": + # TEST: + s3 = S3Helper() + a = CiMetadata(s3, 12345, "deadbeaf", "test_branch") + a.run_id = 111 + a.push_meta() + b = CiMetadata(s3, 12345, "deadbeaf", "test_branch") + assert b.fetch_meta().run_id == a.run_id + + a = CiMetadata(s3, 0, "deadbeaf", "test_branch") + a.run_id = 112 + a.push_meta() + b = CiMetadata(s3, 0, "deadbeaf", "test_branch") + assert b.fetch_meta().run_id == a.run_id diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index ae1eaf4c06a..81603c66bae 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -9,6 +9,7 @@ from time import sleep from typing import List, Optional, Tuple, Union import github +import requests # explicit reimport # pylint: disable=useless-import-alias @@ -21,6 +22,9 @@ from github.NamedUser import NamedUser as NamedUser from github.PullRequest import PullRequest as PullRequest from github.Repository import Repository as Repository +from env_helper import GITHUB_REPOSITORY +from get_robot_token import get_best_robot_token + # pylint: enable=useless-import-alias CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache") @@ -260,3 +264,18 @@ class GitHub(github.Github): def retries(self, value: int) -> None: assert isinstance(value, int) self._retries = value + + # minimalistic static methods not using pygithub + @staticmethod + def cancel_wf(run_id, strict=False): + token = get_best_robot_token() + headers = {"Authorization": f"token {token}"} + url = f"https://api.github.com/repos/{GITHUB_REPOSITORY}/actions/runs/{run_id}/cancel" + try: + response = requests.post(url, headers=headers, timeout=10) 
+ response.raise_for_status() + print(f"NOTE: Workflow [{run_id}] has been cancelled") + except Exception as ex: + print("ERROR: Got exception executing wf cancel request", ex) + if strict: + raise ex From f815b4e037bb1ecd938ad659660f4d05326d0b7d Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 17:15:55 +0200 Subject: [PATCH 214/392] Fix style --- src/Databases/DatabaseLazy.cpp | 14 ++++++++++---- src/Databases/DatabasesCommon.cpp | 14 ++++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index a27e69c7e63..c95d690f331 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -174,12 +174,18 @@ bool DatabaseLazy::empty() const return tables_cache.empty(); } -static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) { - if (storage->isView()) { +static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) +{ + if (storage->isView()) + { return CurrentMetrics::AttachedView; - } else if (storage->isDictionary()) { + } + else if (storage->isDictionary()) + { return CurrentMetrics::AttachedDictionary; - } else { + } + else + { return CurrentMetrics::AttachedTable; } } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 03a8feb845f..ff721e8e5c4 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -255,12 +255,18 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(ContextPtr /* context_ */, con } -static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) { - if (storage->isView()) { +static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) +{ + if (storage->isView()) + { return CurrentMetrics::AttachedView; - } else if (storage->isDictionary()) { + } + else if (storage->isDictionary()) + { return CurrentMetrics::AttachedDictionary; - } else { + } + else + { return CurrentMetrics::AttachedTable; } } From 0106f558fb9040c97fcb7691dc5d72a144ad637b Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 17:19:52 +0200 Subject: [PATCH 215/392] Update limits --- .../en/operations/server-configuration-parameters/settings.md | 4 ++-- src/Core/ServerSettings.h | 4 ++-- src/Interpreters/Context.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 4d239309886..a5fe74fd0c6 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -563,7 +563,7 @@ Default value: 5000 ## max\_view\_num\_to\_warn {#max-view-num-to-warn} If the number of attached views exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. -Default value: 5000 +Default value: 10000 **Example** @@ -573,7 +573,7 @@ Default value: 5000 ## max\_dictionary\_num\_to\_warn {#max-dictionary-num-to-warn} If the number of attached dictionaries exceeds the specified value, clickhouse server will add warning messages to `system.warnings` table. 
-Default value: 5000 +Default value: 1000 **Example** diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index af96ca3a557..ea0b155b22d 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -97,8 +97,8 @@ namespace DB M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ M(UInt64, max_table_num_to_warn, 5000lu, "If number of tables is greater than this value, server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_view_num_to_warn, 5000lu, "If number of views is greater than this value, server will create a warning that will displayed to user.", 0) \ - M(UInt64, max_dictionary_num_to_warn, 5000lu, "If number of dictionaries is greater than this value, server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_view_num_to_warn, 10000lu, "If number of views is greater than this value, server will create a warning that will displayed to user.", 0) \ + M(UInt64, max_dictionary_num_to_warn, 1000lu, "If number of dictionaries is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, max_database_num_to_warn, 1000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, max_part_num_to_warn, 100000lu, "If number of databases is greater than this value, server will create a warning that will displayed to user.", 0) \ M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. Zero means unlimited.", 0) \ diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 4c5df8ef4ea..e1d82a8f604 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -361,8 +361,8 @@ struct ContextSharedPart : boost::noncopyable /// No lock required for format_schema_path modified only during initialization std::atomic_size_t max_database_num_to_warn = 1000lu; std::atomic_size_t max_table_num_to_warn = 5000lu; - std::atomic_size_t max_view_num_to_warn = 5000lu; - std::atomic_size_t max_dictionary_num_to_warn = 5000lu; + std::atomic_size_t max_view_num_to_warn = 10000lu; + std::atomic_size_t max_dictionary_num_to_warn = 1000lu; std::atomic_size_t max_part_num_to_warn = 100000lu; String format_schema_path; /// Path to a directory that contains schema files used by input formats. String google_protos_path; /// Path to a directory that contains the proto files for the well-known Protobuf types. 
From 828885c66c8a06d24c34b0d92c6cddda3525b30f Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 17:20:52 +0200 Subject: [PATCH 216/392] Fix applyNewSettings --- .../AzureBlobStorage/AzureObjectStorage.cpp | 4 +++- .../ObjectStorages/AzureBlobStorage/AzureObjectStorage.h | 3 ++- src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp | 5 +++-- src/Disks/ObjectStorages/Cached/CachedObjectStorage.h | 3 ++- src/Disks/ObjectStorages/DiskObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 9 +++++++-- src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp | 5 ----- src/Disks/ObjectStorages/Local/LocalObjectStorage.h | 5 ----- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 5 +++-- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 3 ++- src/Disks/ObjectStorages/Web/WebObjectStorage.cpp | 5 ----- src/Disks/ObjectStorages/Web/WebObjectStorage.h | 5 ----- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 5 ++--- 13 files changed, 25 insertions(+), 34 deletions(-) diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index c09cb5e24e1..e7ecf7cd515 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -398,7 +398,9 @@ void AzureObjectStorage::copyObject( /// NOLINT dest_blob_client.CopyFromUri(source_blob_client.GetUrl(), copy_options); } -void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) +void AzureObjectStorage::applyNewSettings( + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + ContextPtr context, const ApplyNewSettingsOptions &) { auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context); settings.set(std::move(new_settings)); diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index c38b5906f4e..e09f5e6753d 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -143,7 +143,8 @@ public: void applyNewSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context) override; + ContextPtr context, + const ApplyNewSettingsOptions & options) override; String getObjectsNamespace() const override { return object_namespace ; } diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index c834ef56644..f2f33684fde 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -192,9 +192,10 @@ void CachedObjectStorage::shutdown() } void CachedObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, + ContextPtr context, const ApplyNewSettingsOptions & options) { - object_storage->applyNewSettings(config, config_prefix, context); + object_storage->applyNewSettings(config, config_prefix, context, options); } String CachedObjectStorage::getObjectsNamespace() const diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 
ed78eb90ef4..a4d263e92eb 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -91,7 +91,8 @@ public: void applyNewSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context) override; + ContextPtr context, + const ApplyNewSettingsOptions & options) override; String getObjectsNamespace() const override; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index f6980d1e8f1..27e0cc78a38 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -536,7 +536,7 @@ void DiskObjectStorage::applyNewSettings( { /// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name const auto config_prefix = "storage_configuration.disks." + name; - object_storage->applyNewSettings(config, config_prefix, context_); + object_storage->applyNewSettings(config, config_prefix, context_, IObjectStorage::ApplyNewSettingsOptions{ .allow_client_change = true }); { std::unique_lock lock(resource_mutex); diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 5724ae8929c..d4ac6ea0239 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -199,10 +199,15 @@ public: virtual void startup() = 0; /// Apply new settings, in most cases reiniatilize client and some other staff + struct ApplyNewSettingsOptions + { + bool allow_client_change = true; + }; virtual void applyNewSettings( - const Poco::Util::AbstractConfiguration &, + const Poco::Util::AbstractConfiguration & /* config */, const std::string & /*config_prefix*/, - ContextPtr) {} + ContextPtr /* context */, + const ApplyNewSettingsOptions & /* options */) {} /// Sometimes object storages have something similar to chroot or namespace, for example /// buckets in S3. If object storage doesn't have any namepaces return empty string. 
diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index fa27e08f404..a247d86ddce 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -222,11 +222,6 @@ std::unique_ptr LocalObjectStorage::cloneObjectStorage( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "cloneObjectStorage() is not implemented for LocalObjectStorage"); } -void LocalObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & /* config */, const std::string & /* config_prefix */, ContextPtr /* context */) -{ -} - ObjectStorageKey LocalObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const { constexpr size_t key_name_total_size = 32; diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h index 4c667818c88..371cd37f8b2 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h @@ -73,11 +73,6 @@ public: void startup() override; - void applyNewSettings( - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - ContextPtr context) override; - String getObjectsNamespace() const override { return ""; } std::unique_ptr cloneObjectStorage( diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 7891be64b06..d18468411ea 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -572,7 +572,8 @@ void S3ObjectStorage::startup() void S3ObjectStorage::applyNewSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context) + ContextPtr context, + const ApplyNewSettingsOptions & options) { auto new_s3_settings = getSettings(config, config_prefix, context); if (!static_headers.empty()) @@ -586,7 +587,7 @@ void S3ObjectStorage::applyNewSettings( new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); auto current_s3_settings = s3_settings.get(); - if (current_s3_settings->auth_settings.hasUpdates(new_s3_settings->auth_settings) || for_disk_s3) + if (options.allow_client_change && (current_s3_settings->auth_settings.hasUpdates(new_s3_settings->auth_settings) || for_disk_s3)) { auto new_client = getClient(config, config_prefix, context, *new_s3_settings, for_disk_s3, &uri); client.set(std::move(new_client)); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 74bc5bef3c7..1fff6d67e23 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -149,7 +149,8 @@ public: void applyNewSettings( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, - ContextPtr context) override; + ContextPtr context, + const ApplyNewSettingsOptions & options) override; std::string getObjectsNamespace() const override { return uri.bucket; } diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 69f6137cd2d..e837e056acc 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -344,11 +344,6 @@ void WebObjectStorage::startup() { } -void WebObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & /* config */, const std::string & /* 
config_prefix */, ContextPtr /* context */) -{ -} - ObjectMetadata WebObjectStorage::getObjectMetadata(const std::string & /* path */) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Metadata is not supported for {}", getName()); diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h index b8ab510a6fb..9d3b9a3a8f0 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h @@ -72,11 +72,6 @@ public: void startup() override; - void applyNewSettings( - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - ContextPtr context) override; - String getObjectsNamespace() const override { return ""; } std::unique_ptr cloneObjectStorage( diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index c45752c10f5..ba91f3038b6 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -87,9 +87,8 @@ bool StorageObjectStorage::supportsSubsetOfColumns(const ContextPtr & context) c void StorageObjectStorage::updateConfiguration(ContextPtr context) { - /// FIXME: we should be able to update everything apart from client if static_configuration == true. - if (!configuration->isStaticConfiguration()) - object_storage->applyNewSettings(context->getConfigRef(), configuration->getTypeName() + ".", context); + IObjectStorage::ApplyNewSettingsOptions options{ .allow_client_change = !configuration->isStaticConfiguration() }; + object_storage->applyNewSettings(context->getConfigRef(), configuration->getTypeName() + ".", context, options); } namespace From a38bb095d800686c27cdf45275af7dc7a5dde149 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 18:12:22 +0200 Subject: [PATCH 217/392] Disallow write and truncate if archive --- .../ObjectStorage/StorageObjectStorage.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index ba91f3038b6..b38636e9144 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -242,6 +242,13 @@ SinkToStoragePtr StorageObjectStorage::write( const auto sample_block = metadata_snapshot->getSampleBlock(); const auto & settings = configuration->getQuerySettings(local_context); + if (configuration->isArchive()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Path '{}' contains archive. Write into archive is not supported", + configuration->getPath()); + } + if (configuration->withGlobsIgnorePartitionWildcard()) { throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, @@ -289,6 +296,13 @@ void StorageObjectStorage::truncate( ContextPtr /* context */, TableExclusiveLockHolder & /* table_holder */) { + if (configuration->isArchive()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Path '{}' contains archive. Table cannot be truncated", + configuration->getPath()); + } + if (configuration->withGlobs()) { throw Exception( From 2bf5f0e0fdb6e4ccffad95964622b5da9107ba5b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 21 May 2024 16:13:29 +0000 Subject: [PATCH 218/392] Fix style. 
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 7ecb91e7972..52cd6207dde 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -614,8 +614,6 @@ struct ScopeAliases case IdentifierLookupContext::FUNCTION: return alias_name_to_lambda_node; case IdentifierLookupContext::TABLE_EXPRESSION: return alias_name_to_table_expression_node; } - - UNREACHABLE(); } enum class FindOption @@ -631,8 +629,6 @@ struct ScopeAliases case FindOption::FIRST_NAME: return identifier.front(); case FindOption::FULL_NAME: return identifier.getFullName(); } - - UNREACHABLE(); } QueryTreeNodePtr * find(IdentifierLookup lookup, FindOption find_option) From 3c4fb4f3b632ed4480e730536cb3fe976ca831d0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 16:22:13 +0000 Subject: [PATCH 219/392] Incorporate review feedback --- src/Interpreters/Cache/QueryCache.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 2fddbc0b044..e30da7f233d 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -182,15 +182,14 @@ IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database) ast = removeQueryCacheSettings(ast); /// Hash the AST, it must consider aliases (issue #56258) - constexpr bool ignore_aliases = false; - IAST::Hash ast_hash = ast->getTreeHash(ignore_aliases); + SipHash hash; + ast->updateTreeHash(hash, /*ignore_aliases=*/ false); - /// Also hash the database specified via SQL `USE db`, otherwise identifiers in same query (AST) may mean different columns in different tables (issue #64136) - IAST::Hash cur_database_hash = CityHash_v1_0_2::CityHash128(current_database.data(), current_database.size()); - UInt64 low_combined = ast_hash.low64 ^ cur_database_hash.low64; - UInt64 high_combined = ast_hash.high64 ^ cur_database_hash.high64; + /// Also hash the database specified via SQL `USE db`, otherwise identifiers in same query (AST) may mean different columns in different + /// tables (issue #64136) + hash.update(current_database); - return {low_combined, high_combined}; + return getSipHash128AsPair(hash); } String queryStringFromAST(ASTPtr ast) From 532fe901293968b8dc4fa49299ff09079a9b3cd2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 18:32:19 +0200 Subject: [PATCH 220/392] Remove redundant includes --- src/Storages/ObjectStorage/StorageObjectStorageCluster.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index b38eb722df5..1c244b1ca36 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -1,10 +1,7 @@ #pragma once - -// #include #include #include #include -// #include namespace DB { From 96715f611bd54127f43f29123b9a06757d3d7daa Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 18:43:53 +0200 Subject: [PATCH 221/392] Apply change from PR #63642 (https://github.com/ClickHouse/ClickHouse/pull/63642) --- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 
b38636e9144..dba4aedf7b7 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -131,7 +131,7 @@ public: void applyFilters(ActionDAGNodes added_filter_nodes) override { - filter_actions_dag = ActionsDAG::buildFilterActionsDAG(added_filter_nodes.nodes); + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); const ActionsDAG::Node * predicate = nullptr; if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); From c1920130bb308e2d329117113ddf6ada3da2b908 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 May 2024 19:28:49 +0200 Subject: [PATCH 222/392] Apply changes from PR #62120 --- .../ObjectStorageIteratorAsync.cpp | 1 - .../ObjectStorage/StorageObjectStorage.cpp | 18 +++++++++-- .../StorageObjectStorageSource.cpp | 31 ++++++++++++++++--- .../StorageObjectStorageSource.h | 7 ++++- src/Storages/S3Queue/StorageS3Queue.cpp | 1 + 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp index 3fb615b2a5c..0420de0f8dd 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp @@ -93,7 +93,6 @@ std::future IObjectStorageIterator }, Priority{}); } - bool IObjectStorageIteratorAsync::isValid() { if (!is_initialized) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index dba4aedf7b7..5de7f41b4f7 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -141,14 +141,28 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override { createIterator(nullptr); + Pipes pipes; auto context = getContext(); + const size_t max_threads = context->getSettingsRef().max_threads; + size_t estimated_keys_count = iterator_wrapper->estimatedKeysCount(); + + if (estimated_keys_count > 1) + num_streams = std::min(num_streams, estimated_keys_count); + else + { + /// The amount of keys (zero) was probably underestimated. + /// We will keep one stream for this particular case. + num_streams = 1; + } + + const size_t max_parsing_threads = num_streams >= max_threads ? 
1 : (max_threads / std::max(num_streams, 1ul));
 for (size_t i = 0; i < num_streams; ++i)
 {
 auto source = std::make_shared<StorageObjectStorageSource>(
 getName(), object_storage, configuration, info, format_settings,
- context, max_block_size, iterator_wrapper, need_only_count);
+ context, max_block_size, iterator_wrapper, max_parsing_threads, need_only_count);
 source->setKeyCondition(filter_actions_dag, context);
 pipes.emplace_back(std::move(source));
@@ -175,7 +189,7 @@ private:
 const String name;
 const bool need_only_count;
 const size_t max_block_size;
- const size_t num_streams;
+ size_t num_streams;
 const bool distributed_processing;
 void createIterator(const ActionsDAG::Node * predicate)
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
index d3b67876224..8d946f515a3 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
@@ -48,6 +48,7 @@ StorageObjectStorageSource::StorageObjectStorageSource(
 ContextPtr context_,
 UInt64 max_block_size_,
 std::shared_ptr<IIterator> file_iterator_,
+ size_t max_parsing_threads_,
 bool need_only_count_)
 : SourceWithKeyCondition(info.source_header, false)
 , WithContext(context_)
@@ -57,6 +58,7 @@ StorageObjectStorageSource::StorageObjectStorageSource(
 , format_settings(format_settings_)
 , max_block_size(max_block_size_)
 , need_only_count(need_only_count_)
+ , max_parsing_threads(max_parsing_threads_)
 , read_from_format_info(info)
 , create_reader_pool(std::make_shared<ThreadPool>(
 CurrentMetrics::StorageObjectStorageThreads,
@@ -277,8 +279,6 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
 else
 {
 CompressionMethod compression_method;
- const auto max_parsing_threads = need_only_count ? std::optional<size_t>(1) : std::nullopt;
-
 if (auto object_info_in_archive = dynamic_cast(object_info.get()))
 {
 compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method);
@@ -292,9 +292,17 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
 }
 auto input_format = FormatFactory::instance().getInput(
- configuration->format, *read_buf, read_from_format_info.format_header,
- getContext(), max_block_size, format_settings, max_parsing_threads,
- std::nullopt, /* is_remote_fs */ true, compression_method);
+ configuration->format,
+ *read_buf,
+ read_from_format_info.format_header,
+ getContext(),
+ max_block_size,
+ format_settings,
+ need_only_count ? 1 : max_parsing_threads,
+ std::nullopt,
+ true/* is_remote_fs */,
+ compression_method,
+ need_only_count);
 if (key_condition)
 input_format->setKeyCondition(key_condition);
@@ -440,6 +448,19 @@ StorageObjectStorageSource::GlobIterator::GlobIterator(
 }
 }
+size_t StorageObjectStorageSource::GlobIterator::estimatedKeysCount()
+{
+ if (object_infos.empty() && !is_finished && object_storage_iterator->isValid())
+ {
+ /// 1000 files were listed, and we cannot make any estimation of _how many more_ there are (because we list bucket lazily);
+ /// If there are more objects in the bucket, limiting the number of streams is the last thing we may want to do
+ /// as it would lead to serious slow down of the execution, since objects are going
+ /// to be fetched sequentially rather than in-parallel with up to <max_threads> times.
+ return std::numeric_limits<size_t>::max();
+ }
+ return object_infos.size();
+}
+
 StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::nextImpl(size_t processor)
 {
 std::lock_guard lock(next_mutex);
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
index fb0ad3e32f1..8dbb31fdfba 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
@@ -37,6 +37,7 @@ public:
 ContextPtr context_,
 UInt64 max_block_size_,
 std::shared_ptr<IIterator> file_iterator_,
+ size_t max_parsing_threads_,
 bool need_only_count_);
 ~StorageObjectStorageSource() override;
@@ -64,6 +65,7 @@ protected:
 const std::optional<FormatSettings> format_settings;
 const UInt64 max_block_size;
 const bool need_only_count;
+ const size_t max_parsing_threads;
 const ReadFromFormatInfo read_from_format_info;
 const std::shared_ptr<ThreadPool> create_reader_pool;
@@ -165,12 +167,13 @@ public:
 ~GlobIterator() override = default;
- size_t estimatedKeysCount() override { return object_infos.size(); }
+ size_t estimatedKeysCount() override;
 private:
 ObjectInfoPtr nextImpl(size_t processor) override;
 ObjectInfoPtr nextImplUnlocked(size_t processor);
 void createFilterAST(const String & any_key);
+ void fillBufferForKey(const std::string & uri_key);
 const ObjectStoragePtr object_storage;
 const ConfigurationPtr configuration;
@@ -184,6 +187,8 @@ private:
 ActionsDAGPtr filter_dag;
 ObjectStorageIteratorPtr object_storage_iterator;
 bool recursive{false};
+ std::vector<String> expanded_keys;
+ std::vector<String>::iterator expanded_keys_iter;
 std::unique_ptr<re2::RE2> matcher;
diff --git a/src/Storages/S3Queue/StorageS3Queue.cpp b/src/Storages/S3Queue/StorageS3Queue.cpp
index 867f22ef5fe..f8eb288921c 100644
--- a/src/Storages/S3Queue/StorageS3Queue.cpp
+++ b/src/Storages/S3Queue/StorageS3Queue.cpp
@@ -359,6 +359,7 @@ std::shared_ptr<StorageObjectStorageSource> StorageS3Queue::createSource(
 local_context,
 max_block_size,
 file_iterator,
+ local_context->getSettingsRef().max_download_threads,
 false);
 auto file_deleter = [=, this](const std::string & path) mutable
From dc749325df1fa7f4d686beddd7551c30b881a0fc Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Tue, 21 May 2024 17:31:13 +0000
Subject: [PATCH 223/392] Faaaaaaaaaster
---
 src/Interpreters/Cache/QueryCache.cpp | 4 ++--
 src/Interpreters/Cache/QueryCache.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp
index e30da7f233d..4b10bfd3dcd 100644
--- a/src/Interpreters/Cache/QueryCache.cpp
+++ b/src/Interpreters/Cache/QueryCache.cpp
@@ -203,7 +203,7 @@ String queryStringFromAST(ASTPtr ast)
 QueryCache::Key::Key(
 ASTPtr ast_,
- String current_database,
+ const String & current_database,
 Block header_,
 std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
 bool is_shared_,
@@ -220,7 +220,7 @@ QueryCache::Key::Key(
 {
 }
-QueryCache::Key::Key(ASTPtr ast_, String current_database, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
+QueryCache::Key::Key(ASTPtr ast_, const String & current_database, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
 : QueryCache::Key(ast_, current_database, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles
 {
 }
diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h
index c234ea3d464..b5b6f477137 100644
---
a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -88,7 +88,7 @@ public: /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, - String current_database, + const String & current_database, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, @@ -96,7 +96,7 @@ public: bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). - Key(ASTPtr ast_, String current_database, std::optional user_id_, const std::vector & current_user_roles_); + Key(ASTPtr ast_, const String & current_database, std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; From 3ff53b8a0f5b62c7d64aaff263211ec060cd3ba7 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 19:38:30 +0200 Subject: [PATCH 224/392] Change double quotes in import --- src/Databases/DatabaseLazy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index c95d690f331..b5535ff2a74 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -10,7 +10,7 @@ #include #include #include -#include "Common/CurrentMetrics.h" +#include #include #include From 24805423544afd3e5c47a736f0da3e47dedac293 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Tue, 21 May 2024 19:42:03 +0200 Subject: [PATCH 225/392] Order imports --- src/Databases/DatabaseLazy.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index b5535ff2a74..7b47a1a2423 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -1,3 +1,10 @@ +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -10,14 +17,7 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include namespace fs = std::filesystem; From 9f71988f01aa70acccac5e1c178f1cbcb8dc74ae Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 21 May 2024 17:44:40 +0000 Subject: [PATCH 226/392] Fix tests --- src/Columns/ColumnDynamic.h | 6 +++--- .../0_stateless/03039_dynamic_all_merge_algorithms_1.sh | 2 +- .../0_stateless/03039_dynamic_all_merge_algorithms_2.sh | 2 +- .../0_stateless/03151_dynamic_type_scale_max_types.sql | 3 +++ 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 40e8e350733..8aece765308 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -96,13 +96,13 @@ public: MutableColumnPtr cloneEmpty() const override { - /// Keep current dynamic structure. - return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, statistics); + /// Keep current dynamic structure but not statistics. 
+ return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types); } MutableColumnPtr cloneResized(size_t size) const override { - return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, statistics); + return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types); } size_t size() const override diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh index 0941f2da369..9cfd2294c8d 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --optimize_aggregation_in_order 0" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --optimize_aggregation_in_order 0 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" function test() diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh index f067a99ca19..02362012960 100755 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh +++ b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh @@ -7,7 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" function test() diff --git a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql index 04322fc4f0c..632f3504fdb 100644 --- a/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql +++ b/tests/queries/0_stateless/03151_dynamic_type_scale_max_types.sql @@ -1,4 +1,7 @@ SET allow_experimental_dynamic_type=1; +set min_compress_block_size = 585572, max_compress_block_size = 373374, max_block_size = 60768, max_joined_block_size_rows = 18966, max_insert_threads = 5, max_threads = 50, max_read_buffer_size = 708232, connect_timeout_with_failover_ms = 2000, connect_timeout_with_failover_secure_ms = 3000, idle_connection_timeout = 36000, use_uncompressed_cache = true, stream_like_engine_allow_direct_select = true, replication_wait_for_inactive_replica_timeout = 30, compile_aggregate_expressions = false, min_count_to_compile_aggregate_expression = 0, compile_sort_description = false, group_by_two_level_threshold = 1000000, group_by_two_level_threshold_bytes = 12610083, enable_memory_bound_merging_of_aggregation_results = false, min_chunk_bytes_for_parallel_parsing = 18769830, merge_tree_coarse_index_granularity = 12, min_bytes_to_use_direct_io = 10737418240, min_bytes_to_use_mmap_io = 10737418240, log_queries = true, insert_quorum_timeout = 60000, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.05000000074505806, http_response_buffer_size = 294986, fsync_metadata = true, http_send_timeout = 60., http_receive_timeout = 60., opentelemetry_start_trace_probability = 0.10000000149011612, max_bytes_before_external_group_by = 1, max_bytes_before_external_sort = 
10737418240, max_bytes_before_remerge_sort = 1326536545, max_untracked_memory = 1048576, memory_profiler_step = 1048576, log_comment = '03151_dynamic_type_scale_max_types.sql', send_logs_level = 'fatal', prefer_localhost_replica = false, optimize_read_in_order = false, optimize_aggregation_in_order = true, aggregation_in_order_max_block_bytes = 27069500, read_in_order_two_level_merge_threshold = 75, allow_introspection_functions = true, database_atomic_wait_for_drop_and_detach_synchronously = true, remote_filesystem_read_method = 'read', local_filesystem_read_prefetch = true, remote_filesystem_read_prefetch = false, merge_tree_compact_parts_min_granules_to_multibuffer_read = 119, async_insert_busy_timeout_max_ms = 5000, read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true, filesystem_cache_segments_batch_size = 10, use_page_cache_for_disks_without_file_cache = true, page_cache_inject_eviction = true, allow_prefetched_read_pool_for_remote_filesystem = false, filesystem_prefetch_step_marks = 50, filesystem_prefetch_min_bytes_for_single_read_task = 16777216, filesystem_prefetch_max_memory_usage = 134217728, filesystem_prefetches_limit = 10, optimize_sorting_by_input_stream_properties = false, allow_experimental_dynamic_type = true, session_timezone = 'Africa/Khartoum', prefer_warmed_unmerged_parts_seconds = 2; + +drop table if exists to_table; CREATE TABLE to_table ( From 51afec49107864e97eb36f9e5760efd1e11bfea8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 21 May 2024 17:59:26 +0000 Subject: [PATCH 227/392] Fixing test. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 52cd6207dde..cfea45732db 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -637,6 +637,10 @@ struct ScopeAliases const std::string * key = &getKey(lookup.identifier, find_option); auto it = alias_map.find(*key); + + if (it == alias_map.end() && lookup.lookup_context == IdentifierLookupContext::TABLE_EXPRESSION) + return {}; + while (it == alias_map.end()) { auto jt = transitive_aliases.find(*key); @@ -4191,7 +4195,7 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook * In the example, identifier `id` should be resolved into one from USING (id) column. 
*/ - auto alias_it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FULL_NAME); + auto * alias_it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FULL_NAME); //auto alias_it = scope.alias_name_to_expression_node->find(identifier_lookup.identifier.getFullName()); if (alias_it && (*alias_it)->getNodeType() == QueryTreeNodeType::COLUMN) { From c9d29213d8e6af3569fef6be235f0074888a0261 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 21 May 2024 21:04:28 +0200 Subject: [PATCH 228/392] Update InterpreterCreateQuery.cpp --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 4fdd804452d..541717f1c04 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1493,7 +1493,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, validateVirtualColumns(*res); - if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns())) + if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()) && mode <= LoadingStrictnessLevel::CREATE) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column of type Object, " From 42efc4e2f641b1abec484a36aa32b2cc97e6b49d Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 21 May 2024 21:31:52 +0200 Subject: [PATCH 229/392] Pass column position to compact part writer --- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + .../MergeTree/IMergeTreeDataPartWriter.cpp | 4 +++- .../MergeTree/IMergeTreeDataPartWriter.h | 2 ++ .../MergeTree/MergeTreeDataPartCompact.cpp | 21 +++++++++---------- .../MergeTree/MergedBlockOutputStream.cpp | 8 +++---- .../MergedColumnOnlyOutputStream.cpp | 1 + 6 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index f4889d64179..15c8760141a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -186,6 +186,7 @@ public: /// take place, you must take original name of column for this part from /// storage and pass it to this method. std::optional getColumnPosition(const String & column_name) const; + const NameToNumber & getColumnPositions() const { return column_name_to_position; } /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()). /// If no checksums are present returns the name of the first physically existing column. 
diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 27da53de9b0..e8792be6293 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -115,6 +115,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, @@ -151,6 +152,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, @@ -162,7 +164,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( { if (part_type == MergeTreeDataPartType::Compact) return createMergeTreeDataPartCompactWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, + index_granularity_info_, storage_settings_, columns_list, column_positions, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); else if (part_type == MergeTreeDataPartType::Wide) return createMergeTreeDataPartWideWriter(data_part_name_, logger_name_, serializations_, data_part_storage_, diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 5dcc7ddc599..8eb546c4f2c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -69,6 +69,7 @@ protected: }; using MergeTreeDataPartWriterPtr = std::unique_ptr; +using ColumnPositions = std::unordered_map; MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( MergeTreeDataPartType part_type, @@ -79,6 +80,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr virtual_columns_, const std::vector & indices_to_recalc, diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 332b7d04f7f..98eda5573ce 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -55,6 +55,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const MergeTreeIndexGranularityInfo & index_granularity_info_, const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, + const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, const VirtualsDescriptionPtr virtual_columns, const std::vector & indices_to_recalc, @@ -64,19 +65,17 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const MergeTreeWriterSettings & 
writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) { -////// TODO: fix the order of columns -//// -//// NamesAndTypesList ordered_columns_list; -//// std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), -//// [this](const auto & column) { return getColumnPosition(column.name) != std::nullopt; }); -//// -//// /// Order of writing is important in compact format -//// ordered_columns_list.sort([this](const auto & lhs, const auto & rhs) -//// { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); -//// + NamesAndTypesList ordered_columns_list; + std::copy_if(columns_list.begin(), columns_list.end(), std::back_inserter(ordered_columns_list), + [&column_positions](const auto & column) { return column_positions.contains(column.name); }); + + /// Order of writing is important in compact format + ordered_columns_list.sort([&column_positions](const auto & lhs, const auto & rhs) + { return column_positions.at(lhs.name) < column_positions.at(rhs.name); }); + return std::make_unique( data_part_name_, logger_name_, serializations_, data_part_storage_, - index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, + index_granularity_info_, storage_settings_, ordered_columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_, marks_file_extension_, default_codec_, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 5ef967d930a..ee5c197336d 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -44,8 +44,6 @@ MergedBlockOutputStream::MergedBlockOutputStream( if (data_part->isStoredOnDisk()) data_part_storage->createDirectories(); -// /// We should write version metadata on part creation to distinguish it from parts that were created without transaction. -// TransactionID tid = txn ? txn->tid : Tx::PrehistoricTID; /// NOTE do not pass context for writing to system.transactions_info_log, /// because part may have temporary name (with temporary block numbers). Will write it later. 
data_part->version.setCreationTID(tid, nullptr); @@ -55,7 +53,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( data_part->name, data_part->storage.getLogName(), data_part->getSerializations(), data_part_storage, data_part->index_granularity_info, storage_settings, - columns_list, metadata_snapshot, data_part->storage.getVirtualsPtr(), + columns_list, data_part->getColumnPositions(), metadata_snapshot, data_part->storage.getVirtualsPtr(), skip_indices, statistics, data_part->getMarksFileExtension(), default_codec, writer_settings, computed_index_granularity); } @@ -243,9 +241,9 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis if (new_part->storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - if (auto file = new_part->partition.store(//storage, + if (auto file = new_part->partition.store( new_part->storage.getInMemoryMetadataPtr(), new_part->storage.getContext(), - new_part->getDataPartStorage(), checksums)) + new_part->getDataPartStorage(), checksums)) written_files.emplace_back(std::move(file)); if (new_part->minmax_idx->initialized) diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 1d1783b1b43..674a9bd498f 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -38,6 +38,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( data_part_storage, data_part->index_granularity_info, storage_settings, header.getNamesAndTypesList(), + data_part->getColumnPositions(), metadata_snapshot_, data_part->storage.getVirtualsPtr(), indices_to_recalc, From bb0b135c3642d2972fddc9c4e4a584dd5e246f9f Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 06:18:43 +0200 Subject: [PATCH 230/392] Do not decrement counter if table pointer is nut in lazy database detachtable --- src/Databases/DatabaseLazy.cpp | 34 ++++++-------------------- src/Databases/DatabasesCommon.cpp | 40 +++++++------------------------ src/Storages/Utils.cpp | 28 ++++++++++++++++++++++ src/Storages/Utils.h | 7 ++++++ 4 files changed, 51 insertions(+), 58 deletions(-) create mode 100644 src/Storages/Utils.cpp create mode 100644 src/Storages/Utils.h diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 7b47a1a2423..c2fd184f8bc 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -1,3 +1,5 @@ +#include + #include #include #include @@ -7,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -17,19 +18,12 @@ #include #include #include +#include namespace fs = std::filesystem; -namespace CurrentMetrics -{ - extern const Metric AttachedTable; - extern const Metric AttachedView; - extern const Metric AttachedDictionary; -} - - namespace DB { @@ -174,22 +168,6 @@ bool DatabaseLazy::empty() const return tables_cache.empty(); } -static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) -{ - if (storage->isView()) - { - return CurrentMetrics::AttachedView; - } - else if (storage->isDictionary()) - { - return CurrentMetrics::AttachedDictionary; - } - else - { - return CurrentMetrics::AttachedTable; - } -} - void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &) { LOG_DEBUG(log, "Attach table {}.", backQuote(table_name)); @@ -203,7 +181,7 @@ 
void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name)); it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); - CurrentMetrics::add(get_attached_count_metric_for_storage(table), 1); + CurrentMetrics::add(getAttachedCounterForStorage(table), 1); } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -219,7 +197,9 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - CurrentMetrics::sub(get_attached_count_metric_for_storage(res), 1); + if (res != nullptr) { + CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); + } } return res; } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index ff721e8e5c4..5fee14ecc2a 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -1,4 +1,10 @@ #include + +#include +#include +#include +#include +#include #include #include #include @@ -8,19 +14,8 @@ #include #include #include -#include -#include -#include +#include #include -#include -#include - -namespace CurrentMetrics -{ - extern const Metric AttachedTable; - extern const Metric AttachedView; - extern const Metric AttachedDictionary; -} namespace DB @@ -254,23 +249,6 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(ContextPtr /* context_ */, con return detachTableUnlocked(table_name); } - -static CurrentMetrics::Metric get_attached_count_metric_for_storage(const StoragePtr & storage) -{ - if (storage->isView()) - { - return CurrentMetrics::AttachedView; - } - else if (storage->isDictionary()) - { - return CurrentMetrics::AttachedDictionary; - } - else - { - return CurrentMetrics::AttachedTable; - } -} - StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_name) { StoragePtr res; @@ -282,7 +260,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; tables.erase(it); res->is_detached = true; - CurrentMetrics::sub(get_attached_count_metric_for_storage(res), 1); + CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); auto table_id = res->getStorageID(); if (table_id.hasUUID()) @@ -323,7 +301,7 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. 
table->is_detached = false; - CurrentMetrics::add(get_attached_count_metric_for_storage(table), 1); + CurrentMetrics::add(getAttachedCounterForStorage(table), 1); } void DatabaseWithOwnTablesBase::shutdown() diff --git a/src/Storages/Utils.cpp b/src/Storages/Utils.cpp new file mode 100644 index 00000000000..670d6a242e8 --- /dev/null +++ b/src/Storages/Utils.cpp @@ -0,0 +1,28 @@ +#include +#include + +namespace CurrentMetrics +{ + extern const Metric AttachedTable; + extern const Metric AttachedView; + extern const Metric AttachedDictionary; +} + +namespace DB { + + CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage) + { + if (storage->isView()) + { + return CurrentMetrics::AttachedView; + } + else if (storage->isDictionary()) + { + return CurrentMetrics::AttachedDictionary; + } + else + { + return CurrentMetrics::AttachedTable; + } + } +} diff --git a/src/Storages/Utils.h b/src/Storages/Utils.h new file mode 100644 index 00000000000..ffb8479d633 --- /dev/null +++ b/src/Storages/Utils.h @@ -0,0 +1,7 @@ +#include +#include + +namespace DB +{ + CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage); +} From 3f46e4e4305693c9542001fb9e718f2fb098a137 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Wed, 22 May 2024 04:35:06 +0000 Subject: [PATCH 231/392] better exception message in delete table with projection --- src/Interpreters/InterpreterDeleteQuery.cpp | 15 ++++++++++++++- src/Storages/IStorage.h | 3 +++ src/Storages/MergeTree/IMergeTreeDataPart.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 15 +++++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 2 ++ .../03161_lightweight_delete_projection.reference | 0 .../03161_lightweight_delete_projection.sql | 15 +++++++++++++++ 7 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03161_lightweight_delete_projection.reference create mode 100644 tests/queries/0_stateless/03161_lightweight_delete_projection.sql diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index ee774994145..9cfb8e486cb 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes extern const int TABLE_IS_READ_ONLY; extern const int SUPPORT_IS_DISABLED; extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } @@ -107,7 +108,19 @@ BlockIO InterpreterDeleteQuery::execute() } else { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table {}", table->getStorageID().getFullTableName()); + /// Currently just better exception for the case of a table with projection, + /// can act differently according to the setting. + if (table->hasProjection()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "DELETE query is not supported for table {} as it has projections. " + "User should drop all the projections manually before running the query", + table->getStorageID().getFullTableName()); + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "DELETE query is not supported for table {}", + table->getStorageID().getFullTableName()); } } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 87a04c3fcc6..37613704c6a 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -259,6 +259,9 @@ public: /// Return true if storage can execute lightweight delete mutations. virtual bool supportsLightweightDelete() const { return false; } + /// Return true if storage has any projection. 
+ virtual bool hasProjection() const { return false; } + /// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index c380f99060e..f38a80455c4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -442,6 +442,8 @@ public: bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); } + bool hasProjection() const { return !projection_parts.empty(); } + bool hasBrokenProjection(const String & projection_name) const; /// Return true, if all projections were loaded successfully and none was marked as broken. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 167160db317..1f7e0a19b3a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6133,6 +6133,21 @@ bool MergeTreeData::supportsLightweightDelete() const return true; } +bool MergeTreeData::hasProjection() const +{ + auto lock = lockParts(); + for (const auto & part : data_parts_by_info) + { + if (part->getState() == MergeTreeDataPartState::Outdated + || part->getState() == MergeTreeDataPartState::Deleting) + continue; + + if (part->hasProjection()) + return true; + } + return false; +} + MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const { ProjectionPartsVector res; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 2f9283659e3..ff93c7c5ae4 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -438,6 +438,8 @@ public: bool supportsLightweightDelete() const override; + bool hasProjection() const override; + bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override; diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.reference b/tests/queries/0_stateless/03161_lightweight_delete_projection.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03161_lightweight_delete_projection.sql b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql new file mode 100644 index 00000000000..cd29fae8fd7 --- /dev/null +++ b/tests/queries/0_stateless/03161_lightweight_delete_projection.sql @@ -0,0 +1,15 @@ + +DROP TABLE IF EXISTS users; + +CREATE TABLE users ( + uid Int16, + name String, + age Int16, + projection p1 (select count(), age group by age) +) ENGINE = MergeTree order by uid; + +INSERT INTO users VALUES (1231, 'John', 33); +INSERT INTO users VALUES (6666, 'Ksenia', 48); +INSERT INTO users VALUES (8888, 'Alice', 50); + +DELETE FROM users WHERE 1; -- { serverError NOT_IMPLEMENTED } From 04de82e96524b88f168b5be18195863e1cf4b18b Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 06:35:25 +0200 Subject: [PATCH 232/392] Fix style --- src/Databases/DatabaseLazy.cpp | 3 ++- src/Storages/Utils.cpp | 2 +- src/Storages/Utils.h | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git 
a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index c2fd184f8bc..b9c61400eb3 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -197,7 +197,8 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - if (res != nullptr) { + if (res != nullptr) + { CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); } } diff --git a/src/Storages/Utils.cpp b/src/Storages/Utils.cpp index 670d6a242e8..df86ef15cff 100644 --- a/src/Storages/Utils.cpp +++ b/src/Storages/Utils.cpp @@ -25,4 +25,4 @@ namespace DB { return CurrentMetrics::AttachedTable; } } -} +} diff --git a/src/Storages/Utils.h b/src/Storages/Utils.h index ffb8479d633..3e92f6247c6 100644 --- a/src/Storages/Utils.h +++ b/src/Storages/Utils.h @@ -1,3 +1,5 @@ +#pragma once + #include #include From a8fe7294d2e39b00f24fce5077b2a3a6ae63bf01 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 07:06:19 +0200 Subject: [PATCH 233/392] Do not distinguish resource types for lazy database --- src/Databases/DatabaseLazy.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index b9c61400eb3..003943fbbe4 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -18,11 +18,15 @@ #include #include #include -#include namespace fs = std::filesystem; +namespace CurrentMetrics +{ + extern const Metric AttachedTable; +} + namespace DB { @@ -181,7 +185,8 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name)); it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); - CurrentMetrics::add(getAttachedCounterForStorage(table), 1); + CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -197,10 +202,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - if (res != nullptr) - { - CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); - } + CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); } return res; } From 49529a1af9e15c1f3b6cda267034b93a48ce7e8a Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 07:18:17 +0200 Subject: [PATCH 234/392] Remove trailing whitespace --- src/Databases/DatabaseLazy.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 003943fbbe4..f0a56a0243d 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -186,7 +186,6 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); - } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, 
const String & table_name) From 7be50ee90d688567a88152a324dc783369acde48 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 07:26:24 +0200 Subject: [PATCH 235/392] Add missing newline~ --- src/Databases/DatabaseLazy.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index f0a56a0243d..e72834eddbe 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -22,6 +22,7 @@ namespace fs = std::filesystem; + namespace CurrentMetrics { extern const Metric AttachedTable; From a0ad4a96c72525b0fb2e9ac9a8b70c88d847b56b Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 07:34:38 +0200 Subject: [PATCH 236/392] Add yet more missing newlines --- src/Storages/Utils.cpp | 2 ++ src/Storages/Utils.h | 1 + 2 files changed, 3 insertions(+) diff --git a/src/Storages/Utils.cpp b/src/Storages/Utils.cpp index df86ef15cff..b0c06f5ccf6 100644 --- a/src/Storages/Utils.cpp +++ b/src/Storages/Utils.cpp @@ -1,6 +1,7 @@ #include #include + namespace CurrentMetrics { extern const Metric AttachedTable; @@ -8,6 +9,7 @@ namespace CurrentMetrics extern const Metric AttachedDictionary; } + namespace DB { CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage) diff --git a/src/Storages/Utils.h b/src/Storages/Utils.h index 3e92f6247c6..c86c2a4c341 100644 --- a/src/Storages/Utils.h +++ b/src/Storages/Utils.h @@ -3,6 +3,7 @@ #include #include + namespace DB { CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage); From 8869094c9986906034f3368a2cdeee179a7976b1 Mon Sep 17 00:00:00 2001 From: Francisco Javier Jurado Moreno <9376816+Beetelbrox@users.noreply.github.com> Date: Wed, 22 May 2024 07:42:05 +0200 Subject: [PATCH 237/392] Move opening brackets to its own line --- src/Storages/Utils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/Utils.cpp b/src/Storages/Utils.cpp index b0c06f5ccf6..ff73888e19d 100644 --- a/src/Storages/Utils.cpp +++ b/src/Storages/Utils.cpp @@ -10,8 +10,8 @@ namespace CurrentMetrics } -namespace DB { - +namespace DB +{ CurrentMetrics::Metric getAttachedCounterForStorage(const StoragePtr & storage) { if (storage->isView()) From 12ce276b8af09da46cb89ed9e2e15bb9ceff758a Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 22 May 2024 08:51:41 +0200 Subject: [PATCH 238/392] clang-tidy fix --- src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp | 8 ++++---- src/Storages/MergeTree/IMergeTreeDataPartWriter.h | 4 ++-- src/Storages/MergeTree/MergeTreeDataPartCompact.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartWide.cpp | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index e8792be6293..891ba1b9660 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -52,7 +52,7 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) : data_part_name(data_part_name_) 
@@ -117,7 +117,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const NamesAndTypesList & columns_list, const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -134,7 +134,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, @@ -154,7 +154,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const NamesAndTypesList & columns_list, const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 8eb546c4f2c..f04beb37ebb 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -30,7 +30,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_ = {}); @@ -82,7 +82,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter( const NamesAndTypesList & columns_list, const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 98eda5573ce..4a160e5e229 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -57,7 +57,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( const NamesAndTypesList & columns_list, const ColumnPositions & column_positions, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns, + const VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index d4630d3dd3f..149f86cef00 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -62,7 +62,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns, + const 
VirtualsDescriptionPtr & virtual_columns, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension_, From 58e655e07b128c4dfd26ffe60ad9d9ee285b3fa9 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 22 May 2024 07:24:42 +0000 Subject: [PATCH 239/392] Incorporate review feedback --- programs/keeper-client/Commands.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 3c649cad0d3..860840a2d06 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -10,8 +10,8 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int KEEPER_EXCEPTION; - extern const int UNEXPECTED_ZOOKEEPER_ERROR; } bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const @@ -442,7 +442,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient new_members = query->args[1].safeGet(); break; default: - throw Exception(ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR, "Unexpected operation: {}", operation); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected operation: {}", operation); } auto response = client->zookeeper->reconfig(joining, leaving, new_members); From 376282dd6dce879008f0f0295402bc197d2b1e39 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 22 May 2024 09:58:31 +0200 Subject: [PATCH 240/392] Revert "Prevent conversion to Replicated if zookeeper path already exists" --- src/Databases/DatabaseOrdinary.cpp | 14 ---- .../configs/config.d/clusters.xml | 2 +- ...sters_zk_path.xml => clusters_unusual.xml} | 2 +- .../test_unusual_path.py | 6 +- .../test_zk_path.py | 69 ------------------- 5 files changed, 5 insertions(+), 88 deletions(-) rename tests/integration/test_modify_engine_on_restart/configs/config.d/{clusters_zk_path.xml => clusters_unusual.xml} (80%) delete mode 100644 tests/integration/test_modify_engine_on_restart/test_zk_path.py diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 58fa7f01947..5d36f1cc3d6 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -76,20 +76,6 @@ static void setReplicatedEngine(ASTCreateQuery * create_query, ContextPtr contex String replica_path = server_settings.default_replica_path; String replica_name = server_settings.default_replica_name; - /// Check that replica path doesn't exist - Macros::MacroExpansionInfo info; - StorageID table_id = StorageID(create_query->getDatabase(), create_query->getTable(), create_query->uuid); - info.table_id = table_id; - info.expand_special_macros_only = false; - - String zookeeper_path = context->getMacros()->expand(replica_path, info); - if (context->getZooKeeper()->exists(zookeeper_path)) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Found existing ZooKeeper path {} while trying to convert table {} to replicated. 
Table will not be converted.", - zookeeper_path, backQuote(table_id.getFullTableName()) - ); - auto args = std::make_shared(); args->children.push_back(std::make_shared(replica_path)); args->children.push_back(std::make_shared(replica_name)); diff --git a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml index c8bbb7f3530..d3a9d4fb8f0 100644 --- a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml +++ b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters.xml @@ -19,4 +19,4 @@ 01 - + \ No newline at end of file diff --git a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml similarity index 80% rename from tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml rename to tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml index ba13cd87031..812291335b8 100644 --- a/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_zk_path.xml +++ b/tests/integration/test_modify_engine_on_restart/configs/config.d/clusters_unusual.xml @@ -15,6 +15,6 @@ 01 -/clickhouse/'/{database}/{table}/{uuid} +/lol/kek/'/{uuid} diff --git a/tests/integration/test_modify_engine_on_restart/test_unusual_path.py b/tests/integration/test_modify_engine_on_restart/test_unusual_path.py index 20d2c29257b..e82f48e8b34 100644 --- a/tests/integration/test_modify_engine_on_restart/test_unusual_path.py +++ b/tests/integration/test_modify_engine_on_restart/test_unusual_path.py @@ -6,7 +6,7 @@ cluster = ClickHouseCluster(__file__) ch1 = cluster.add_instance( "ch1", main_configs=[ - "configs/config.d/clusters_zk_path.xml", + "configs/config.d/clusters_unusual.xml", "configs/config.d/distributed_ddl.xml", ], with_zookeeper=True, @@ -63,7 +63,7 @@ def check_tables(): ) .strip() .startswith( - "ReplicatedReplacingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', D)" + "ReplicatedReplacingMergeTree(\\'/lol/kek/\\\\\\'/{uuid}\\', \\'{replica}\\', D)" ) ) assert ( @@ -73,7 +73,7 @@ def check_tables(): ) .strip() .startswith( - "ReplicatedVersionedCollapsingMergeTree(\\'/clickhouse/\\\\\\'/{database}/{table}/{uuid}\\', \\'{replica}\\', Sign, Version)" + "ReplicatedVersionedCollapsingMergeTree(\\'/lol/kek/\\\\\\'/{uuid}\\', \\'{replica}\\', Sign, Version)" ) ) diff --git a/tests/integration/test_modify_engine_on_restart/test_zk_path.py b/tests/integration/test_modify_engine_on_restart/test_zk_path.py deleted file mode 100644 index dd633ad0810..00000000000 --- a/tests/integration/test_modify_engine_on_restart/test_zk_path.py +++ /dev/null @@ -1,69 +0,0 @@ -import pytest -from test_modify_engine_on_restart.common import ( - get_table_path, - set_convert_flags, -) -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -ch1 = cluster.add_instance( - "ch1", - main_configs=[ - "configs/config.d/clusters_zk_path.xml", - "configs/config.d/distributed_ddl.xml", - ], - with_zookeeper=True, - macros={"replica": "node1"}, - stay_alive=True, -) - -database_name = "modify_engine_zk_path" - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -def q(node, query): - return node.query(database=database_name, sql=query) - - -def 
test_modify_engine_fails_if_zk_path_exists(started_cluster): - ch1.query("CREATE DATABASE " + database_name) - - q( - ch1, - "CREATE TABLE already_exists_1 ( A Int64, D Date, S String ) ENGINE MergeTree() PARTITION BY toYYYYMM(D) ORDER BY A;", - ) - uuid = q( - ch1, - f"SELECT uuid FROM system.tables WHERE table = 'already_exists_1' and database = '{database_name}'", - ).strip("'[]\n") - - q( - ch1, - f"CREATE TABLE already_exists_2 ( A Int64, D Date, S String ) ENGINE ReplicatedMergeTree('/clickhouse/\\'/{database_name}/already_exists_1/{uuid}', 'r2') PARTITION BY toYYYYMM(D) ORDER BY A;", - ) - - set_convert_flags(ch1, database_name, ["already_exists_1"]) - - table_data_path = get_table_path(ch1, "already_exists_1", database_name) - - ch1.stop_clickhouse() - ch1.start_clickhouse(retry_start=False, expected_to_fail=True) - - # Check if we can cancel convertation - ch1.exec_in_container( - [ - "bash", - "-c", - f"rm {table_data_path}convert_to_replicated", - ] - ) - ch1.start_clickhouse() From 7f46eae7b4961b3d58e2d592bc42ba5a32297f7c Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 22 May 2024 11:31:01 +0200 Subject: [PATCH 241/392] clang-tidy fix --- src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterWide.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 328e3118ba9..2d86e0f0770 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -18,7 +18,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc, const String & marks_file_extension_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index f62f060fde2..ebf96c1ebb2 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -19,7 +19,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 30f01c1acd6..0a8920790e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -148,7 +148,7 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr 
virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const MergeTreeIndices & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index a60fcd43a58..0c31cabc8c4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -109,7 +109,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 001f09b81b3..9df6cc5e2f7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -84,7 +84,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list_, const StorageMetadataPtr & metadata_snapshot_, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc_, const Statistics & stats_to_recalc_, const String & marks_file_extension_, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 8dc488788c6..63205775c58 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -29,7 +29,7 @@ public: const MergeTreeSettingsPtr & storage_settings_, const NamesAndTypesList & columns_list, const StorageMetadataPtr & metadata_snapshot, - const VirtualsDescriptionPtr virtual_columns_, + const VirtualsDescriptionPtr & virtual_columns_, const std::vector & indices_to_recalc, const Statistics & stats_to_recalc_, const String & marks_file_extension, From 8dd52a26257a9dc11723e5a87507f6815f4fb818 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 21 May 2024 18:42:14 +0200 Subject: [PATCH 242/392] Ignore allow_suspicious_primary_key on ATTACH and verify on ALTER Signed-off-by: Azat Khuzhin Co-authored-by: Alexander Tokmakov --- src/Storages/MergeTree/MergeTreeData.cpp | 12 +++++++++++ src/Storages/MergeTree/MergeTreeData.h | 2 ++ .../MergeTree/registerStorageMergeTree.cpp | 20 ++++-------------- src/Storages/StorageMergeTree.cpp | 8 +++++-- src/Storages/StorageReplicatedMergeTree.cpp | 12 +++++++++-- ...03020_order_by_SimpleAggregateFunction.sql | 21 ++++++++++++++++--- 6 files changed, 52 insertions(+), 23 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 527dac01b71..13d59d671ea 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -188,6 +189,7 @@ namespace ErrorCodes extern const int CANNOT_SCHEDULE_TASK; extern const int LIMIT_EXCEEDED; extern const int CANNOT_FORGET_PARTITION; + extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY; } static void checkSuspiciousIndices(const ASTFunction * index_function) @@ -8538,6 +8540,16 @@ void MergeTreeData::unloadPrimaryKeys() } 
} +void MergeTreeData::verifySortingKey(const KeyDescription & sorting_key) +{ + /// Aggregate functions already forbidden, but SimpleAggregateFunction are not + for (const auto & data_type : sorting_key.data_types) + { + if (dynamic_cast(data_type->getCustomName())) + throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY, "Column with type {} is not allowed in key expression", data_type->getCustomName()->getName()); + } +} + bool updateAlterConversionsMutations(const MutationCommands & commands, std::atomic & alter_conversions_mutations, bool remove) { for (const auto & command : commands) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 2f9283659e3..062f967bb93 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -736,6 +736,8 @@ public: const ASTPtr & new_settings, AlterLockHolder & table_lock_holder); + static void verifySortingKey(const KeyDescription & sorting_key); + /// Should be called if part data is suspected to be corrupted. /// Has the ability to check all other parts /// which reside on the same disk of the suspicious part. diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 4244ccccfe0..d234103e52b 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include @@ -32,7 +31,6 @@ namespace ErrorCodes extern const int UNKNOWN_STORAGE; extern const int NO_REPLICA_NAME_GIVEN; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int DATA_TYPE_CANNOT_BE_USED_IN_KEY; } @@ -113,16 +111,6 @@ static ColumnsDescription getColumnsDescriptionFromZookeeper(const String & raw_ return ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_path) / "columns", &columns_stat)); } -static void verifySortingKey(const KeyDescription & sorting_key) -{ - /// Aggregate functions already forbidden, but SimpleAggregateFunction are not - for (const auto & data_type : sorting_key.data_types) - { - if (dynamic_cast(data_type->getCustomName())) - throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_KEY, "Column with type {} is not allowed in key expression", data_type->getCustomName()->getName()); - } -} - /// Returns whether a new syntax is used to define a table engine, i.e. MergeTree() PRIMARY KEY ... PARTITION BY ... SETTINGS ... /// instead of MergeTree(MergeTree(date, [sample_key], primary_key). static bool isExtendedStorageDef(const ASTCreateQuery & query) @@ -678,8 +666,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// column if sorting key will be changed. metadata.sorting_key = KeyDescription::getSortingKeyFromAST( args.storage_def->order_by->ptr(), metadata.columns, context, merging_param_key_arg); - if (!local_settings.allow_suspicious_primary_key) - verifySortingKey(metadata.sorting_key); + if (!local_settings.allow_suspicious_primary_key && args.mode <= LoadingStrictnessLevel::CREATE) + MergeTreeData::verifySortingKey(metadata.sorting_key); /// If primary key explicitly defined, than get it from AST if (args.storage_def->primary_key) @@ -792,8 +780,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// column if sorting key will be changed. 
metadata.sorting_key = KeyDescription::getSortingKeyFromAST(engine_args[arg_num], metadata.columns, context, merging_param_key_arg); - if (!local_settings.allow_suspicious_primary_key) - verifySortingKey(metadata.sorting_key); + if (!local_settings.allow_suspicious_primary_key && args.mode <= LoadingStrictnessLevel::CREATE) + MergeTreeData::verifySortingKey(metadata.sorting_key); /// In old syntax primary_key always equals to sorting key. metadata.primary_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, context); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9144ef7c0f7..ea698775298 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -333,17 +333,21 @@ void StorageMergeTree::alter( auto table_id = getStorageID(); auto old_storage_settings = getSettings(); + const auto & query_settings = local_context->getSettingsRef(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); - auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, local_context->getSettingsRef().materialize_ttl_after_modify, local_context); + auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, query_settings.materialize_ttl_after_modify, local_context); if (!maybe_mutation_commands.empty()) delayMutationOrThrowIfNeeded(nullptr, local_context); Int64 mutation_version = -1; commands.apply(new_metadata, local_context); + if (!query_settings.allow_suspicious_primary_key) + MergeTreeData::verifySortingKey(new_metadata.sorting_key); + /// This alter can be performed at new_metadata level only if (commands.isSettingsAlter()) { @@ -396,7 +400,7 @@ void StorageMergeTree::alter( resetObjectColumnsFromActiveParts(parts_lock); } - if (!maybe_mutation_commands.empty() && local_context->getSettingsRef().alter_sync > 0) + if (!maybe_mutation_commands.empty() && query_settings.alter_sync > 0) waitForMutation(mutation_version, false); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 378b81c6d18..e0a24ceac4d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6027,6 +6027,7 @@ void StorageReplicatedMergeTree::alter( assertNotReadonly(); auto table_id = getStorageID(); + const auto & query_settings = query_context->getSettingsRef(); if (commands.isSettingsAlter()) { @@ -6054,6 +6055,13 @@ void StorageReplicatedMergeTree::alter( return; } + if (!query_settings.allow_suspicious_primary_key) + { + StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); + commands.apply(future_metadata, query_context); + + MergeTreeData::verifySortingKey(future_metadata.sorting_key); + } auto ast_to_str = [](ASTPtr query) -> String { @@ -6186,7 +6194,7 @@ void StorageReplicatedMergeTree::alter( auto maybe_mutation_commands = commands.getMutationCommands( *current_metadata, - query_context->getSettingsRef().materialize_ttl_after_modify, + query_settings.materialize_ttl_after_modify, query_context); bool have_mutation = !maybe_mutation_commands.empty(); @@ -6309,7 +6317,7 @@ void StorageReplicatedMergeTree::alter( { LOG_DEBUG(log, "Metadata changes applied. 
Will wait for data changes."); merge_selecting_task->schedule(); - waitMutation(*mutation_znode, query_context->getSettingsRef().alter_sync); + waitMutation(*mutation_znode, query_settings.alter_sync); LOG_DEBUG(log, "Data changes applied."); } } diff --git a/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql b/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql index f1727cb9e5c..fee42d1abc6 100644 --- a/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql +++ b/tests/queries/0_stateless/03020_order_by_SimpleAggregateFunction.sql @@ -1,6 +1,6 @@ set allow_suspicious_primary_key = 0; -DROP TABLE IF EXISTS data; +drop table if exists data; create table data (key Int, value AggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } @@ -12,7 +12,22 @@ create table data (key Int, value AggregateFunction(sum, UInt64)) engine=Aggrega create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() primary key value order by (value, key); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } set allow_suspicious_primary_key = 1; - create table data (key Int, value SimpleAggregateFunction(sum, UInt64)) engine=AggregatingMergeTree() primary key value order by (value, key); -DROP TABLE data; +-- ATTACH should work regardless allow_suspicious_primary_key +set allow_suspicious_primary_key = 0; +detach table data; +attach table data; +drop table data; + +-- ALTER AggregatingMergeTree +create table data (key Int) engine=AggregatingMergeTree() order by (key); +alter table data add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } +alter table data add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value) settings allow_suspicious_primary_key=1; +drop table data; + +-- ALTER ReplicatedAggregatingMergeTree +create table data_rep (key Int) engine=ReplicatedAggregatingMergeTree('/tables/{database}', 'r1') order by (key); +alter table data_rep add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value); -- { serverError DATA_TYPE_CANNOT_BE_USED_IN_KEY } +alter table data_rep add column value SimpleAggregateFunction(sum, UInt64), modify order by (key, value) settings allow_suspicious_primary_key=1; +drop table data_rep; From d5d8d689748fbc125c37381fd9680c32468e07d0 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Wed, 22 May 2024 13:06:56 +0200 Subject: [PATCH 243/392] Remove unused storage_snapshot field --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeSelectProcessor.cpp | 2 -- src/Storages/MergeTree/MergeTreeSelectProcessor.h | 2 -- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 6f0fa55c349..503031eb04b 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -381,7 +381,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, 
actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -480,7 +480,7 @@ Pipe ReadFromMergeTree::readFromPool( auto algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size_copy, reader_settings); auto source = std::make_shared(std::move(processor)); @@ -592,7 +592,7 @@ Pipe ReadFromMergeTree::readInOrder( algorithm = std::make_unique(i); auto processor = std::make_unique( - pool, std::move(algorithm), storage_snapshot, prewhere_info, + pool, std::move(algorithm), prewhere_info, actions_settings, block_size, reader_settings); processor->addPartLevelToChunk(isQueryWithFinal()); diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index fce733d47b7..78b67de1a7e 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -26,14 +26,12 @@ namespace ErrorCodes MergeTreeSelectProcessor::MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, const MergeTreeReaderSettings & reader_settings_) : pool(std::move(pool_)) , algorithm(std::move(algorithm_)) - , storage_snapshot(storage_snapshot_) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 6b663e0fd36..8f41f5deacb 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -41,7 +41,6 @@ public: MergeTreeSelectProcessor( MergeTreeReadPoolPtr pool_, MergeTreeSelectAlgorithmPtr algorithm_, - const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const ExpressionActionsSettings & actions_settings_, const MergeTreeReadTask::BlockSizeParams & block_size_params_, @@ -71,7 +70,6 @@ private: const MergeTreeReadPoolPtr pool; const MergeTreeSelectAlgorithmPtr algorithm; - const StorageSnapshotPtr storage_snapshot; const PrewhereInfoPtr prewhere_info; const ExpressionActionsSettings actions_settings; From 5f01b14e0dc2f9a96d1c06cd2f9fb0112209ab59 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 22 May 2024 12:00:29 +0200 Subject: [PATCH 244/392] add prints --- tests/ci/ci.py | 4 ++-- tests/ci/ci_metadata.py | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 046550c62f8..40f5617f165 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1912,9 +1912,9 @@ def _get_ext_check_name(check_name: str) -> str: def _cancel_pr_wf(s3: S3Helper, pr_number: int) -> None: - run_id = CiMetadata(s3, pr_number).run_id + run_id = CiMetadata(s3, pr_number).fetch_meta().run_id if not run_id: - print("ERROR: FIX IT: Run id has not been found!") + print(f"ERROR: FIX IT: Run id has not been found PR [{pr_number}]!") else: print(f"Canceling PR workflow run_id: [{run_id}], pr: [{pr_number}]") GitHub.cancel_wf(run_id) diff --git a/tests/ci/ci_metadata.py b/tests/ci/ci_metadata.py index 5856e9a8501..82d44cf1adc 
100644 --- a/tests/ci/ci_metadata.py +++ b/tests/ci/ci_metadata.py @@ -6,6 +6,7 @@ from env_helper import ( TEMP_PATH, ) from s3_helper import S3Helper +from ci_utils import GHActions # pylint: disable=too-many-lines @@ -83,7 +84,10 @@ class CiMetadata: Uploads meta on s3 """ assert self.run_id - print("Storing workflow meta on s3") + GHActions.print_in_group( + f"Storing workflow metadata: PR [{self.pr_number}]", + [f"run_id: {self.run_id}"], + ) local_file = self._LOCAL_PATH / self._FILENAME_RUN_ID with open(local_file, "w", encoding="utf-8") as file: From 5c47b091144e24ee1fbd6627186e7965c9ad233e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 22 May 2024 13:18:51 +0200 Subject: [PATCH 245/392] Ignore text_log for Keeper --- programs/keeper/Keeper.cpp | 5 +++++ programs/keeper/Keeper.h | 2 ++ src/Loggers/Loggers.cpp | 2 +- src/Loggers/Loggers.h | 4 ++++ src/Loggers/OwnSplitChannel.cpp | 9 +++++---- src/Loggers/OwnSplitChannel.h | 1 - 6 files changed, 17 insertions(+), 6 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 267b725b02b..dba5c2b7d2a 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -182,6 +182,11 @@ std::string Keeper::getDefaultConfigFileName() const return "keeper_config.xml"; } +bool Keeper::allowTextLog() const +{ + return false; +} + void Keeper::handleCustomArguments(const std::string & arg, [[maybe_unused]] const std::string & value) // NOLINT { if (arg == "force-recovery") diff --git a/programs/keeper/Keeper.h b/programs/keeper/Keeper.h index f889ffa595b..c449c40b610 100644 --- a/programs/keeper/Keeper.h +++ b/programs/keeper/Keeper.h @@ -65,6 +65,8 @@ protected: std::string getDefaultConfigFileName() const override; + bool allowTextLog() const override; + private: Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 4b17469f4d7..0bd4b94d999 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -263,7 +263,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log } } #ifndef WITHOUT_TEXT_LOG - if (config.has("text_log")) + if (allowTextLog() && config.has("text_log")) { String text_log_level_str = config.getString("text_log.level", "trace"); int text_log_level = Poco::Logger::parseLevel(text_log_level_str); diff --git a/src/Loggers/Loggers.h b/src/Loggers/Loggers.h index 9eff731a4c5..9923d66ebcb 100644 --- a/src/Loggers/Loggers.h +++ b/src/Loggers/Loggers.h @@ -23,6 +23,10 @@ public: /// Close log files. On next log write files will be reopened. 
void closeLogs(Poco::Logger & logger); + virtual ~Loggers() = default; + +protected: + virtual bool allowTextLog() const { return true; } private: Poco::AutoPtr log_file; diff --git a/src/Loggers/OwnSplitChannel.cpp b/src/Loggers/OwnSplitChannel.cpp index fee33781c27..dc51a13e01f 100644 --- a/src/Loggers/OwnSplitChannel.cpp +++ b/src/Loggers/OwnSplitChannel.cpp @@ -107,6 +107,10 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) [[maybe_unused]] bool push_result = logs_queue->emplace(std::move(columns)); } + auto text_log_locked = text_log.lock(); + if (!text_log_locked) + return; + /// Also log to system.text_log table, if message is not too noisy auto text_log_max_priority_loaded = text_log_max_priority.load(std::memory_order_relaxed); if (text_log_max_priority_loaded && msg.getPriority() <= text_log_max_priority_loaded) @@ -146,10 +150,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) #undef SET_VALUE_IF_EXISTS - std::shared_ptr> text_log_locked{}; - text_log_locked = text_log.lock(); - if (text_log_locked) - text_log_locked->push(std::move(elem)); + text_log_locked->push(std::move(elem)); } #endif } diff --git a/src/Loggers/OwnSplitChannel.h b/src/Loggers/OwnSplitChannel.h index b75554eefc4..7ca27cf6584 100644 --- a/src/Loggers/OwnSplitChannel.h +++ b/src/Loggers/OwnSplitChannel.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include From 03fc077be7d8576c4e3e550842f2fd7c6d06a78f Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 22 May 2024 14:12:37 +0200 Subject: [PATCH 246/392] Fxi --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- src/Storages/ObjectStorage/ReadBufferIterator.cpp | 6 +++--- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 1 - src/Storages/ObjectStorage/StorageObjectStorageSource.cpp | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index d18468411ea..c07313b52db 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -575,7 +575,7 @@ void S3ObjectStorage::applyNewSettings( ContextPtr context, const ApplyNewSettingsOptions & options) { - auto new_s3_settings = getSettings(config, config_prefix, context); + auto new_s3_settings = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings); if (!static_headers.empty()) { new_s3_settings->auth_settings.headers.insert( diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index e065de16e55..5a8a4735fe1 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -145,7 +145,7 @@ std::unique_ptr ReadBufferIterator::recreateLastReadBuffer() auto context = getContext(); const auto & path = current_object_info->isArchive() ? 
current_object_info->getPathToArchive() : current_object_info->getPath(); - auto impl = object_storage->readObject(StoredObject(), context->getReadSettings()); + auto impl = object_storage->readObject(StoredObject(path), context->getReadSettings()); const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method); const auto zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); @@ -258,10 +258,10 @@ ReadBufferIterator::Data ReadBufferIterator::next() std::unique_ptr read_buf; CompressionMethod compression_method; using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; - if (auto object_info_in_archive = dynamic_cast(current_object_info.get())) + if (const auto * object_info_in_archive = dynamic_cast(current_object_info.get())) { compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method); - auto & archive_reader = object_info_in_archive->archive_reader; + const auto & archive_reader = object_info_in_archive->archive_reader; read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); } else diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 5de7f41b4f7..2c8e60b49d0 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 8d946f515a3..a2b3ca5b69e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -279,10 +279,10 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade else { CompressionMethod compression_method; - if (auto object_info_in_archive = dynamic_cast(object_info.get())) + if (const auto * object_info_in_archive = dynamic_cast(object_info.get())) { compression_method = chooseCompressionMethod(configuration->getPathInArchive(), configuration->compression_method); - auto & archive_reader = object_info_in_archive->archive_reader; + const auto & archive_reader = object_info_in_archive->archive_reader; read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); } else From 6942ae0c1e6204d8ee91b8e69e88be85ec289620 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 13 May 2024 12:00:52 +0000 Subject: [PATCH 247/392] Fix SimpleSquashingChunksTransform (02115_rewrite_local_join_right_distribute_table) --- src/Processors/IInflatingTransform.cpp | 22 +++++--- src/Processors/IInflatingTransform.h | 8 ++- .../Transforms/ArrayJoinTransform.cpp | 4 +- .../Transforms/ArrayJoinTransform.h | 2 +- .../Transforms/SquashingChunksTransform.cpp | 52 +++++++------------ .../Transforms/SquashingChunksTransform.h | 12 ++--- ...rite_local_join_right_distribute_table.sql | 4 -- 7 files changed, 48 insertions(+), 56 deletions(-) diff --git a/src/Processors/IInflatingTransform.cpp b/src/Processors/IInflatingTransform.cpp index ffa5b55dc76..bc0b3e8459e 100644 --- a/src/Processors/IInflatingTransform.cpp +++ b/src/Processors/IInflatingTransform.cpp @@ -45,8 +45,13 @@ IInflatingTransform::Status IInflatingTransform::prepare() { if (input.isFinished()) { - output.finish(); - return 
Status::Finished; + if (is_finished) + { + output.finish(); + return Status::Finished; + } + is_finished = true; + return Status::Ready; } input.setNeeded(); @@ -71,16 +76,17 @@ void IInflatingTransform::work() current_chunk = generate(); generated = true; - can_generate = canGenerate(); + can_generate = canGenerate(is_finished); } else { - if (!has_input) - throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot consume chunk because it wasn't read"); + if (has_input) + { + consume(std::move(current_chunk)); + has_input = false; + } - consume(std::move(current_chunk)); - has_input = false; - can_generate = canGenerate(); + can_generate = canGenerate(is_finished); } } diff --git a/src/Processors/IInflatingTransform.h b/src/Processors/IInflatingTransform.h index 0ad12f6cd65..3f832b0e5bc 100644 --- a/src/Processors/IInflatingTransform.h +++ b/src/Processors/IInflatingTransform.h @@ -10,13 +10,14 @@ namespace DB /// for (chunk : input_chunks) /// { /// transform.consume(chunk); -/// /// while (transform.canGenerate()) /// { /// transformed_chunk = transform.generate(); /// ... (process transformed chunk) /// } /// } +/// while (transform.canGenerate(true)) +/// ... (process remaining data) /// class IInflatingTransform : public IProcessor { @@ -30,7 +31,7 @@ protected: bool can_generate = false; virtual void consume(Chunk chunk) = 0; - virtual bool canGenerate() = 0; + virtual bool canGenerate(bool is_read_finished) = 0; virtual Chunk generate() = 0; public: @@ -41,6 +42,9 @@ public: InputPort & getInputPort() { return input; } OutputPort & getOutputPort() { return output; } + + /// canGenerate can flush data when input is finished. + bool is_finished = false; }; } diff --git a/src/Processors/Transforms/ArrayJoinTransform.cpp b/src/Processors/Transforms/ArrayJoinTransform.cpp index 1304434d74e..b7a6ba85963 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.cpp +++ b/src/Processors/Transforms/ArrayJoinTransform.cpp @@ -38,14 +38,14 @@ void ArrayJoinTransform::consume(Chunk chunk) } -bool ArrayJoinTransform::canGenerate() +bool ArrayJoinTransform::canGenerate(bool) { return result_iterator && result_iterator->hasNext(); } Chunk ArrayJoinTransform::generate() { - if (!canGenerate()) + if (!canGenerate(false)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in ArrayJoinTransform"); auto block = result_iterator->next(); diff --git a/src/Processors/Transforms/ArrayJoinTransform.h b/src/Processors/Transforms/ArrayJoinTransform.h index 4219135982d..de291a0422f 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.h +++ b/src/Processors/Transforms/ArrayJoinTransform.h @@ -26,7 +26,7 @@ public: protected: void consume(Chunk chunk) override; - bool canGenerate() override; + bool canGenerate(bool is_read_finished) override; Chunk generate() override; private: diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 0d69b6e0a8d..b79987161fd 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -56,49 +56,35 @@ void SquashingChunksTransform::work() SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, true), squashing(min_block_size_rows, min_block_size_bytes) + : IInflatingTransform(header, header), squashing(min_block_size_rows, min_block_size_bytes) { } -void 
SimpleSquashingChunksTransform::transform(Chunk & chunk) +void SimpleSquashingChunksTransform::consume(Chunk chunk) { - if (!finished) - { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) - chunk.setColumns(block.getColumns(), block.rows()); - } - else - { - if (chunk.hasRows()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - - auto block = squashing.add({}); - chunk.setColumns(block.getColumns(), block.rows()); - } + current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); } -IProcessor::Status SimpleSquashingChunksTransform::prepare() +Chunk SimpleSquashingChunksTransform::generate() { - if (!finished && input.isFinished()) - { - if (output.isFinished()) - return Status::Finished; + if (!current_block) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - if (!output.canPush()) - return Status::PortFull; + Chunk result(current_block.getColumns(), current_block.rows()); + current_block.clear(); + return result; +} - if (has_output) - { - output.pushData(std::move(output_data)); - has_output = false; - return Status::PortFull; - } - finished = true; - /// On the next call to transform() we will return all data buffered in `squashing` (if any) - return Status::Ready; - } - return ISimpleTransform::prepare(); +bool SimpleSquashingChunksTransform::canGenerate(bool is_read_finished) +{ + if (current_block) + return true; + + if (is_read_finished) + current_block = squashing.add({}); + + return bool(current_block); } } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f82e9e46a61..d0316c39a43 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -2,6 +2,7 @@ #include #include +#include #include namespace DB @@ -29,7 +30,7 @@ private: }; /// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. -class SimpleSquashingChunksTransform : public ISimpleTransform +class SimpleSquashingChunksTransform : public IInflatingTransform { public: explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); @@ -37,14 +38,13 @@ public: String getName() const override { return "SimpleSquashingTransform"; } protected: - void transform(Chunk &) override; - - IProcessor::Status prepare() override; + void consume(Chunk chunk) override; + bool canGenerate(bool is_read_finished) override; + Chunk generate() override; private: SquashingTransform squashing; - /// When consumption is finished we need to release the final chunk regardless of its size. 
- bool finished = false; + Block current_block; }; } diff --git a/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql b/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql index 2ab324df787..d5ab82ba064 100644 --- a/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql +++ b/tests/queries/0_stateless/02115_rewrite_local_join_right_distribute_table.sql @@ -23,10 +23,6 @@ select t1.* from t1_all t1 join t2_all t2 on t1.a = t2.a ORDER BY t1.a; SELECT '-'; --- make sure data is fully written when reading from distributed -optimize table t1_local final; -optimize table t2_local final; - set distributed_product_mode = 'global'; select * from t1_all t1 where t1.a in (select t2.a from t2_all t2); explain syntax select t1.* from t1_all t1 join t2_all t2 on t1.a = t2.a; From 3f4f253c39b7118aab95b20af900d79cf1065cad Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 20 May 2024 08:09:55 +0000 Subject: [PATCH 248/392] Enable keep_free_space_bytes for metadata storage --- .../ObjectStorages/MetadataStorageFactory.cpp | 4 ++- ...02963_test_flexible_disk_configuration.sql | 26 +++++++++++++++++-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index 4a3e8a37d28..ab7c2069b43 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -99,8 +99,10 @@ void registerMetadataStorageFromDisk(MetadataStorageFactory & factory) { auto metadata_path = config.getString(config_prefix + ".metadata_path", fs::path(Context::getGlobalContextInstance()->getPath()) / "disks" / name / ""); + auto metadata_keep_free_space_bytes = config.getUInt64(config_prefix + ".metadata_keep_free_space_bytes", 0); + fs::create_directories(metadata_path); - auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, 0, config, config_prefix); + auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, metadata_keep_free_space_bytes, config, config_prefix); auto key_compatibility_prefix = getObjectKeyCompatiblePrefix(*object_storage, config, config_prefix); return std::make_shared(metadata_disk, key_compatibility_prefix); }); diff --git a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql index 552291b2f83..8f67cd7e030 100644 --- a/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql +++ b/tests/queries/0_stateless/02963_test_flexible_disk_configuration.sql @@ -30,6 +30,28 @@ settings disk=disk(name='test2', drop table test; create table test (a Int32) engine = MergeTree() order by tuple() settings disk=disk(name='test3', + type = object_storage, + object_storage_type = s3, + metadata_storage_type = local, + metadata_keep_free_space_bytes = 1024, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); +drop table test; + +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test4', + type = object_storage, + object_storage_type = s3, + metadata_storage_type = local, + metadata_keep_free_space_bytes = 0, + endpoint = 'http://localhost:11111/test/common/', + access_key_id = clickhouse, + secret_access_key = clickhouse); +drop table test; + +create table test (a Int32) engine = MergeTree() order by tuple() +settings disk=disk(name='test5', type = 
object_storage, object_storage_type = s3, metadata_type = lll, @@ -38,7 +60,7 @@ settings disk=disk(name='test3', secret_access_key = clickhouse); -- { serverError UNKNOWN_ELEMENT_IN_CONFIG } create table test (a Int32) engine = MergeTree() order by tuple() -settings disk=disk(name='test4', +settings disk=disk(name='test6', type = object_storage, object_storage_type = kkk, metadata_type = local, @@ -47,7 +69,7 @@ settings disk=disk(name='test4', secret_access_key = clickhouse); -- { serverError UNKNOWN_ELEMENT_IN_CONFIG } create table test (a Int32) engine = MergeTree() order by tuple() -settings disk=disk(name='test5', +settings disk=disk(name='test7', type = kkk, object_storage_type = s3, metadata_type = local, From e055de32bedb80dff96bd0f8809e967dafe1c0cb Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 20 May 2024 08:11:48 +0000 Subject: [PATCH 249/392] Add docs --- docs/en/operations/storing-data.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 9b316960750..53ecd66396d 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -421,6 +421,7 @@ Other parameters: * `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`. * `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). * `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +* `metadata_keep_free_space_bytes` - the amount of free metadata disk space to be reserved. Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). 
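
For reference, a minimal sketch of how the new `metadata_keep_free_space_bytes` parameter is exercised through the dynamic disk syntax used by the test above; the disk name, endpoint and credentials are placeholders matching the local test mock, not a recommended production configuration:

```sql
-- reserve 1 KiB of free space on the local metadata disk of a dynamically defined S3 disk
CREATE TABLE t (a Int32) ENGINE = MergeTree() ORDER BY tuple()
SETTINGS disk = disk(
    name = 'example_disk',
    type = object_storage,
    object_storage_type = s3,
    metadata_storage_type = local,
    metadata_keep_free_space_bytes = 1024,
    endpoint = 'http://localhost:11111/test/common/',
    access_key_id = clickhouse,
    secret_access_key = clickhouse);
```
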
From 6e605030d14d1ddba62d97d42a47067d08a78d8b Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Tue, 21 May 2024 11:55:39 +0000 Subject: [PATCH 250/392] Trigger Ci From b899bd07cfdee3a2919583482c0da2354bbb348a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 22 May 2024 16:12:33 +0200 Subject: [PATCH 251/392] Better --- utils/keeper-bench/Runner.cpp | 90 +++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 36 deletions(-) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index a625a7f157d..ed7e09685f0 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -4,30 +4,28 @@ #include #include +#include #include -#include "Common/ConcurrentBoundedQueue.h" -#include "Common/Exception.h" -#include "Common/ZooKeeper/IKeeper.h" -#include "Common/ZooKeeper/ZooKeeperArgs.h" -#include "Common/ZooKeeper/ZooKeeperCommon.h" -#include "Common/ZooKeeper/ZooKeeperConstants.h" -#include -#include -#include "Coordination/KeeperSnapshotManager.h" -#include "Core/ColumnWithTypeAndName.h" -#include "Core/ColumnsWithTypeAndName.h" +#include +#include #include -#include "IO/ReadBuffer.h" -#include "IO/ReadBufferFromFile.h" -#include "base/Decimal.h" -#include "base/types.h" -#include +#include +#include +#include +#include #include #include #include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace CurrentMetrics @@ -884,6 +882,7 @@ struct SetupNodeCollector if (initial_storage->container.contains(path)) return; + new_nodes = true; std::cerr << "Adding expected node " << path << std::endl; Coordination::Requests create_ops; @@ -923,11 +922,19 @@ struct SetupNodeCollector void generateSnapshot() { - std::cerr << "Generating snapshot with starting data" << std::endl; std::lock_guard lock(nodes_mutex); + if (!new_nodes) + { + std::cerr << "No new nodes added" << std::endl; + return; + } + + std::cerr << "Generating snapshot with starting data" << std::endl; DB::SnapshotMetadataPtr snapshot_meta = std::make_shared(initial_storage->getZXID(), 1, std::make_shared()); DB::KeeperStorageSnapshot snapshot(initial_storage.get(), snapshot_meta); snapshot_manager->serializeSnapshotToDisk(snapshot); + + new_nodes = false; } std::mutex nodes_mutex; @@ -935,6 +942,7 @@ struct SetupNodeCollector Coordination::KeeperStoragePtr initial_storage; std::unordered_set nodes_created_during_replay; std::optional snapshot_manager; + bool new_nodes = false; }; void dumpStats(std::string_view type, const RequestFromLogStats::Stats & stats_for_type) @@ -972,23 +980,25 @@ void requestFromLogExecutor(std::shared_ptrtoString(), response.error, *expected_result) - // << std::endl; +#if 0 + if (*expected_result != response.error) + { + std::cerr << fmt::format( + "Unexpected result for {}\ngot {}, expected {}\n", request->toString(), response.error, *expected_result) + << std::endl; - // if (const auto * multi_response = dynamic_cast(&response)) - // { - // std::string subresponses; - // for (size_t i = 0; i < multi_response->responses.size(); ++i) - // { - // subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); - // } + if (const auto * multi_response = dynamic_cast(&response)) + { + std::string subresponses; + for (size_t i = 0; i < multi_response->responses.size(); ++i) + { + subresponses += fmt::format("{} = {}\n", i, multi_response->responses[i]->error); + } - // std::cerr << "Subresponses\n" << subresponses << std::endl; - // } - //} + std::cerr << 
"Subresponses\n" << subresponses << std::endl; + } + } +#endif } request_promise->set_value(); @@ -1048,8 +1058,16 @@ void Runner::runBenchmarkFromLog() pool->wait(); - dumpStats("Write", stats.write_requests); - dumpStats("Read", stats.read_requests); + + if (setup_nodes_collector) + { + setup_nodes_collector->generateSnapshot(); + } + else + { + dumpStats("Write", stats.write_requests); + dumpStats("Read", stats.read_requests); + } }); auto push_request = [&](RequestFromLog request) From e05305692eaf0a5a6cab6d72196b9575ccf56fa6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 22 May 2024 16:33:01 +0200 Subject: [PATCH 252/392] Fix encrypted --- src/Disks/DiskEncrypted.h | 5 +++++ src/Disks/IDisk.h | 9 +++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 27000dcc8af..27cf3096344 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -350,6 +350,11 @@ public: return delegate; } + ObjectStoragePtr getObjectStorage() override + { + return delegate->getObjectStorage(); + } + private: String wrappedPath(const String & path) const { diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 614fe413503..b59e5b7f558 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -116,13 +116,18 @@ public: /// Default constructor. IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) : name(name_) - , copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, CurrentMetrics::IDiskCopierThreadsScheduled, config.getUInt(config_prefix + ".thread_pool_size", 16)) + , copying_thread_pool( + CurrentMetrics::IDiskCopierThreads, + CurrentMetrics::IDiskCopierThreadsActive, + CurrentMetrics::IDiskCopierThreadsScheduled, + config.getUInt(config_prefix + ".thread_pool_size", 16)) { } explicit IDisk(const String & name_) : name(name_) - , copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, CurrentMetrics::IDiskCopierThreadsScheduled, 16) + , copying_thread_pool( + CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, CurrentMetrics::IDiskCopierThreadsScheduled, 16) { } From 39eef359dbc142c53d9f0162a36f0fee74e5edcc Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 22 May 2024 14:39:13 +0000 Subject: [PATCH 253/392] Add IInflatingTransform::getRemaining instead of flag in canGenerate --- src/Processors/IInflatingTransform.cpp | 21 ++++++++++------ src/Processors/IInflatingTransform.h | 7 +++--- .../Transforms/ArrayJoinTransform.cpp | 4 +-- .../Transforms/ArrayJoinTransform.h | 2 +- .../Transforms/SquashingChunksTransform.cpp | 25 +++++++++---------- .../Transforms/SquashingChunksTransform.h | 7 +++--- 6 files changed, 37 insertions(+), 29 deletions(-) diff --git a/src/Processors/IInflatingTransform.cpp b/src/Processors/IInflatingTransform.cpp index bc0b3e8459e..a59eda0feb2 100644 --- a/src/Processors/IInflatingTransform.cpp +++ b/src/Processors/IInflatingTransform.cpp @@ -76,17 +76,24 @@ void IInflatingTransform::work() current_chunk = generate(); generated = true; - can_generate = canGenerate(is_finished); + can_generate = canGenerate(); + } + else if (is_finished) + { + if (can_generate || generated || has_input) + throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot finish work because it has generated data or has input data"); + + current_chunk = getRemaining(); + generated = !current_chunk.empty(); } else { - if (has_input) - { - 
consume(std::move(current_chunk)); - has_input = false; - } + if (!has_input) + throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot consume chunk because it wasn't read"); - can_generate = canGenerate(is_finished); + consume(std::move(current_chunk)); + has_input = false; + can_generate = canGenerate(); } } diff --git a/src/Processors/IInflatingTransform.h b/src/Processors/IInflatingTransform.h index 3f832b0e5bc..0cb7fc06cc4 100644 --- a/src/Processors/IInflatingTransform.h +++ b/src/Processors/IInflatingTransform.h @@ -16,8 +16,8 @@ namespace DB /// ... (process transformed chunk) /// } /// } -/// while (transform.canGenerate(true)) -/// ... (process remaining data) +/// transformed_chunk = transform.getRemaining(); +/// ... (process remaining data) /// class IInflatingTransform : public IProcessor { @@ -31,8 +31,9 @@ protected: bool can_generate = false; virtual void consume(Chunk chunk) = 0; - virtual bool canGenerate(bool is_read_finished) = 0; + virtual bool canGenerate() = 0; virtual Chunk generate() = 0; + virtual Chunk getRemaining() { return {}; } public: IInflatingTransform(Block input_header, Block output_header); diff --git a/src/Processors/Transforms/ArrayJoinTransform.cpp b/src/Processors/Transforms/ArrayJoinTransform.cpp index b7a6ba85963..1304434d74e 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.cpp +++ b/src/Processors/Transforms/ArrayJoinTransform.cpp @@ -38,14 +38,14 @@ void ArrayJoinTransform::consume(Chunk chunk) } -bool ArrayJoinTransform::canGenerate(bool) +bool ArrayJoinTransform::canGenerate() { return result_iterator && result_iterator->hasNext(); } Chunk ArrayJoinTransform::generate() { - if (!canGenerate(false)) + if (!canGenerate()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in ArrayJoinTransform"); auto block = result_iterator->next(); diff --git a/src/Processors/Transforms/ArrayJoinTransform.h b/src/Processors/Transforms/ArrayJoinTransform.h index de291a0422f..4219135982d 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.h +++ b/src/Processors/Transforms/ArrayJoinTransform.h @@ -26,7 +26,7 @@ public: protected: void consume(Chunk chunk) override; - bool canGenerate(bool is_read_finished) override; + bool canGenerate() override; Chunk generate() override; private: diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index b79987161fd..267490dc89e 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -62,29 +62,28 @@ SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( void SimpleSquashingChunksTransform::consume(Chunk chunk) { - current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + Block current_block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); } Chunk SimpleSquashingChunksTransform::generate() { - if (!current_block) + if (squashed_chunk.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - Chunk result(current_block.getColumns(), current_block.rows()); - current_block.clear(); - return result; + return std::move(squashed_chunk); } - -bool SimpleSquashingChunksTransform::canGenerate(bool is_read_finished) +bool SimpleSquashingChunksTransform::canGenerate() { - if (current_block) - return true; + return 
!squashed_chunk.empty(); +} - if (is_read_finished) - current_block = squashing.add({}); - - return bool(current_block); +Chunk SimpleSquashingChunksTransform::getRemaining() +{ + Block current_block = squashing.add({}); + squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); + return std::move(squashed_chunk); } } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index d0316c39a43..8c30a6032e4 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -39,12 +39,13 @@ public: protected: void consume(Chunk chunk) override; - bool canGenerate(bool is_read_finished) override; + bool canGenerate() override; Chunk generate() override; + Chunk getRemaining() override; private: SquashingTransform squashing; - - Block current_block; + Chunk squashed_chunk; }; + } From 7e0e953ec9913435505d75285d1e5244c869a797 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 21 May 2024 17:01:16 +0000 Subject: [PATCH 254/392] Add debug logging to EmbeddedRocksDBBulkSink --- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp | 13 ++++++++----- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h | 2 +- src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp | 16 ++++++++-------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 7094578a9cc..0baa234e7a3 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -155,7 +155,7 @@ std::vector EmbeddedRocksDBBulkSink::squash(Chunk chunk) return {}; } -std::pair EmbeddedRocksDBBulkSink::serializeChunks(const std::vector & input_chunks) const +std::pair EmbeddedRocksDBBulkSink::serializeChunks(std::vector && input_chunks) const { auto serialized_key_column = ColumnString::create(); auto serialized_value_column = ColumnString::create(); @@ -168,7 +168,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali WriteBufferFromVector writer_key(serialized_key_data); WriteBufferFromVector writer_value(serialized_value_data); - for (const auto & chunk : input_chunks) + for (auto && chunk : input_chunks) { const auto & columns = chunk.getColumns(); auto rows = chunk.getNumRows(); @@ -193,13 +193,14 @@ std::pair EmbeddedRocksDBBulkSink::seriali void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) { - std::vector to_written = squash(std::move(chunk_)); + std::vector chunks_to_write = squash(std::move(chunk_)); - if (to_written.empty()) + if (chunks_to_write.empty()) return; - auto [serialized_key_column, serialized_value_column] = serializeChunks(to_written); + auto [serialized_key_column, serialized_value_column] = serializeChunks(std::move(chunks_to_write)); auto sst_file_path = getTemporarySSTFilePath(); + LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "Writing {} rows to SST file {}", serialized_key_column->size(), sst_file_path); if (auto status = buildSSTFile(sst_file_path, *serialized_key_column, *serialized_value_column); !status.ok()) throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); @@ -209,6 +210,7 @@ void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) if (auto status = storage.rocksdb_ptr->IngestExternalFile({sst_file_path}, ingest_options); !status.ok()) throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); + LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "SST file {} has been 
ingested", sst_file_path); if (fs::exists(sst_file_path)) (void)fs::remove(sst_file_path); } @@ -237,4 +239,5 @@ bool EmbeddedRocksDBBulkSink::isEnoughSize(const Chunk & chunk) const { return chunk.getNumRows() >= min_block_size_rows; } + } diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 19ce1e3b83e..46193b152ca 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -49,7 +49,7 @@ private: bool isEnoughSize(const std::vector & input_chunks) const; bool isEnoughSize(const Chunk & chunk) const; /// Serialize chunks to rocksdb key-value pairs - std::pair serializeChunks(const std::vector & input_chunks) const; + std::pair serializeChunks(std::vector && input_chunks) const; StorageEmbeddedRocksDB & storage; StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 1a9aa6d0f41..e00cea27c49 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -316,6 +316,7 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt void StorageEmbeddedRocksDB::drop() { + std::lock_guard lock(rocksdb_ptr_mx); rocksdb_ptr->Close(); rocksdb_ptr = nullptr; } @@ -463,18 +464,13 @@ void StorageEmbeddedRocksDB::initDB() { rocksdb::DB * db; if (read_only) - { status = rocksdb::DB::OpenForReadOnly(merged, rocksdb_dir, &db); - } else - { status = rocksdb::DB::Open(merged, rocksdb_dir, &db); - } + if (!status.ok()) - { - throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", - rocksdb_dir, status.ToString()); - } + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Failed to open rocksdb path at: {}: {}", rocksdb_dir, status.ToString()); + rocksdb_ptr = std::unique_ptr(db); } } @@ -589,8 +585,12 @@ SinkToStoragePtr StorageEmbeddedRocksDB::write( const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context, bool /*async_insert*/) { if (getSettings().optimize_for_bulk_insert) + { + LOG_DEBUG(getLogger("StorageEmbeddedRocksDB"), "Using bulk insert"); return std::make_shared(query_context, *this, metadata_snapshot); + } + LOG_DEBUG(getLogger("StorageEmbeddedRocksDB"), "Using regular insert"); return std::make_shared(*this, metadata_snapshot); } From 7314689712549c1c2bf528fc8ef7638a2eb77ddf Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 22 May 2024 11:04:17 +0000 Subject: [PATCH 255/392] Store logger in StorageEmbeddedRocksDB --- src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp | 5 +++-- src/Storages/RocksDB/StorageEmbeddedRocksDB.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index e00cea27c49..c3b7ae64c7e 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -189,6 +189,7 @@ StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_, , rocksdb_dir(std::move(rocksdb_dir_)) , ttl(ttl_) , read_only(read_only_) + , log(getLogger(fmt::format("StorageEmbeddedRocksDB ({})", getStorageID().getNameForLogs()))) { setInMemoryMetadata(metadata_); setSettings(std::move(settings_)); @@ -586,11 +587,11 @@ SinkToStoragePtr StorageEmbeddedRocksDB::write( { if (getSettings().optimize_for_bulk_insert) { - LOG_DEBUG(getLogger("StorageEmbeddedRocksDB"), "Using bulk insert"); + 
LOG_DEBUG(log, "Using bulk insert"); return std::make_shared(query_context, *this, metadata_snapshot); } - LOG_DEBUG(getLogger("StorageEmbeddedRocksDB"), "Using regular insert"); + LOG_DEBUG(log, "Using regular insert"); return std::make_shared(*this, metadata_snapshot); } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 9fc58ea6b38..61592398954 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -124,5 +124,7 @@ private: bool read_only; void initDB(); + + LoggerPtr log; }; } From 6f4a8bf2ea5bff2afd619f1bad8b034b325bcbfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 22 May 2024 17:32:01 +0200 Subject: [PATCH 256/392] Simplify test --- .../03033_final_undefined_last_mark.reference | 4 ++-- .../03033_final_undefined_last_mark.sql | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference index bf0a25f24e4..a30b755709b 100644 --- a/tests/queries/0_stateless/03033_final_undefined_last_mark.reference +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.reference @@ -1,2 +1,2 @@ -GOOD 11338881281426660955 14765404159170880511 -GOOD 11338881281426660955 14765404159170880511 +Disabled 11338881281426660955 14765404159170880511 +Enabled 11338881281426660955 14765404159170880511 diff --git a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql index 2c13da42ca4..25a30a365a5 100644 --- a/tests/queries/0_stateless/03033_final_undefined_last_mark.sql +++ b/tests/queries/0_stateless/03033_final_undefined_last_mark.sql @@ -1,23 +1,23 @@ -- Tags: no-random-settings, no-random-merge-tree-settings +DROP TABLE IF EXISTS account_test; + CREATE TABLE account_test ( `id` UInt64, `row_ver` UInt64, ) ENGINE = ReplacingMergeTree(row_ver) -PARTITION BY id % 64 ORDER BY id -SETTINGS index_granularity = 512, index_granularity_bytes = 0, +SETTINGS index_granularity = 16, index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, min_rows_for_compact_part = 0, min_bytes_for_compact_part = 0; -INSERT INTO account_test - SELECT * FROM generateRandom('id UInt64, row_ver UInt64',1234) LIMIT 50000; +SYSTEM STOP MERGES account_test; -INSERT INTO account_test - SELECT * FROM (SELECT * FROM generateRandom('id UInt64, row_ver UInt64',1234) LIMIT 1000) WHERE row_ver > 14098131981223776000; +INSERT INTO account_test VALUES (11338881281426660955,717769962224129342),(12484100559155738267,7950971667203174918),(7603729260199571867,3255798127676911942),(7023543111808724827,911615979861855126),(10293135086416484571,3264379259750736572),(15561193439904316763,8419819469587131454),(17632407413882870235,7252071832370181502),(17009726455991851227,7525297506591593939),(12392078953873778779,8473049173389293961),(15283366022689446555,11692491360262171467),(9087459014730986523,2783662960221838603),(293823584550906267,4847630088179732782),(15693186194430465755,8163804880526285623),(7353080168325584795,17315892478487497859),(5980311238303466523,6943353798059390089),(14242621660019578011,8684624667957352769),(8241843507567433563,15731952080102886438); +INSERT INTO account_test VALUES (11338881281426660955, 14765404159170880511); -SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS 
split_parts_ranges_into_intersecting_and_non_intersecting_final = 0; -SELECT 'GOOD', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1; +SELECT 'Disabled', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 0; +SELECT 'Enabled', * FROM account_test FINAL WHERE id = 11338881281426660955 SETTINGS split_parts_ranges_into_intersecting_and_non_intersecting_final = 1; From 48cab9e9dbeb16d1be33bdcce9206c472445cd9f Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 22 May 2024 15:53:32 +0000 Subject: [PATCH 257/392] Fix tests --- src/Columns/ColumnDynamic.cpp | 6 +++--- src/Columns/ColumnDynamic.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Columns/ColumnDynamic.cpp b/src/Columns/ColumnDynamic.cpp index d63a03dbafd..3c147b6f123 100644 --- a/src/Columns/ColumnDynamic.cpp +++ b/src/Columns/ColumnDynamic.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include namespace DB { @@ -662,8 +662,8 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source all_variants.push_back(source_variants[i]); it = total_sizes.emplace(variant_name, 0).first; } - - size_t size = source_statistics.data.empty() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : source_statistics.data.at(variant_name); + auto statistics_it = source_statistics.data.find(variant_name); + size_t size = statistics_it == source_statistics.data.end() ? source_variant_column.getVariantByGlobalDiscriminator(i).size() : statistics_it->second; it->second += size; } } diff --git a/src/Columns/ColumnDynamic.h b/src/Columns/ColumnDynamic.h index 8aece765308..27ad0dd583f 100644 --- a/src/Columns/ColumnDynamic.h +++ b/src/Columns/ColumnDynamic.h @@ -96,13 +96,13 @@ public: MutableColumnPtr cloneEmpty() const override { - /// Keep current dynamic structure but not statistics. 
- return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types); + /// Keep current dynamic structure + return Base::create(variant_column->cloneEmpty(), variant_info, max_dynamic_types, statistics); } MutableColumnPtr cloneResized(size_t size) const override { - return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types); + return Base::create(variant_column->cloneResized(size), variant_info, max_dynamic_types, statistics); } size_t size() const override From 332f449a0cec30616180266d4a43a4e658794b1f Mon Sep 17 00:00:00 2001 From: Danila Puzov Date: Wed, 22 May 2024 18:59:39 +0300 Subject: [PATCH 258/392] Issues --- src/Functions/generateSnowflakeID.cpp | 272 +++++++++++------- src/Functions/serial.cpp | 67 +++-- .../03129_serial_test_zookeeper.sql | 16 +- .../03130_generateSnowflakeId.reference | 11 + .../0_stateless/03130_generateSnowflakeId.sql | 29 ++ .../03130_generate_snowflake_id.reference | 3 - .../03130_generate_snowflake_id.sql | 11 - 7 files changed, 252 insertions(+), 157 deletions(-) create mode 100644 tests/queries/0_stateless/03130_generateSnowflakeId.reference create mode 100644 tests/queries/0_stateless/03130_generateSnowflakeId.sql delete mode 100644 tests/queries/0_stateless/03130_generate_snowflake_id.reference delete mode 100644 tests/queries/0_stateless/03130_generate_snowflake_id.sql diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index d70b8349cd8..6ae5dc13af0 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -5,6 +5,7 @@ #include #include #include +#include "base/types.h" namespace DB @@ -34,43 +35,153 @@ namespace - The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes */ +/// bit counts constexpr auto timestamp_bits_count = 41; constexpr auto machine_id_bits_count = 10; constexpr auto machine_seq_num_bits_count = 12; -constexpr int64_t timestamp_mask = ((1LL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count); -constexpr int64_t machine_id_mask = ((1LL << machine_id_bits_count) - 1) << machine_seq_num_bits_count; -constexpr int64_t machine_seq_num_mask = (1LL << machine_seq_num_bits_count) - 1; -constexpr int64_t max_machine_seq_num = machine_seq_num_mask; +/// bits masks for Snowflake ID components +// constexpr uint64_t timestamp_mask = ((1ULL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count); // unused +constexpr uint64_t machine_id_mask = ((1ULL << machine_id_bits_count) - 1) << machine_seq_num_bits_count; +constexpr uint64_t machine_seq_num_mask = (1ULL << machine_seq_num_bits_count) - 1; -Int64 getMachineID() +/// max values +constexpr uint64_t max_machine_seq_num = machine_seq_num_mask; + +uint64_t getMachineID() { UUID server_uuid = ServerUUID::get(); /// hash into 64 bits - UInt64 hi = UUIDHelpers::getHighBytes(server_uuid); - UInt64 lo = UUIDHelpers::getLowBytes(server_uuid); - return ((hi * 11) ^ (lo * 17)) & machine_id_mask; + uint64_t hi = UUIDHelpers::getHighBytes(server_uuid); + uint64_t lo = UUIDHelpers::getLowBytes(server_uuid); + /// return only 10 bits + return (((hi * 11) ^ (lo * 17)) & machine_id_mask) >> machine_seq_num_bits_count; } -Int64 getTimestamp() +uint64_t getTimestamp() { auto now = std::chrono::system_clock::now(); auto ticks_since_epoch = std::chrono::duration_cast(now.time_since_epoch()).count(); - return ticks_since_epoch & ((1LL << 
timestamp_bits_count) - 1); + return static_cast(ticks_since_epoch) & ((1ULL << timestamp_bits_count) - 1); } +struct SnowflakeComponents { + uint64_t timestamp; + uint64_t machind_id; + uint64_t machine_seq_num; +}; + +SnowflakeComponents toComponents(uint64_t snowflake) { + return { + .timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)), + .machind_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count), + .machine_seq_num = (snowflake & machine_seq_num_mask) + }; } -class FunctionSnowflakeID : public IFunction +uint64_t toSnowflakeID(SnowflakeComponents components) { + return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) | + components.machind_id << (machine_seq_num_bits_count) | + components.machine_seq_num); +} + +struct RangeOfSnowflakeIDs { + /// [begin, end) + SnowflakeComponents begin, end; +}; + +/* Get range of `input_rows_count` Snowflake IDs from `max(available, now)` + +1. Calculate Snowflake ID by current timestamp (`now`) +2. `begin = max(available, now)` +3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow +*/ +RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, size_t input_rows_count) { -private: - mutable std::atomic lowest_available_snowflake_id = 0; /// atomic to avoid a mutex + /// 1. `now` + SnowflakeComponents begin = { + .timestamp = getTimestamp(), + .machind_id = getMachineID(), + .machine_seq_num = 0 + }; -public: + /// 2. `begin` + if (begin.timestamp <= available.timestamp) + { + begin.timestamp = available.timestamp; + begin.machine_seq_num = available.machine_seq_num; + } + + /// 3. `end = begin + input_rows_count` + SnowflakeComponents end; + const uint64_t seq_nums_in_current_timestamp_left = (max_machine_seq_num - begin.machine_seq_num + 1); + if (input_rows_count >= seq_nums_in_current_timestamp_left) + /// if sequence numbers in current timestamp is not enough for rows => update timestamp + end.timestamp = begin.timestamp + 1 + (input_rows_count - seq_nums_in_current_timestamp_left) / (max_machine_seq_num + 1); + else + end.timestamp = begin.timestamp; + + end.machind_id = begin.machind_id; + end.machine_seq_num = (begin.machine_seq_num + input_rows_count) & machine_seq_num_mask; + + return {begin, end}; +} + +struct GlobalCounterPolicy +{ static constexpr auto name = "generateSnowflakeID"; - static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } + static constexpr auto doc_description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; - String getName() const override { return name; } + /// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously. 
+ struct Data + { + static inline std::atomic lowest_available_snowflake_id = 0; + + SnowflakeComponents reserveRange(size_t input_rows_count) + { + uint64_t available_snowflake_id = lowest_available_snowflake_id.load(); + RangeOfSnowflakeIDs range; + do + { + range = getRangeOfAvailableIDs(toComponents(available_snowflake_id), input_rows_count); + } + while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, toSnowflakeID(range.end))); + /// if `compare_exhange` failed => another thread updated `lowest_available_snowflake_id` and we should try again + /// completed => range of IDs [begin, end) is reserved, can return the beginning of the range + + return range.begin; + } + }; +}; + +struct ThreadLocalCounterPolicy +{ + static constexpr auto name = "generateSnowflakeIDThreadMonotonic"; + static constexpr auto doc_description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)"; + + /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads. + struct Data + { + static inline thread_local uint64_t lowest_available_snowflake_id = 0; + + SnowflakeComponents reserveRange(size_t input_rows_count) + { + RangeOfSnowflakeIDs range = getRangeOfAvailableIDs(toComponents(lowest_available_snowflake_id), input_rows_count); + lowest_available_snowflake_id = toSnowflakeID(range.end); + return range.begin; + } + }; +}; + +} + +template +class FunctionGenerateSnowflakeID : public IFunction, public FillPolicy +{ +public: + static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared(); } + + String getName() const override { return FillPolicy::name; } size_t getNumberOfArguments() const override { return 0; } bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const override { return false; } @@ -80,71 +191,36 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (!arguments.empty()) { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 0.", - getName(), arguments.size()); - } - return std::make_shared(); + FunctionArgumentDescriptors mandatory_args; + FunctionArgumentDescriptors optional_args{ + {"expr", nullptr, nullptr, "Arbitrary Expression"} + }; + validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + + return std::make_shared(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override { - auto col_res = ColumnVector::create(); - typename ColumnVector::Container & vec_to = col_res->getData(); + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_to = col_res->getData(); vec_to.resize(input_rows_count); - if 
(input_rows_count == 0) { - return col_res; - } - - const Int64 machine_id = getMachineID(); - Int64 current_timestamp = getTimestamp(); - Int64 current_machine_seq_num; - - Int64 available_snowflake_id, next_available_snowflake_id; - - const Int64 input_rows_count_signed = static_cast(input_rows_count); - - do + if (input_rows_count != 0) { - available_snowflake_id = lowest_available_snowflake_id.load(); - const Int64 available_timestamp = (available_snowflake_id & timestamp_mask) >> (machine_id_bits_count + machine_seq_num_bits_count); - const Int64 available_machine_seq_num = available_snowflake_id & machine_seq_num_mask; + typename FillPolicy::Data data; + /// get the begin of available snowflake ids range + SnowflakeComponents snowflake_id = data.reserveRange(input_rows_count); - if (current_timestamp > available_timestamp) + for (UInt64 & to_row : vec_to) { - /// handle overflow - current_machine_seq_num = 0; - } - else - { - current_timestamp = available_timestamp; - current_machine_seq_num = available_machine_seq_num; - } - - /// calculate new lowest_available_snowflake_id - const Int64 seq_nums_in_current_timestamp_left = (max_machine_seq_num - current_machine_seq_num + 1); - Int64 new_timestamp; - if (input_rows_count_signed >= seq_nums_in_current_timestamp_left) - new_timestamp = current_timestamp + 1 + (input_rows_count_signed - seq_nums_in_current_timestamp_left) / max_machine_seq_num; - else - new_timestamp = current_timestamp; - const Int64 new_machine_seq_num = (current_machine_seq_num + input_rows_count_signed) & machine_seq_num_mask; - next_available_snowflake_id = (new_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | new_machine_seq_num; - } - while (!lowest_available_snowflake_id.compare_exchange_strong(available_snowflake_id, next_available_snowflake_id)); - /// failed CAS => another thread updated `lowest_available_snowflake_id` - /// successful CAS => we have our range of exclusive values - - for (Int64 & to_row : vec_to) - { - to_row = (current_timestamp << (machine_id_bits_count + machine_seq_num_bits_count)) | machine_id | current_machine_seq_num; - if (current_machine_seq_num++ == max_machine_seq_num) - { - current_machine_seq_num = 0; - ++current_timestamp; + to_row = toSnowflakeID(snowflake_id); + if (snowflake_id.machine_seq_num++ == max_machine_seq_num) + { + snowflake_id.machine_seq_num = 0; + ++snowflake_id.timestamp; + } } } @@ -153,43 +229,27 @@ public: }; +template +void registerSnowflakeIDGenerator(auto& factory) +{ + static constexpr auto doc_syntax_format = "{}([expression])"; + static constexpr auto example_format = "SELECT {}()"; + static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)"; + + FunctionDocumentation::Description doc_description = FillPolicy::doc_description; + FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name); + FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. 
Optional."}}; + FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UInt64"; + FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; + FunctionDocumentation::Categories doc_categories = {"Snowflake ID"}; + + factory.template registerFunction>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); +} + REGISTER_FUNCTION(GenerateSnowflakeID) { - factory.registerFunction(FunctionDocumentation - { - .description=R"( -Generates a SnowflakeID -- unique identificators contains: -- The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970) -- The middle 10 bits are the machine ID -- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes - -In case the number of ids processed overflows, the timestamp field is incremented by 1 and the counter is reset to 0. -This function guarantees strict monotony on 1 machine and differences in values obtained on different machines. -)", - .syntax = "generateSnowflakeID()", - .arguments{}, - .returned_value = "Column of Int64", - .examples{ - {"single call", "SELECT generateSnowflakeID();", R"( -┌─generateSnowflakeID()─┐ -│ 7195510166884597760 │ -└───────────────────────┘)"}, - {"column call", "SELECT generateSnowflakeID() FROM numbers(10);", R"( -┌─generateSnowflakeID()─┐ -│ 7195516038159417344 │ -│ 7195516038159417345 │ -│ 7195516038159417346 │ -│ 7195516038159417347 │ -│ 7195516038159417348 │ -│ 7195516038159417349 │ -│ 7195516038159417350 │ -│ 7195516038159417351 │ -│ 7195516038159417352 │ -│ 7195516038159417353 │ -└───────────────────────┘)"}, - }, - .categories{"Unique identifiers", "Snowflake ID"} - }); + registerSnowflakeIDGenerator(factory); + registerSnowflakeIDGenerator(factory); } } diff --git a/src/Functions/serial.cpp b/src/Functions/serial.cpp index de3036ad242..d65df83c9f9 100644 --- a/src/Functions/serial.cpp +++ b/src/Functions/serial.cpp @@ -1,9 +1,12 @@ +#include "Common/Exception.h" #include #include #include #include +#include #include + namespace DB { @@ -14,6 +17,9 @@ namespace ErrorCodes extern const int KEEPER_EXCEPTION; } +constexpr auto function_node_name = "/serial_ids/"; +constexpr size_t MAX_SERIES_NUMBER = 1000; // ? 
+ class FunctionSerial : public IFunction { private: @@ -21,7 +27,7 @@ private: ContextPtr context; public: - static constexpr auto name = "serial"; + static constexpr auto name = "generateSerialID"; explicit FunctionSerial(ContextPtr context_) : context(context_) { @@ -48,16 +54,12 @@ public: bool hasInformationAboutMonotonicity() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 1.", - getName(), arguments.size()); - if (!isStringOrFixedString(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Type of argument for function {} doesn't match: passed {}, should be string", - getName(), arguments[0]->getName()); + FunctionArgumentDescriptors mandatory_args{ + {"series identifier", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} + }; + validateFunctionArgumentTypes(*this, arguments, mandatory_args); return std::make_shared(); } @@ -71,12 +73,19 @@ public: if (zk->expired()) zk = context->getZooKeeper(); + // slow? + if (zk->exists(function_node_name) && zk->getChildren(function_node_name).size() == MAX_SERIES_NUMBER) { + throw Exception(ErrorCodes::KEEPER_EXCEPTION, + "At most {} serial nodes can be created", + MAX_SERIES_NUMBER); + } + auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); vec_to.resize(input_rows_count); - const auto & serial_path = "/serials/" + arguments[0].column->getDataAt(0).toString(); + const auto & serial_path = function_node_name + arguments[0].column->getDataAt(0).toString(); /// CAS in ZooKeeper /// `get` value and version, `trySet` new with version check @@ -130,28 +139,28 @@ Generates and returns sequential numbers starting from the previous counter valu This function takes a constant string argument - a series identifier. The server should be configured with a ZooKeeper. 
)", - .syntax = "serial(identifier)", + .syntax = "generateSerialID(identifier)", .arguments{ - {"series identifier", "Series identifier (String)"} + {"series identifier", "Series identifier (String or FixedString)"} }, .returned_value = "Sequential numbers of type Int64 starting from the previous counter value", .examples{ - {"first call", "SELECT serial('id1')", R"( -┌─serial('id1')──┐ -│ 1 │ -└────────────────┘)"}, - {"second call", "SELECT serial('id1')", R"( -┌─serial('id1')──┐ -│ 2 │ -└────────────────┘)"}, - {"column call", "SELECT *, serial('id1') FROM test_table", R"( -┌─CounterID─┬─UserID─┬─ver─┬─serial('id1')──┐ -│ 1 │ 3 │ 3 │ 3 │ -│ 1 │ 1 │ 1 │ 4 │ -│ 1 │ 2 │ 2 │ 5 │ -│ 1 │ 5 │ 5 │ 6 │ -│ 1 │ 4 │ 4 │ 7 │ -└───────────┴────────┴─────┴────────────────┘ + {"first call", "SELECT generateSerialID('id1')", R"( +┌─generateSerialID('id1')──┐ +│ 1 │ +└──────────────────────────┘)"}, + {"second call", "SELECT generateSerialID('id1')", R"( +┌─generateSerialID('id1')──┐ +│ 2 │ +└──────────────────────────┘)"}, + {"column call", "SELECT *, generateSerialID('id1') FROM test_table", R"( +┌─CounterID─┬─UserID─┬─ver─┬─generateSerialID('id1')──┐ +│ 1 │ 3 │ 3 │ 3 │ +│ 1 │ 1 │ 1 │ 4 │ +│ 1 │ 2 │ 2 │ 5 │ +│ 1 │ 5 │ 5 │ 6 │ +│ 1 │ 4 │ 4 │ 7 │ +└───────────┴────────┴─────┴──────────────────────────┘ )"}}, .categories{"Unique identifiers"} }); diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql index c3395009477..2bd60656259 100644 --- a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql +++ b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql @@ -1,12 +1,12 @@ -- Tags: zookeeper -SELECT serial('x'); -SELECT serial('x'); -SELECT serial('y'); -SELECT serial('x') FROM numbers(5); +SELECT generateSerialID('x'); +SELECT generateSerialID('x'); +SELECT generateSerialID('y'); +SELECT generateSerialID('x') FROM numbers(5); -SELECT serial(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT serial('x', 'y'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT serial(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT generateSerialID(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT generateSerialID('x', 'y'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT generateSerialID(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT serial('z'), serial('z') FROM numbers(5); +SELECT generateSerialID('z'), generateSerialID('z') FROM numbers(5); diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference new file mode 100644 index 00000000000..8cdced96770 --- /dev/null +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -0,0 +1,11 @@ +-- generateSnowflakeID -- +1 +1 +0 +0 +1 +100 +-- generateSnowflakeIDThreadMonotonic -- +1 +1 +100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql new file mode 100644 index 00000000000..3e994149d2b --- /dev/null +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -0,0 +1,29 @@ +SELECT '-- generateSnowflakeID --'; +SELECT bitShiftLeft(toUInt64(generateSnowflakeID()), 52) = 0; -- check machine sequence number is zero +SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero + +SELECT generateSnowflakeID(1) = generateSnowflakeID(2); +SELECT generateSnowflakeID() = generateSnowflakeID(1); +SELECT generateSnowflakeID(1) = 
generateSnowflakeID(1); + +SELECT generateSnowflakeID(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT count(*) +FROM +( + SELECT DISTINCT generateSnowflakeID() + FROM numbers(100) +); + +SELECT '-- generateSnowflakeIDThreadMonotonic --'; +SELECT bitShiftLeft(toUInt64(generateSnowflakeIDThreadMonotonic()), 52) = 0; -- check machine sequence number is zero +SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeIDThreadMonotonic()), 63), 1) = 0; -- check first bit is zero + +SELECT generateSnowflakeIDThreadMonotonic(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT count(*) +FROM +( + SELECT DISTINCT generateSnowflakeIDThreadMonotonic() + FROM numbers(100) +); \ No newline at end of file diff --git a/tests/queries/0_stateless/03130_generate_snowflake_id.reference b/tests/queries/0_stateless/03130_generate_snowflake_id.reference deleted file mode 100644 index 2049ba26379..00000000000 --- a/tests/queries/0_stateless/03130_generate_snowflake_id.reference +++ /dev/null @@ -1,3 +0,0 @@ -1 -1 -10 diff --git a/tests/queries/0_stateless/03130_generate_snowflake_id.sql b/tests/queries/0_stateless/03130_generate_snowflake_id.sql deleted file mode 100644 index 669814c9ecb..00000000000 --- a/tests/queries/0_stateless/03130_generate_snowflake_id.sql +++ /dev/null @@ -1,11 +0,0 @@ -SELECT bitShiftLeft(toUInt64(generateSnowflakeID()), 52) = 0; -SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; - -SELECT generateSnowflakeID(1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } - -SELECT count(*) -FROM -( - SELECT DISTINCT generateSnowflakeID() - FROM numbers(10) -) \ No newline at end of file From b6aa841e575a6594d159be2cc2a5fbc1391190ce Mon Sep 17 00:00:00 2001 From: Danila Puzov Date: Wed, 22 May 2024 19:26:48 +0300 Subject: [PATCH 259/392] Docs for generateSnowflakeID --- .../sql-reference/functions/uuid-functions.md | 126 ++++++++++++++++++ src/Functions/generateSnowflakeID.cpp | 2 +- 2 files changed, 127 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index d1b833c2439..80d7215b9ef 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -690,6 +690,132 @@ serverUUID() Type: [UUID](../data-types/uuid.md). +## generateSnowflakeID + +Generates a [Snowflake ID](https://github.com/twitter-archive/snowflake/tree/b3f6a3c6ca8e1b6847baa6ff42bf72201e2c2231). + +Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. +For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. +In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. + +Function `generateSnowflakeID` guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries. 
+ +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +|0| timestamp | +├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| | machine_id | machine_seq_num | +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ +``` + +**Syntax** + +``` sql +generateSnowflakeID([expr]) +``` + +**Arguments** + +- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional. + +**Returned value** + +A value of type UInt64. + +**Example** + +First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table. + +``` sql +CREATE TABLE tab (id UInt64) ENGINE = Memory; + +INSERT INTO tab SELECT generateSnowflakeID(); + +SELECT * FROM tab; +``` + +Result: + +```response +┌──────────────────id─┐ +│ 7199081390080409600 │ +└─────────────────────┘ +``` + +**Example with multiple Snowflake IDs generated per row** + +```sql +SELECT generateSnowflakeID(1), generateSnowflakeID(2); + +┌─generateSnowflakeID(1)─┬─generateSnowflakeID(2)─┐ +│ 7199081609652224000 │ 7199081609652224001 │ +└────────────────────────┴────────────────────────┘ +``` + +## generateSnowflakeIDThreadMonotonic + +Generates a [Snowflake ID](https://github.com/twitter-archive/snowflake/tree/b3f6a3c6ca8e1b6847baa6ff42bf72201e2c2231). + +Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. + +This function behaves like `generateSnowflakeID` but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs. + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +|0| timestamp | +├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| | machine_id | machine_seq_num | +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ +``` + +**Syntax** + +``` sql +generateSnowflakeIDThreadMonotonic([expr]) +``` + +**Arguments** + +- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional. + +**Returned value** + +A value of type UInt64. + +**Example** + +First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table. 
+ +``` sql +CREATE TABLE tab (id UInt64) ENGINE = Memory; + +INSERT INTO tab SELECT generateSnowflakeIDThreadMonotonic(); + +SELECT * FROM tab; +``` + +Result: + +```response +┌──────────────────id─┐ +│ 7199082832006627328 │ +└─────────────────────┘ +``` + +**Example with multiple Snowflake IDs generated per row** + +```sql +SELECT generateSnowflakeIDThreadMonotonic(1), generateSnowflakeIDThreadMonotonic(2); + +┌─generateSnowflakeIDThreadMonotonic(1)─┬─generateSnowflakeIDThreadMonotonic(2)─┐ +│ 7199082940311945216 │ 7199082940316139520 │ +└───────────────────────────────────────┴───────────────────────────────────────┘ +``` + ## See also - [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 6ae5dc13af0..1b26bf44adb 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -28,7 +28,7 @@ namespace |0| timestamp | ├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ | | machine_id | machine_seq_num | -├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ - The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970) - The middle 10 bits are the machine ID From a73d60bae5b49bf6b09e4acc05f59cecd528a007 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 22 May 2024 18:35:28 +0200 Subject: [PATCH 260/392] tests for qps_limit_exceeded --- contrib/aws | 2 +- .../integration/helpers/s3_mocks/broken_s3.py | 40 +++- .../test_checking_s3_blobs_paranoid/test.py | 206 +++++++++--------- 3 files changed, 143 insertions(+), 105 deletions(-) diff --git a/contrib/aws b/contrib/aws index 2e12d7c6daf..b7ae6e5bf48 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 2e12d7c6dafa81311ee3d73ac6a178550ffa75be +Subproject commit b7ae6e5bf48fb4981f24476bdd187cd35df1e2c6 diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 206f960293f..238b8aac112 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -165,11 +165,35 @@ class _ServerRuntime: '' "" "ExpectedError" - "mock s3 injected error" + "mock s3 injected unretryable error" "txfbd566d03042474888193-00608d7537" "" ) - request_handler.write_error(data) + request_handler.write_error(500, data) + + class SlowDownAction: + def inject_error(self, request_handler): + data = ( + '' + "" + "SlowDown" + "Slow Down." + "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(429, data) + + class QpsLimitExceededAction: + def inject_error(self, request_handler): + data = ( + '' + "" + "QpsLimitExceeded" + "Please reduce your request rate." 
+ "txfbd566d03042474888193-00608d7537" + "" + ) + request_handler.write_error(429, data) class RedirectAction: def __init__(self, host="localhost", port=1): @@ -239,6 +263,10 @@ class _ServerRuntime: self.error_handler = _ServerRuntime.BrokenPipeAction() elif self.action == "redirect_to": self.error_handler = _ServerRuntime.RedirectAction(*self.action_args) + elif self.action == "slow_down": + self.error_handler = _ServerRuntime.SlowDownAction(*self.action_args) + elif self.action == "qps_limit_exceeded": + self.error_handler = _ServerRuntime.QpsLimitExceededAction(*self.action_args) else: self.error_handler = _ServerRuntime.Expected500ErrorAction() @@ -344,12 +372,12 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.end_headers() self.wfile.write(b"Redirected") - def write_error(self, data, content_length=None): + def write_error(self, http_code, data, content_length=None): if content_length is None: content_length = len(data) self.log_message("write_error %s", data) self.read_all_input() - self.send_response(500) + self.send_response(http_code) self.send_header("Content-Type", "text/xml") self.send_header("Content-Length", str(content_length)) self.end_headers() @@ -418,7 +446,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): path = [x for x in parts.path.split("/") if x] assert path[0] == "mock_settings", path if len(path) < 2: - return self.write_error("_mock_settings: wrong command") + return self.write_error(400, "_mock_settings: wrong command") if path[1] == "at_part_upload": params = urllib.parse.parse_qs(parts.query, keep_blank_values=False) @@ -477,7 +505,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): self.log_message("reset") return self._ok() - return self.write_error("_mock_settings: wrong command") + return self.write_error(400, "_mock_settings: wrong command") def do_GET(self): if self.path == "/": diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 22d6d263d23..97fc5de65e7 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -91,7 +91,7 @@ def get_multipart_counters(node, query_id, log_type="ExceptionWhileProcessing"): SELECT ProfileEvents['S3CreateMultipartUpload'], ProfileEvents['S3UploadPart'], - ProfileEvents['S3WriteRequestsErrors'], + ProfileEvents['S3WriteRequestsErrors'] + ProfileEvents['S3WriteRequestsThrottling'], FROM system.query_log WHERE query_id='{query_id}' AND type='{log_type}' @@ -148,7 +148,7 @@ def test_upload_s3_fail_create_multi_part_upload(cluster, broken_s3, compression ) assert "Code: 499" in error, error - assert "mock s3 injected error" in error, error + assert "mock s3 injected unretryable error" in error, error create_multipart, upload_parts, s3_errors = get_multipart_counters( node, insert_query_id @@ -190,7 +190,7 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( ) assert "Code: 499" in error, error - assert "mock s3 injected error" in error, error + assert "mock s3 injected unretryable error" in error, error create_multipart, upload_parts, s3_errors = get_multipart_counters( node, insert_query_id @@ -200,18 +200,28 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( assert s3_errors >= 2 -def test_when_s3_connection_refused_is_retried(cluster, broken_s3): +@pytest.mark.parametrize( + "action_and_message", [ + ("slow_down", "DB::Exception: Slow Down."), + ("qps_limit_exceeded", "DB::Exception: Please reduce 
your request rate."), + ("connection_refused", "Poco::Exception. Code: 1000, e.code() = 111, Connection refused"), + ], + ids=lambda x: x[0] +) +def test_when_error_is_retried(cluster, broken_s3, action_and_message): node = cluster.instances["node"] - broken_s3.setup_fake_multpartuploads() - broken_s3.setup_at_part_upload(count=3, after=2, action="connection_refused") + action, message = action_and_message - insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED" + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload(count=3, after=2, action=action) + + insert_query_id = f"INSERT_INTO_TABLE_{action}_RETRIED" node.query( f""" INSERT INTO TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'http://resolver:8083/root/data/test_when_{action}_retried', 'minio', 'minio123', 'CSV', auto, 'none' ) @@ -234,13 +244,13 @@ def test_when_s3_connection_refused_is_retried(cluster, broken_s3): assert upload_parts == 39 assert s3_errors == 3 - broken_s3.setup_at_part_upload(count=1000, after=2, action="connection_refused") - insert_query_id = f"INSERT_INTO_TABLE_FUNCTION_CONNECTION_REFUSED_RETRIED_1" + broken_s3.setup_at_part_upload(count=1000, after=2, action=action) + insert_query_id = f"INSERT_INTO_TABLE_{action}_RETRIED_1" error = node.query_and_get_error( f""" INSERT INTO TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_refused_at_write_retried', + 'http://resolver:8083/root/data/test_when_{action}_retried', 'minio', 'minio123', 'CSV', auto, 'none' ) @@ -258,7 +268,79 @@ def test_when_s3_connection_refused_is_retried(cluster, broken_s3): assert "Code: 499" in error, error assert ( - "Poco::Exception. Code: 1000, e.code() = 111, Connection refused" in error + message in error + ), error + + +def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): + node = cluster.instances["node"] + + broken_s3.setup_fake_multpartuploads() + broken_s3.setup_at_part_upload( + count=3, + after=2, + action="broken_pipe", + ) + + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" + node.query( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + create_multipart, upload_parts, s3_errors = get_multipart_counters( + node, insert_query_id, log_type="QueryFinish" + ) + + assert create_multipart == 1 + assert upload_parts == 7 + assert s3_errors == 3 + + broken_s3.setup_at_part_upload( + count=1000, + after=2, + action="broken_pipe", + ) + insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" + error = node.query_and_get_error( + f""" + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=1000000, + s3_check_objects_after_upload=0 + """, + query_id=insert_query_id, + ) + + assert "Code: 1000" in error, error + assert ( + "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 32, I/O error: Broken pipe" + in error ), error @@ -401,20 +483,20 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( ) error = node.query_and_get_error( f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=100, - s3_check_objects_after_upload=0 + INSERT INTO + TABLE FUNCTION s3( + 'http://resolver:8083/root/data/test_when_s3_connection_reset_by_peer_at_create_mpu_retried', + 'minio', 'minio123', + 'CSV', auto, 'none' + ) + SELECT + * + FROM system.numbers + LIMIT 1000 + SETTINGS + s3_max_single_part_upload_size=100, + s3_min_upload_part_size=100, + s3_check_objects_after_upload=0 """, query_id=insert_query_id, ) @@ -427,78 +509,6 @@ def test_when_s3_connection_reset_by_peer_at_create_mpu_retried( ), error -def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): - node = cluster.instances["node"] - - broken_s3.setup_fake_multpartuploads() - broken_s3.setup_at_part_upload( - count=3, - after=2, - action="broken_pipe", - ) - - insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD" - node.query( - f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=1000000, - s3_check_objects_after_upload=0 - """, - query_id=insert_query_id, - ) - - create_multipart, upload_parts, s3_errors = get_multipart_counters( - node, insert_query_id, log_type="QueryFinish" - ) - - assert create_multipart == 1 - assert upload_parts == 7 - assert s3_errors == 3 - - broken_s3.setup_at_part_upload( - count=1000, - after=2, - action="broken_pipe", - ) - insert_query_id = f"TEST_WHEN_S3_BROKEN_PIPE_AT_UPLOAD_1" - error = node.query_and_get_error( - f""" - INSERT INTO - TABLE FUNCTION s3( - 'http://resolver:8083/root/data/test_when_s3_broken_pipe_at_upload_is_retried', - 'minio', 'minio123', - 'CSV', auto, 'none' - ) - SELECT - * - FROM system.numbers - LIMIT 1000000 - SETTINGS - s3_max_single_part_upload_size=100, - s3_min_upload_part_size=1000000, - s3_check_objects_after_upload=0 - """, - query_id=insert_query_id, - ) - - assert "Code: 1000" in error, error - assert ( - "DB::Exception: Poco::Exception. 
Code: 1000, e.code() = 32, I/O error: Broken pipe" - in error - ), error - - def test_query_is_canceled_with_inf_retries(cluster, broken_s3): node = cluster.instances["node_with_inf_s3_retries"] From 52fe1fab97a5f39c99c33deb1054bf319fbbf230 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 22 May 2024 16:46:02 +0000 Subject: [PATCH 261/392] Automatic style fix --- tests/integration/helpers/s3_mocks/broken_s3.py | 4 +++- .../test_checking_s3_blobs_paranoid/test.py | 14 ++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/integration/helpers/s3_mocks/broken_s3.py b/tests/integration/helpers/s3_mocks/broken_s3.py index 238b8aac112..7d0127bc1c4 100644 --- a/tests/integration/helpers/s3_mocks/broken_s3.py +++ b/tests/integration/helpers/s3_mocks/broken_s3.py @@ -266,7 +266,9 @@ class _ServerRuntime: elif self.action == "slow_down": self.error_handler = _ServerRuntime.SlowDownAction(*self.action_args) elif self.action == "qps_limit_exceeded": - self.error_handler = _ServerRuntime.QpsLimitExceededAction(*self.action_args) + self.error_handler = _ServerRuntime.QpsLimitExceededAction( + *self.action_args + ) else: self.error_handler = _ServerRuntime.Expected500ErrorAction() diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 97fc5de65e7..a7fe02b16de 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -201,12 +201,16 @@ def test_upload_s3_fail_upload_part_when_multi_part_upload( @pytest.mark.parametrize( - "action_and_message", [ + "action_and_message", + [ ("slow_down", "DB::Exception: Slow Down."), ("qps_limit_exceeded", "DB::Exception: Please reduce your request rate."), - ("connection_refused", "Poco::Exception. Code: 1000, e.code() = 111, Connection refused"), + ( + "connection_refused", + "Poco::Exception. 
Code: 1000, e.code() = 111, Connection refused", + ), ], - ids=lambda x: x[0] + ids=lambda x: x[0], ) def test_when_error_is_retried(cluster, broken_s3, action_and_message): node = cluster.instances["node"] @@ -267,9 +271,7 @@ def test_when_error_is_retried(cluster, broken_s3, action_and_message): ) assert "Code: 499" in error, error - assert ( - message in error - ), error + assert message in error, error def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): From 1e5069b5dc6f07d7b29b3a94eaad1c15c9842635 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 22 May 2024 19:21:27 +0200 Subject: [PATCH 262/392] Fix duplicate include --- src/TableFunctions/ITableFunctionDataLake.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 6ad8689a9b4..fe6e5b3e593 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include From 7c9f36ad1ea1e6cc1d480c44a94c9e473f3a27e0 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 22 May 2024 19:46:08 +0200 Subject: [PATCH 263/392] Add gh to style-check dockerfile --- docker/test/style/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 5d53d03606f..172fbce6406 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -11,6 +11,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ aspell \ curl \ git \ + gh \ file \ libxml2-utils \ moreutils \ From 6be79a35b6a55e88103056058ce9833ac62be77e Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 22 May 2024 20:30:19 +0200 Subject: [PATCH 264/392] update contrib/aws to the last head --- contrib/aws | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/aws b/contrib/aws index b7ae6e5bf48..eb96e740453 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit b7ae6e5bf48fb4981f24476bdd187cd35df1e2c6 +Subproject commit eb96e740453ae27afa1f367ba19f99bdcb38484d From 7ecfdbb3aaf4b7f4a68d6a332138dd90612e6120 Mon Sep 17 00:00:00 2001 From: Mikhail Artemenko Date: Wed, 22 May 2024 23:05:27 +0000 Subject: [PATCH 265/392] fix test_hdfsCluster_unset_skip_unavailable_shards --- tests/integration/test_storage_hdfs/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index bb72574c6e5..3c43918d8c0 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -895,7 +895,7 @@ def test_hdfsCluster_unset_skip_unavailable_shards(started_cluster): assert ( node1.query( - "select * from hdfsCluster('cluster_non_existent_port', 'hdfs://hdfs1:9000/skip_unavailable_shards', 'TSV', 'id UInt64, text String, number Float64')" + "select * from hdfsCluster('cluster_non_existent_port', 'hdfs://hdfs1:9000/unskip_unavailable_shards', 'TSV', 'id UInt64, text String, number Float64')" ) == data ) From c07c9d4c87efa2d4823526127bd52566773a2cd3 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 22 May 2024 21:57:43 -0300 Subject: [PATCH 266/392] test for #45804 --- ...l_and_prewhere_condition_ver_column.reference | 2 ++ ...1_final_and_prewhere_condition_ver_column.sql | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 
tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference create mode 100644 tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql diff --git a/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql new file mode 100644 index 00000000000..78a58a979d1 --- /dev/null +++ b/tests/queries/0_stateless/00331_final_and_prewhere_condition_ver_column.sql @@ -0,0 +1,16 @@ +SET allow_experimental_analyzer = 1; + +-- https://github.com/ClickHouse/ClickHouse/issues/45804 + +CREATE TABLE myRMT( + key Int64, + someCol String, + ver DateTime +) ENGINE = ReplacingMergeTree(ver) +ORDER BY key as SELECT 1, 'test', '2020-01-01'; + +SELECT count(ver) FROM myRMT FINAL PREWHERE ver > '2000-01-01'; + +SELECT count() FROM myRMT FINAL PREWHERE ver > '2000-01-01'; + +DROP TABLE myRMT; From 88ae74f6fdd3d859674a588b8b6fba320d214950 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 09:28:38 +0200 Subject: [PATCH 267/392] Add test for reinterpretXYZ --- .../functions/type-conversion-functions.md | 3 +- .../03156_reinterpret_functions.sql | 36 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03156_reinterpret_functions.sql diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 14a12ab5d5d..1030d92c76b 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1000,7 +1000,8 @@ Result: ## reinterpretAsInt(8\|16\|32\|64) -## reinterpretAsFloat(32\|64) +## reinterpretAsFloat* + ## reinterpretAsDate diff --git a/tests/queries/0_stateless/03156_reinterpret_functions.sql b/tests/queries/0_stateless/03156_reinterpret_functions.sql new file mode 100644 index 00000000000..4acaaf47cef --- /dev/null +++ b/tests/queries/0_stateless/03156_reinterpret_functions.sql @@ -0,0 +1,36 @@ +-- Date and DateTime + +SELECT reinterpretAsDate(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsDate('A',''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsDate([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT reinterpretAsDateTime(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsDateTime('A',''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsDateTime([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} + +SELECT reinterpretAsDate(65); +SELECT reinterpretAsDate('A'); +SELECT reinterpretAsDateTime(65); +SELECT reinterpretAsDate('A'); + +-- Fixed String + +SELECT reinterpretAsFixedString(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsFixedString(toDate('1970-01-01'),''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsFixedString([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} + +SELECT reinterpretAsFixedString(toDate('1970-03-07')); +SELECT reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05')); +SELECT reinterpretAsFixedString(65); 
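The positive cases in this new test all revolve around the byte value 65: it is the ASCII code of 'A' and also the day number of 1970-03-07 (65 days after the Unix epoch), so the integer, the one-character string and the date all reduce to the same leading byte 0x41. A small sketch of that round trip, with expected values hand-derived rather than taken from a reference file:

```sql
SELECT
    reinterpretAsDate(65)        AS from_int,      -- expected: 1970-03-07
    reinterpretAsDate('A')       AS from_string,   -- same single byte 0x41, so the same date
    reinterpretAsFixedString(65) AS back_to_bytes; -- expected: 'A'
```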
+ +-- Float32, Float64 + +SELECT reinterpretAsFloat32(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsFloat64(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsFloat32('1970-01-01', ''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsFloat64('1970-01-01', ''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT reinterpretAsFloat32([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT reinterpretAsFloat64([0, 1, 2]); -- { clientError4 ILLEGAL_TYPE_OF_ARGUMENT} + + + + From 9234beaff8ef19ed758984fb70c82b4edb3762f0 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 09:32:43 +0200 Subject: [PATCH 268/392] Fix typo and move from other-functions to math-functions --- .../sql-reference/functions/math-functions.md | 46 +++++++++++++++++++ .../functions/other-functions.md | 46 ------------------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 945166056af..324adbfb4b3 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -947,3 +947,49 @@ Result: │ 11 │ └──────────────────────────────────┘ ``` + +## proportionsZTest + +Returns test statistics for the two proportion Z-test - a statistical test for comparing the proportions from two populations `x` and `y`. + +**Syntax** + +```sql +proportionsZTest(successes_x, successes_y, trials_x, trials_y, conf_level, pool_type) +``` + +**Arguments** + +- `successes_x`: Number of successes in population `x`. [UInt64](../data-types/int-uint.md). +- `successes_y`: Number of successes in population `y`. [UInt64](../data-types/int-uint.md). +- `trials_x`: Number of trials in population `x`. [UInt64](../data-types/int-uint.md). +- `trials_y`: Number of trials in population `y`. [UInt64](../data-types/int-uint.md). +- `conf_level`: Confidence level for the test. [Float64](../data-types/float.md). +- `pool_type`: Selection of pooling (way in which the standard error is estimated). Can be either `unpooled` or `pooled`. [String](../data-types/string.md). + +:::note +For argument `pool_type`: In the pooled version, the two proportions are averaged, and only one proportion is used to estimate the standard error. In the unpooled version, the two proportions are used separately. +::: + +**Returned value** + +- `z_stat`: Z statistic. [Float64](../data-types/float.md). +- `p_val`: P value. [Float64](../data-types/float.md). +- `ci_low`: The lower confidence interval. [Float64](../data-types/float.md). +- `ci_high`: The upper confidence interval. [Float64](../data-types/float.md). 
+ +**Example** + +Query: + +```sql +SELECT proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled'); +``` + +Result: + +```response +┌─proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled')───────────────────────────────┐ +│ (-0.20656724435948853,0.8363478437079654,-0.09345975390115283,0.07563797172293502) │ +└────────────────────────────────────────────────────────────────────────────────────┘ +``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 288432167bb..2b0215115cb 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -903,52 +903,6 @@ SELECT parseTimeDelta('1yr2mo') └──────────────────────────┘ ``` -## proportionsZTest - -Returns test statistics for the two proportion Z-test - a statistical test for comparing the proportions from two populations `x` and `y`. - -**Syntax** - -```sql -proportionsZTest(successes_x, successes_y, trials_x, trials_y, conf_level, pool_type) -``` - -**Arguments** - -- `successes_x`: Number of successes in population `x`. [UInt64](../data-types/int-uint.md). -- `successes_y`: Number of successes in population `y`. [UInt64](../data-types/int-uint.md). -- `trials_x`: Number of trials in population `x`. [UInt64](../data-types/int-uint.md). -- `trials_y`: Number of trials in population `y`. [UInt64](../data-types/int-uint.md). -- `conf_level`: Confidence level for the test. [Float64](../data-types/float.md). -- `pool_type`: Selection of pooling (way in which the standard error is estimated). can be either `unpooled` or `pooled`. [String](../data-types/string.md). - -:::note -For argument `pool_type`: In the pooled version, the two proportions are averaged, and only one proportion is used to estimate the standard error. In the unpooled version, the two proportions are used separately. -::: - -**Returned value** - -- `z_stat`: Z statistic. [Float64](../data-types/float.md). -- `p_val`: P value. [Float64](../data-types/float.md). -- `ci_low`: The lower confidence interval. [Float64](../data-types/float.md). -- `ci_high`: The upper confidence interval. [Float64](../data-types/float.md). - -**Example** - -Query: - -```sql -SELECT proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled'); -``` - -Result: - -```response -┌─proportionsZTest(10, 11, 100, 101, 0.95, 'unpooled')───────────────────────────────┐ -│ (-0.20656724435948853,0.8363478437079654,-0.09345975390115283,0.07563797172293502) │ -└────────────────────────────────────────────────────────────────────────────────────┘ -``` - ## least(a, b) Returns the smaller value of a and b. 
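Since this commit only relocates the `proportionsZTest` section, the example numbers it carries along are easy to sanity-check: for the unpooled variant the statistic is the difference of the two sample proportions divided by the unpooled standard error. The check below reuses the 10/100 and 11/101 figures from the documented example; the formula itself is the textbook unpooled two-proportion z statistic, assumed here rather than taken from the patch:

```sql
WITH
    10 / 100. AS px,  -- sample proportion of population x
    11 / 101. AS py   -- sample proportion of population y
SELECT (px - py) / sqrt(px * (1 - px) / 100 + py * (1 - py) / 101) AS z_stat_unpooled;
-- roughly -0.2066, matching the first element of the documented result tuple
```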
From 45492baf440418267c8187607650a6ceddc061d3 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Thu, 23 May 2024 08:20:16 +0000 Subject: [PATCH 269/392] Restart Ci From a21377cf5131de31e2109c117774fdb8058e8bc9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 11:51:34 +0200 Subject: [PATCH 270/392] Update src/Analyzer/Passes/QueryAnalysisPass.cpp Co-authored-by: Dmitry Novik --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index cfea45732db..3ccecac951d 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -638,7 +638,10 @@ struct ScopeAliases auto it = alias_map.find(*key); - if (it == alias_map.end() && lookup.lookup_context == IdentifierLookupContext::TABLE_EXPRESSION) + if (it != alias_map.end()) + return &it->second; + + if (lookup.lookup_context == IdentifierLookupContext::TABLE_EXPRESSION) return {}; while (it == alias_map.end()) From 9d63095db9445f4963da914ddbc819b0a57bc7e2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 16 Apr 2024 12:55:50 +0000 Subject: [PATCH 271/392] Revert "Revert "Speed up `splitByRegexp`"" This reverts commit 08e5c2ba4d9620551b0de5791876d35888d2c81a. --- src/Functions/splitByRegexp.cpp | 66 ++++++++++++++++++- tests/performance/function_tokens.xml | 2 + .../01866_split_by_regexp.reference | 12 ++++ .../0_stateless/01866_split_by_regexp.sql | 17 +++++ 4 files changed, 94 insertions(+), 3 deletions(-) diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index 32afb813a04..e28fe9c38bb 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -1,9 +1,11 @@ #include +#include +#include #include #include -#include #include #include +#include #include @@ -102,7 +104,7 @@ public: return false; } - pos += 1; + ++pos; token_end = pos; ++splits; } @@ -148,11 +150,69 @@ public: using FunctionSplitByRegexp = FunctionTokens; +/// Fallback splitByRegexp to splitByChar when its 1st argument is a trivial char for better performance +class SplitByRegexpOverloadResolver : public IFunctionOverloadResolver +{ +public: + static constexpr auto name = "splitByRegexp"; + static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique(context); } + + explicit SplitByRegexpOverloadResolver(ContextPtr context_) + : context(context_) + , split_by_regexp(FunctionSplitByRegexp::create(context)) {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return SplitByRegexpImpl::getNumberOfArguments(); } + bool isVariadic() const override { return SplitByRegexpImpl::isVariadic(); } + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + if (patternIsTrivialChar(arguments)) + return FunctionFactory::instance().getImpl("splitByChar", context)->build(arguments); + else + return std::make_unique( + split_by_regexp, collections::map(arguments, [](const auto & elem) { return elem.type; }), return_type); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + return split_by_regexp->getReturnTypeImpl(arguments); + } + +private: + bool patternIsTrivialChar(const ColumnsWithTypeAndName & arguments) const + { + const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get()); + if (!col) + throw 
Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}. " + "Must be constant string.", + arguments[0].column->getName(), + getName()); + + String pattern = col->getValue(); + if (pattern.size() == 1) + { + OptimizedRegularExpression re = Regexps::createRegexp(pattern); + + std::string required_substring; + bool is_trivial; + bool required_substring_is_prefix; + re.getAnalyzeResult(required_substring, is_trivial, required_substring_is_prefix); + return is_trivial && required_substring == pattern; + } + return false; + } + + ContextPtr context; + FunctionPtr split_by_regexp; +}; } REGISTER_FUNCTION(SplitByRegexp) { - factory.registerFunction(); + factory.registerFunction(); } } diff --git a/tests/performance/function_tokens.xml b/tests/performance/function_tokens.xml index 63b72f83df3..1ff56323d62 100644 --- a/tests/performance/function_tokens.xml +++ b/tests/performance/function_tokens.xml @@ -1,3 +1,5 @@ with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByChar(' ', materialize(s)) as w from numbers(1000000) + with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp(' ', materialize(s)) as w from numbers(1000000) + with 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' as s select splitByRegexp('\s+', materialize(s)) as w from numbers(100000) diff --git a/tests/queries/0_stateless/01866_split_by_regexp.reference b/tests/queries/0_stateless/01866_split_by_regexp.reference index a3ae2f35a5f..62939940545 100644 --- a/tests/queries/0_stateless/01866_split_by_regexp.reference +++ b/tests/queries/0_stateless/01866_split_by_regexp.reference @@ -5,3 +5,15 @@ ['gbye','bug'] [''] [] +Test fallback of splitByRegexp to splitByChar if regexp is trivial +['a','b','c'] +['a','b','c'] +['','','','','',''] +['a^b^c'] +['a$b$c'] +['a)b)c'] +['a','b','c'] +['a','b','c'] +['a','b','c'] +['a|b|c'] +['a\\b\\c'] diff --git a/tests/queries/0_stateless/01866_split_by_regexp.sql b/tests/queries/0_stateless/01866_split_by_regexp.sql index e472fb68d94..570bd1ba5c0 100644 --- a/tests/queries/0_stateless/01866_split_by_regexp.sql +++ b/tests/queries/0_stateless/01866_split_by_regexp.sql @@ -3,3 +3,20 @@ select splitByRegexp('', 'abcde'); select splitByRegexp('<[^<>]*>', x) from (select arrayJoin(['

hello

world

', 'gbyebug']) x); select splitByRegexp('ab', ''); select splitByRegexp('', ''); + +SELECT 'Test fallback of splitByRegexp to splitByChar if regexp is trivial'; +select splitByRegexp(' ', 'a b c'); +select splitByRegexp('-', 'a-b-c'); +select splitByRegexp('.', 'a.b.c'); +select splitByRegexp('^', 'a^b^c'); +select splitByRegexp('$', 'a$b$c'); +select splitByRegexp('+', 'a+b+c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp('?', 'a?b?c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp('(', 'a(b(c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp(')', 'a)b)c'); +select splitByRegexp('[', 'a[b[c'); -- { serverError CANNOT_COMPILE_REGEXP } +select splitByRegexp(']', 'a]b]c'); +select splitByRegexp('{', 'a{b{c'); +select splitByRegexp('}', 'a}b}c'); +select splitByRegexp('|', 'a|b|c'); +select splitByRegexp('\\', 'a\\b\\c'); From 00bbffa6f056348a9252ca178edfee580a1939d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 23 May 2024 11:04:29 +0000 Subject: [PATCH 272/392] Update autogenerated version to 24.6.1.1 and contributors --- cmake/autogenerated_versions.txt | 10 +++---- .../StorageSystemContributors.generated.cpp | 30 +++++++++++++++++++ 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index f8ff71876c6..dfbbb66a1e9 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54486) +SET(VERSION_REVISION 54487) SET(VERSION_MAJOR 24) -SET(VERSION_MINOR 5) +SET(VERSION_MINOR 6) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 6d4b31322d168356c8b10c43b4cef157c82337ff) -SET(VERSION_DESCRIBE v24.5.1.1-testing) -SET(VERSION_STRING 24.5.1.1) +SET(VERSION_GITHASH 70a1d3a63d47f0be077d67b8deb907230fc7cfb0) +SET(VERSION_DESCRIBE v24.6.1.1-testing) +SET(VERSION_STRING 24.6.1.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 909599c00af..b42b070d518 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -48,6 +48,7 @@ const char * auto_contributors[] { "Alex Cao", "Alex Cheng", "Alex Karo", + "Alex Katsman", "Alex Krash", "Alex Ryndin", "Alex Zatelepin", @@ -101,6 +102,7 @@ const char * auto_contributors[] { "Alexey Korepanov", "Alexey Milovidov", "Alexey Perevyshin", + "Alexey Petrunyaka", "Alexey Tronov", "Alexey Vasiliev", "Alexey Zatelepin", @@ -109,6 +111,7 @@ const char * auto_contributors[] { "AlfVII", "Alfonso Martinez", "Alfred Xu", + "Ali", "Ali Demirci", "Aliaksandr Pliutau", "Aliaksandr Shylau", @@ -250,6 +253,7 @@ const char * auto_contributors[] { "Brian Hunter", "Brokenice0415", "Bulat Gaifullin", + "Caio Ricciuti", "Camden Cheek", "Camilo Sierra", "Carbyn", @@ -384,6 +388,7 @@ const char * auto_contributors[] { "Evgenii Pravda", "Evgeniia Sudarikova", "Evgeniy Gatov", + "Evgeniy Leko", "Evgeniy Udodov", "Evgeny", "Evgeny Konkov", @@ -413,6 +418,7 @@ const char * auto_contributors[] { "Fille", "Flowyi", "Francisco Barón", + "Francisco Javier Jurado Moreno", "Frank Chen", "Frank Zhao", "François Violette", @@ -425,6 +431,7 @@ const char * auto_contributors[] { "G5.Qin", "Gabriel", "Gabriel Archer", + "Gabriel Martinez", "Gagan Arneja", "Gagan 
Goel", "Gao Qiang", @@ -446,6 +453,7 @@ const char * auto_contributors[] { "Grigory Buteyko", "Grigory Pervakov", "GruffGemini", + "Grégoire Pineau", "Guillaume Tassery", "Guo Wangyang", "Guo Wei (William)", @@ -587,6 +595,7 @@ const char * auto_contributors[] { "Keiji Yoshida", "Ken Chen", "Ken MacInnis", + "KenL", "Kenji Noguchi", "Kerry Clendinning", "Kevin Chiang", @@ -640,6 +649,7 @@ const char * auto_contributors[] { "Leonardo Maciel", "Leonid Krylov", "Leopold Schabel", + "Leticia Webb", "Lev Borodin", "Lewinma", "Li Shuai", @@ -701,6 +711,7 @@ const char * auto_contributors[] { "Masha", "Mathieu Rey", "Matthew Peveler", + "Mattias Naarttijärvi", "Matwey V. Kornilov", "Max", "Max Akhmedov", @@ -711,6 +722,7 @@ const char * auto_contributors[] { "MaxTheHuman", "MaxWk", "Maxim Akhmedov", + "Maxim Alexeev", "Maxim Babenko", "Maxim Fedotov", "Maxim Fridental", @@ -739,6 +751,7 @@ const char * auto_contributors[] { "Michael Razuvaev", "Michael Schnerring", "Michael Smitasin", + "Michael Stetsyuk", "Michail Safronov", "Michal Lisowski", "MicrochipQ", @@ -879,6 +892,7 @@ const char * auto_contributors[] { "Pavlo Bashynskiy", "Pawel Rog", "Paweł Kudzia", + "Pazitiff9", "Peignon Melvyn", "Peng Jian", "Peng Liu", @@ -1084,6 +1098,7 @@ const char * auto_contributors[] { "Tom Bombadil", "Tom Risse", "Tomas Barton", + "Tomer Shafir", "Tomáš Hromada", "Tristan", "Tsarkova Anastasia", @@ -1123,6 +1138,7 @@ const char * auto_contributors[] { "Victor Krasnov", "Victor Tarnavsky", "Viktor Taranenko", + "Vinay Suryadevara", "Vincent", "Vincent Bernat", "Vitalii S", @@ -1162,6 +1178,9 @@ const char * auto_contributors[] { "Vladislav Smirnov", "Vladislav V", "Vojtech Splichal", + "Volodya", + "Volodya Giro", + "Volodyachan", "Volodymyr Kuznetsov", "Vsevolod Orlov", "Vxider", @@ -1179,6 +1198,7 @@ const char * auto_contributors[] { "XenoAmess", "Xianda Ke", "Xiang Zhou", + "Xiaofei Hu", "Xin Wang", "Xoel Lopez Barata", "Xudong Zhang", @@ -1224,6 +1244,7 @@ const char * auto_contributors[] { "Zhipeng", "Zhuo Qiu", "Zijie Lu", + "Zimu Li", "Ziy1-Tan", "Zoran Pandovski", "[데이터플랫폼팀] 이호선", @@ -1490,6 +1511,7 @@ const char * auto_contributors[] { "jiyoungyoooo", "jktng", "jkuklis", + "joe09@foxmail.com", "joelynch", "johanngan", "johnnymatthews", @@ -1658,6 +1680,7 @@ const char * auto_contributors[] { "ongkong", "orantius", "p0ny", + "p1rattttt", "palasonicq", "palegre-tiny", "pawelsz-rb", @@ -1667,6 +1690,7 @@ const char * auto_contributors[] { "pedro.riera", "pengxiangcai", "peshkurov", + "pet74alex", "peter279k", "philip.han", "pingyu", @@ -1680,6 +1704,7 @@ const char * auto_contributors[] { "pyos", "pzhdfy", "qaziqarta", + "qiangxuhui", "qianlixiang", "qianmoQ", "qieqieplus", @@ -1793,6 +1818,7 @@ const char * auto_contributors[] { "unknown", "urgordeadbeef", "usurai", + "v01dxyz", "vahid-sohrabloo", "vdimir", "velavokr", @@ -1802,6 +1828,7 @@ const char * auto_contributors[] { "vic", "vicdashkov", "vicgao", + "vinay92-ch", "vinity", "vitac", "vitstn", @@ -1818,6 +1845,7 @@ const char * auto_contributors[] { "weeds085490", "whysage", "wineternity", + "woodlzm", "wuxiaobai24", "wxybear", "wzl", @@ -1877,6 +1905,7 @@ const char * auto_contributors[] { "zhenjial", "zhifeng", "zhongyuankai", + "zhou", "zhoubintao", "zhukai", "zimv", @@ -1891,6 +1920,7 @@ const char * auto_contributors[] { "zxealous", "zy-kkk", "zzsmdfj", + "zzyReal666", "Šimon Podlipský", "Александр", "Александр Нам", From 299f0886bfda27e375be3edf9042af513cbf99c8 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 23 May 2024 13:48:17 +0200 
Subject: [PATCH 273/392] Followup for #63691 --- src/Processors/Transforms/SquashingChunksTransform.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 267490dc89e..ed67dd508f3 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -71,7 +71,9 @@ Chunk SimpleSquashingChunksTransform::generate() if (squashed_chunk.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); - return std::move(squashed_chunk); + Chunk result_chunk; + result_chunk.swap(squashed_chunk); + return result_chunk; } bool SimpleSquashingChunksTransform::canGenerate() @@ -83,7 +85,10 @@ Chunk SimpleSquashingChunksTransform::getRemaining() { Block current_block = squashing.add({}); squashed_chunk.setColumns(current_block.getColumns(), current_block.rows()); - return std::move(squashed_chunk); + + Chunk result_chunk; + result_chunk.swap(squashed_chunk); + return result_chunk; } } From f1c191a3cb2d2037de4346683fbc90a58a98a8a6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 May 2024 13:48:23 +0200 Subject: [PATCH 274/392] Better --- .../ObjectStorage/Azure/Configuration.cpp | 4 ++++ .../ObjectStorage/ReadBufferIterator.cpp | 23 +++++++++++------- .../ObjectStorage/ReadBufferIterator.h | 3 ++- .../StorageObjectStorageSource.cpp | 20 +++++++--------- .../StorageObjectStorageSource.h | 5 ++-- src/Storages/S3Queue/S3QueueSource.cpp | 24 ++++++++++--------- 6 files changed, 44 insertions(+), 35 deletions(-) diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index cca94488a30..ada3e2e9323 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -100,6 +100,10 @@ AzureObjectStorage::SettingsPtr StorageAzureConfiguration::createSettings(Contex settings_ptr->max_single_part_upload_size = context_settings.azure_max_single_part_upload_size; settings_ptr->max_single_read_retries = context_settings.azure_max_single_read_retries; settings_ptr->list_object_keys_size = static_cast(context_settings.azure_list_object_keys_size); + settings_ptr->strict_upload_part_size = context_settings.azure_strict_upload_part_size; + settings_ptr->max_upload_part_size = context_settings.azure_max_upload_part_size; + settings_ptr->max_blocks_in_multipart_upload = context_settings.azure_max_blocks_in_multipart_upload; + settings_ptr->min_upload_part_size = context_settings.azure_min_upload_part_size; return settings_ptr; } diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 5a8a4735fe1..50d69129883 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -35,9 +35,10 @@ ReadBufferIterator::ReadBufferIterator( format = configuration->format; } -SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const String & path, const String & format_name) const +SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const ObjectInfo & object_info, const String & format_name) const { - auto source = std::filesystem::path(configuration->getDataSourceDescription()) / path; + chassert(!object_info.getPath().starts_with("/")); + auto source = std::filesystem::path(configuration->getDataSourceDescription()) / object_info.getPath(); 
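The `chassert(!object_info.getPath().starts_with("/"))` added just above protects the `std::filesystem::path` concatenation that builds `source`: when the right-hand side of `operator/` is an absolute path, the standard library replaces the whole result with it, so a key with a leading slash would silently lose the data-source-description prefix of the schema-cache key. A minimal standalone sketch of that behaviour, with bucket and file names invented for the example:

```cpp
#include <filesystem>
#include <iostream>

int main()
{
    namespace fs = std::filesystem;

    // Relative right-hand side: the prefix on the left is kept.
    std::cout << (fs::path("s3://bucket/data") / "dir/file.csv") << '\n';  // prints "s3://bucket/data/dir/file.csv"

    // Absolute right-hand side (POSIX): operator/ replaces the whole path,
    // so the prefix on the left is silently dropped.
    std::cout << (fs::path("s3://bucket/data") / "/dir/file.csv") << '\n'; // prints "/dir/file.csv"
}
```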
return DB::getKeyForSchemaCache(source, format_name, format_settings, getContext()); } @@ -50,6 +51,7 @@ SchemaCache::Keys ReadBufferIterator::getKeysForSchemaCache() const std::back_inserter(sources), [&](const auto & elem) { + chassert(!elem->getPath().starts_with("/")); return std::filesystem::path(configuration->getDataSourceDescription()) / elem->getPath(); }); return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); @@ -78,7 +80,7 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( if (format) { - auto cache_key = getKeyForSchemaCache(object_info->getPath(), *format); + const auto cache_key = getKeyForSchemaCache(*object_info, *format); if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) return columns; } @@ -89,7 +91,7 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( /// If we have such entry for some format, we can use this format to read the file. for (const auto & format_name : FormatFactory::instance().getAllInputFormats()) { - auto cache_key = getKeyForSchemaCache(object_info->getPath(), format_name); + const auto cache_key = getKeyForSchemaCache(*object_info, format_name); if (auto columns = schema_cache.tryGetColumns(cache_key, get_last_mod_time)) { /// Now format is known. It should be the same for all files. @@ -99,14 +101,13 @@ std::optional ReadBufferIterator::tryGetColumnsFromCache( } } } - return std::nullopt; } void ReadBufferIterator::setNumRowsToLastFile(size_t num_rows) { if (query_settings.schema_inference_use_cache) - schema_cache.addNumRows(getKeyForSchemaCache(current_object_info->getPath(), *format), num_rows); + schema_cache.addNumRows(getKeyForSchemaCache(*current_object_info, *format), num_rows); } void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) @@ -114,7 +115,7 @@ void ReadBufferIterator::setSchemaToLastFile(const ColumnsDescription & columns) if (query_settings.schema_inference_use_cache && query_settings.schema_inference_mode == SchemaInferenceMode::UNION) { - schema_cache.addColumns(getKeyForSchemaCache(current_object_info->getPath(), *format), columns); + schema_cache.addColumns(getKeyForSchemaCache(*current_object_info, *format), columns); } } @@ -135,7 +136,7 @@ void ReadBufferIterator::setFormatName(const String & format_name) String ReadBufferIterator::getLastFileName() const { if (current_object_info) - return current_object_info->getFileName(); + return current_object_info->getPath(); else return ""; } @@ -255,17 +256,21 @@ ReadBufferIterator::Data ReadBufferIterator::next() } } + LOG_TEST(getLogger("KSSENII"), "Will read columns from {}", current_object_info->getPath()); + std::unique_ptr read_buf; CompressionMethod compression_method; using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; if (const auto * object_info_in_archive = dynamic_cast(current_object_info.get())) { - compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method); + LOG_TEST(getLogger("KSSENII"), "Will read columns from {} from archive", current_object_info->getPath()); + compression_method = chooseCompressionMethod(filename, configuration->compression_method); const auto & archive_reader = object_info_in_archive->archive_reader; read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); } else { + LOG_TEST(getLogger("KSSENII"), "Will read columns from {} from s3", current_object_info->getPath()); compression_method = 
chooseCompressionMethod(filename, configuration->compression_method); read_buf = object_storage->readObject( StoredObject(current_object_info->getPath()), diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.h b/src/Storages/ObjectStorage/ReadBufferIterator.h index 287e316e243..6eeb52ec2ed 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.h +++ b/src/Storages/ObjectStorage/ReadBufferIterator.h @@ -13,6 +13,7 @@ public: using FileIterator = std::shared_ptr; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; using ObjectInfoPtr = StorageObjectStorage::ObjectInfoPtr; + using ObjectInfo = StorageObjectStorage::ObjectInfo; using ObjectInfos = StorageObjectStorage::ObjectInfos; ReadBufferIterator( @@ -41,7 +42,7 @@ public: std::unique_ptr recreateLastReadBuffer() override; private: - SchemaCache::Key getKeyForSchemaCache(const String & path, const String & format_name) const; + SchemaCache::Key getKeyForSchemaCache(const ObjectInfo & object_info, const String & format_name) const; SchemaCache::Keys getKeysForSchemaCache() const; std::optional tryGetColumnsFromCache( const ObjectInfos::iterator & begin, const ObjectInfos::iterator & end); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index a2b3ca5b69e..7332574b246 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -183,14 +183,14 @@ Chunk StorageObjectStorageSource::generate() VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, - fs::path(configuration->getNamespace()) / reader.getRelativePath(), + fs::path(configuration->getNamespace()) / reader.getObjectInfo().getPath(), object_info.metadata->size_bytes, &filename); return chunk; } if (reader.getInputFormat() && getContext()->getSettingsRef().use_cache_for_count_from_files) - addNumRowsToCache(reader.getRelativePath(), total_rows_in_file); + addNumRowsToCache(reader.getObjectInfo(), total_rows_in_file); total_rows_in_file = 0; @@ -209,29 +209,28 @@ Chunk StorageObjectStorageSource::generate() return {}; } -void StorageObjectStorageSource::addNumRowsToCache(const String & path, size_t num_rows) +void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows) { const auto cache_key = getKeyForSchemaCache( - fs::path(configuration->getDataSourceDescription()) / path, + fs::path(configuration->getDataSourceDescription()) / object_info.getPath(), configuration->format, format_settings, getContext()); - schema_cache.addNumRows(cache_key, num_rows); } -std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfoPtr & object_info) +std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) { const auto cache_key = getKeyForSchemaCache( - fs::path(configuration->getDataSourceDescription()) / object_info->getPath(), + fs::path(configuration->getDataSourceDescription()) / object_info.getPath(), configuration->format, format_settings, getContext()); auto get_last_mod_time = [&]() -> std::optional { - return object_info->metadata - ? std::optional(object_info->metadata->last_modified.epochTime()) + return object_info.metadata + ? 
std::optional(object_info.metadata->last_modified.epochTime()) : std::nullopt; }; return schema_cache.tryGetNumRows(cache_key, get_last_mod_time); @@ -263,7 +262,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade std::optional num_rows_from_cache = need_only_count && getContext()->getSettingsRef().use_cache_for_count_from_files - ? tryGetNumRowsFromCache(object_info) + ? tryGetNumRowsFromCache(*object_info) : std::nullopt; if (num_rows_from_cache) @@ -505,7 +504,6 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne index = 0; - LOG_TEST(logger, "Filter: {}", filter_dag != nullptr); if (filter_dag) { std::vector paths; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index 8dbb31fdfba..e9635ff4dce 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -94,7 +94,6 @@ protected: PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } - std::string getRelativePath() const { return object_info->getPath(); } const ObjectInfo & getObjectInfo() const { return *object_info; } const IInputFormat * getInputFormat() const { return dynamic_cast(source.get()); } @@ -115,8 +114,8 @@ protected: std::future createReaderAsync(size_t processor = 0); std::unique_ptr createReadBuffer(const ObjectInfo & object_info); - void addNumRowsToCache(const String & path, size_t num_rows); - std::optional tryGetNumRowsFromCache(const ObjectInfoPtr & object_info); + void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows); + std::optional tryGetNumRowsFromCache(const ObjectInfo & object_info); void lazyInitialize(size_t processor); }; diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index 458f681d7b5..c8aaece0711 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -238,12 +238,14 @@ Chunk StorageS3QueueSource::generate() key_with_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); + appendLogElement(reader.getObjectInfo().getPath(), *file_status, processed_rows_from_file, false); } break; } + const auto & path = reader.getObjectInfo().getPath(); + if (shutdown_called) { if (processed_rows_from_file == 0) @@ -253,7 +255,7 @@ Chunk StorageS3QueueSource::generate() { LOG_DEBUG( log, "Table is being dropped, {} rows are already processed from {}, but file is not fully processed", - processed_rows_from_file, reader.getRelativePath()); + processed_rows_from_file, path); try { @@ -265,7 +267,7 @@ Chunk StorageS3QueueSource::generate() key_with_info->relative_path, getCurrentExceptionMessage(true)); } - appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); + appendLogElement(path, *file_status, processed_rows_from_file, false); /// Leave the file half processed. Table is being dropped, so we do not care. break; @@ -273,7 +275,7 @@ Chunk StorageS3QueueSource::generate() LOG_DEBUG(log, "Shutdown called, but file {} is partially processed ({} rows). 
" "Will process the file fully and then shutdown", - reader.getRelativePath(), processed_rows_from_file); + path, processed_rows_from_file); } auto * prev_scope = CurrentThread::get().attachProfileCountersScope(&file_status->profile_counters); @@ -287,31 +289,31 @@ Chunk StorageS3QueueSource::generate() Chunk chunk; if (reader->pull(chunk)) { - LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), reader.getRelativePath()); + LOG_TEST(log, "Read {} rows from file: {}", chunk.getNumRows(), path); file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, reader.getRelativePath(), reader.getObjectInfo().metadata->size_bytes); + chunk, requested_virtual_columns, path, reader.getObjectInfo().metadata->size_bytes); return chunk; } } catch (...) { const auto message = getCurrentExceptionMessage(true); - LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", reader.getRelativePath(), message); + LOG_ERROR(log, "Got an error while pulling chunk. Will set file {} as failed. Error: {} ", path, message); files_metadata->setFileFailed(key_with_info->processing_holder, message); - appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, false); + appendLogElement(path, *file_status, processed_rows_from_file, false); throw; } files_metadata->setFileProcessed(key_with_info->processing_holder); - applyActionAfterProcessing(reader.getRelativePath()); + applyActionAfterProcessing(path); - appendLogElement(reader.getRelativePath(), *file_status, processed_rows_from_file, true); + appendLogElement(path, *file_status, processed_rows_from_file, true); file_status.reset(); processed_rows_from_file = 0; @@ -327,7 +329,7 @@ Chunk StorageS3QueueSource::generate() if (!reader) break; - file_status = files_metadata->getFileStatus(reader.getRelativePath()); + file_status = files_metadata->getFileStatus(reader.getObjectInfo().getPath()); /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
From c150c20512afef6ae816606f197b1ab0a2160712 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 23 May 2024 13:53:36 +0200 Subject: [PATCH 275/392] adjust tests in test_merge_tree_s3 --- tests/integration/test_merge_tree_s3/test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 9216b08f942..0bf81e81383 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -857,9 +857,9 @@ def test_merge_canceled_by_s3_errors(cluster, broken_s3, node_name, storage_poli error = node.query_and_get_error( "OPTIMIZE TABLE test_merge_canceled_by_s3_errors FINAL", ) - assert "ExpectedError Message: mock s3 injected error" in error, error + assert "ExpectedError Message: mock s3 injected unretryable error" in error, error - node.wait_for_log_line("ExpectedError Message: mock s3 injected error") + node.wait_for_log_line("ExpectedError Message: mock s3 injected unretryable error") table_uuid = node.query( "SELECT uuid FROM system.tables WHERE database = 'default' AND name = 'test_merge_canceled_by_s3_errors' LIMIT 1" @@ -867,7 +867,7 @@ def test_merge_canceled_by_s3_errors(cluster, broken_s3, node_name, storage_poli node.query("SYSTEM FLUSH LOGS") error_count_in_blob_log = node.query( - f"SELECT count() FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' AND error like '%mock s3 injected error%'" + f"SELECT count() FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' AND error like '%mock s3 injected unretryable error%'" ).strip() assert int(error_count_in_blob_log) > 0, node.query( f"SELECT * FROM system.blob_storage_log WHERE query_id like '{table_uuid}::%' FORMAT PrettyCompactMonoBlock" @@ -911,7 +911,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): node.query("OPTIMIZE TABLE merge_canceled_by_s3_errors_when_move FINAL") - node.wait_for_log_line("ExpectedError Message: mock s3 injected error") + node.wait_for_log_line("ExpectedError Message: mock s3 injected unretryable error") count = node.query("SELECT count() FROM merge_canceled_by_s3_errors_when_move") assert int(count) == 2000, count From ce26c4f65746ec3058f1639f83b675feef4fda1c Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 13:54:45 +0200 Subject: [PATCH 276/392] =?UTF-8?q?Review=20changes=20and=20replace=20?= =?UTF-8?q?=E2=80=A6=20with=20...?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../template-setting.md | 2 +- docs/changelogs/v20.7.1.4310-prestable.md | 2 +- docs/changelogs/v21.12.1.9017-prestable.md | 2 +- docs/changelogs/v21.3.3.14-lts.md | 2 +- docs/changelogs/v21.4.1.6422-prestable.md | 2 +- docs/changelogs/v21.4.2.10-prestable.md | 2 +- docs/changelogs/v22.6.1.1985-stable.md | 4 +- docs/changelogs/v22.7.1.2484-stable.md | 2 +- docs/changelogs/v22.8.13.20-lts.md | 2 +- docs/changelogs/v23.11.1.2711-stable.md | 2 +- docs/changelogs/v23.12.1.1368-stable.md | 2 +- docs/changelogs/v23.3.1.2823-lts.md | 2 +- docs/changelogs/v23.5.1.3174-stable.md | 2 +- docs/changelogs/v23.8.1.2992-lts.md | 2 +- docs/changelogs/v24.1.3.31-stable.md | 2 +- docs/changelogs/v24.2.1.2248-stable.md | 2 +- docs/changelogs/v24.3.1.2672-lts.md | 2 +- docs/en/development/style.md | 6 +- .../table-engines/integrations/hdfs.md | 2 +- .../engines/table-engines/integrations/s3.md | 2 +- .../custom-partitioning-key.md | 2 +- .../mergetree-family/mergetree.md | 4 +- 
.../table-engines/special/external-data.md | 2 +- .../operations/settings/query-complexity.md | 4 +- docs/en/operations/settings/settings.md | 2 +- .../parametric-functions.md | 4 +- .../reference/quantiles.md | 2 +- .../data-types/aggregatefunction.md | 4 +- .../sql-reference/data-types/fixedstring.md | 4 +- .../nested-data-structures/index.md | 2 +- .../data-types/simpleaggregatefunction.md | 2 +- .../functions/arithmetic-functions.md | 54 ++++++++++++ .../functions/array-functions.md | 84 +++++++++---------- .../functions/date-time-functions.md | 2 +- .../sql-reference/functions/json-functions.md | 24 +++--- .../functions/other-functions.md | 62 +------------- .../functions/string-replace-functions.md | 2 +- .../functions/string-search-functions.md | 12 +-- .../functions/tuple-functions.md | 6 +- .../functions/tuple-map-functions.md | 4 +- .../sql-reference/functions/url-functions.md | 2 +- .../sql-reference/statements/alter/comment.md | 2 +- .../sql-reference/statements/alter/delete.md | 2 +- .../sql-reference/statements/alter/index.md | 2 +- .../sql-reference/statements/alter/update.md | 2 +- .../en/sql-reference/statements/alter/view.md | 6 +- .../sql-reference/statements/create/view.md | 2 +- .../sql-reference/statements/insert-into.md | 2 +- .../sql-reference/statements/select/limit.md | 4 +- .../statements/select/order-by.md | 2 +- docs/en/sql-reference/table-functions/file.md | 2 +- docs/en/sql-reference/table-functions/gcs.md | 2 +- docs/en/sql-reference/table-functions/hdfs.md | 2 +- docs/en/sql-reference/table-functions/s3.md | 2 +- docs/ru/development/style.md | 8 +- .../table-engines/integrations/hdfs.md | 2 +- .../engines/table-engines/integrations/s3.md | 2 +- .../custom-partitioning-key.md | 2 +- .../mergetree-family/mergetree.md | 4 +- .../table-engines/special/external-data.md | 2 +- docs/ru/faq/general/olap.md | 6 +- .../example-datasets/nyc-taxi.md | 2 +- docs/ru/index.md | 12 +-- .../operations/settings/query-complexity.md | 4 +- docs/ru/operations/settings/settings.md | 2 +- .../parametric-functions.md | 4 +- .../reference/quantiles.md | 2 +- .../data-types/aggregatefunction.md | 4 +- .../sql-reference/data-types/fixedstring.md | 4 +- .../nested-data-structures/nested.md | 2 +- docs/ru/sql-reference/data-types/tuple.md | 2 +- .../functions/array-functions.md | 40 ++++----- .../functions/date-time-functions.md | 2 +- .../sql-reference/functions/json-functions.md | 24 +++--- .../functions/other-functions.md | 2 +- .../functions/string-functions.md | 2 +- .../functions/string-search-functions.md | 18 ++-- .../functions/tuple-functions.md | 6 +- .../sql-reference/functions/url-functions.md | 2 +- .../sql-reference/statements/alter/comment.md | 2 +- .../sql-reference/statements/alter/delete.md | 2 +- .../sql-reference/statements/alter/index.md | 2 +- .../sql-reference/statements/alter/update.md | 2 +- .../ru/sql-reference/statements/alter/view.md | 4 +- .../sql-reference/statements/create/view.md | 2 +- .../sql-reference/statements/insert-into.md | 2 +- docs/ru/sql-reference/table-functions/file.md | 2 +- docs/ru/sql-reference/table-functions/s3.md | 2 +- docs/zh/changelog/index.md | 4 +- docs/zh/development/style.md | 8 +- .../table-engines/integrations/hdfs.md | 2 +- .../engines/table-engines/integrations/s3.md | 4 +- .../custom-partitioning-key.md | 2 +- .../mergetree-family/mergetree.md | 4 +- .../table-engines/special/external-data.md | 2 +- docs/zh/faq/general/olap.md | 6 +- .../example-datasets/nyc-taxi.md | 2 +- .../example-datasets/uk-price-paid.mdx | 2 +- 
.../sparse-primary-indexes.md | 2 +- docs/zh/index.md | 12 +-- .../operations/settings/query-complexity.md | 4 +- docs/zh/operations/settings/settings.md | 2 +- .../operations/system-tables/dictionaries.md | 2 +- .../parametric-functions.md | 4 +- .../reference/quantiles.md | 2 +- .../data-types/aggregatefunction.md | 2 +- .../sql-reference/data-types/domains/index.md | 4 +- .../sql-reference/data-types/fixedstring.md | 4 +- .../nested-data-structures/nested.md | 2 +- .../data-types/simpleaggregatefunction.md | 2 +- docs/zh/sql-reference/data-types/tuple.md | 2 +- .../functions/array-functions.md | 40 ++++----- .../functions/date-time-functions.md | 2 +- .../functions/higher-order-functions.md | 22 ++--- .../sql-reference/functions/in-functions.md | 4 +- .../sql-reference/functions/json-functions.md | 24 +++--- .../functions/other-functions.md | 2 +- .../functions/string-functions.md | 6 +- .../functions/string-search-functions.md | 18 ++-- .../sql-reference/functions/url-functions.md | 2 +- .../sql-reference/statements/alter/delete.md | 2 +- .../sql-reference/statements/alter/index.md | 2 +- .../sql-reference/statements/alter/update.md | 2 +- .../zh/sql-reference/statements/alter/view.md | 4 +- .../sql-reference/statements/create/view.md | 2 +- .../sql-reference/statements/insert-into.md | 2 +- .../sql-reference/statements/select/limit.md | 4 +- .../statements/select/order-by.md | 2 +- docs/zh/sql-reference/table-functions/file.md | 2 +- docs/zh/sql-reference/table-functions/hdfs.md | 2 +- docs/zh/sql-reference/table-functions/s3.md | 2 +- 131 files changed, 384 insertions(+), 384 deletions(-) diff --git a/docs/_description_templates/template-setting.md b/docs/_description_templates/template-setting.md index fc912aba3e1..f4525d872df 100644 --- a/docs/_description_templates/template-setting.md +++ b/docs/_description_templates/template-setting.md @@ -2,7 +2,7 @@ Description. -For the switch setting, use the typical phrase: “Enables or disables something …”. +For the switch setting, use the typical phrase: “Enables or disables something ...”. Possible values: diff --git a/docs/changelogs/v20.7.1.4310-prestable.md b/docs/changelogs/v20.7.1.4310-prestable.md index f47c7334228..aa1d993b263 100644 --- a/docs/changelogs/v20.7.1.4310-prestable.md +++ b/docs/changelogs/v20.7.1.4310-prestable.md @@ -166,4 +166,4 @@ * NO CL ENTRY: 'Revert "Abort on std::out_of_range in debug builds"'. [#12752](https://github.com/ClickHouse/ClickHouse/pull/12752) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Bump protobuf from 3.12.2 to 3.12.4 in /docs/tools'. [#13102](https://github.com/ClickHouse/ClickHouse/pull/13102) ([dependabot-preview[bot]](https://github.com/apps/dependabot-preview)). * NO CL ENTRY: 'Merge [#12574](https://github.com/ClickHouse/ClickHouse/issues/12574)'. [#13158](https://github.com/ClickHouse/ClickHouse/pull/13158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* NO CL ENTRY: 'Revert "Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQuer…"'. [#13303](https://github.com/ClickHouse/ClickHouse/pull/13303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add QueryTimeMicroseconds, SelectQueryTimeMicroseconds and InsertQuer..."'. [#13303](https://github.com/ClickHouse/ClickHouse/pull/13303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
diff --git a/docs/changelogs/v21.12.1.9017-prestable.md b/docs/changelogs/v21.12.1.9017-prestable.md index 88b8260e312..bd84873e67a 100644 --- a/docs/changelogs/v21.12.1.9017-prestable.md +++ b/docs/changelogs/v21.12.1.9017-prestable.md @@ -421,5 +421,5 @@ sidebar_label: 2022 * Fix possible crash in DataTypeAggregateFunction [#32287](https://github.com/ClickHouse/ClickHouse/pull/32287) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Update backport.py [#32323](https://github.com/ClickHouse/ClickHouse/pull/32323) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix graphite-bench build [#32351](https://github.com/ClickHouse/ClickHouse/pull/32351) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Revert "graphite: split tagged/plain rollup rules (for merges perfoma… [#32376](https://github.com/ClickHouse/ClickHouse/pull/32376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "graphite: split tagged/plain rollup rules (for merges perfoma... [#32376](https://github.com/ClickHouse/ClickHouse/pull/32376) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Another attempt to fix unit test Executor::RemoveTasksStress [#32390](https://github.com/ClickHouse/ClickHouse/pull/32390) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). diff --git a/docs/changelogs/v21.3.3.14-lts.md b/docs/changelogs/v21.3.3.14-lts.md index 57bde602f21..91d99deaa6b 100644 --- a/docs/changelogs/v21.3.3.14-lts.md +++ b/docs/changelogs/v21.3.3.14-lts.md @@ -18,4 +18,4 @@ sidebar_label: 2022 #### NOT FOR CHANGELOG / INSIGNIFICANT -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). diff --git a/docs/changelogs/v21.4.1.6422-prestable.md b/docs/changelogs/v21.4.1.6422-prestable.md index 2eadb0d4754..66937c3be15 100644 --- a/docs/changelogs/v21.4.1.6422-prestable.md +++ b/docs/changelogs/v21.4.1.6422-prestable.md @@ -223,7 +223,7 @@ sidebar_label: 2022 * Do not overlap zookeeper path for ReplicatedMergeTree in stateless *.sh tests [#21724](https://github.com/ClickHouse/ClickHouse/pull/21724) ([Azat Khuzhin](https://github.com/azat)). * make the fuzzer use sources from the CI [#21754](https://github.com/ClickHouse/ClickHouse/pull/21754) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add one more variant to memcpy benchmark [#21759](https://github.com/ClickHouse/ClickHouse/pull/21759) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). * docs(fix): typo [#21775](https://github.com/ClickHouse/ClickHouse/pull/21775) ([Ali Demirci](https://github.com/depyronick)). * DDLWorker.cpp: fixed exceeded amount of tries typo [#21807](https://github.com/ClickHouse/ClickHouse/pull/21807) ([Eldar Nasyrov](https://github.com/3ldar-nasyrov)). * fix integration MaterializeMySQL test [#21819](https://github.com/ClickHouse/ClickHouse/pull/21819) ([TCeason](https://github.com/TCeason)). 
diff --git a/docs/changelogs/v21.4.2.10-prestable.md b/docs/changelogs/v21.4.2.10-prestable.md index 3db17ddfcf3..b9bdbd80c0c 100644 --- a/docs/changelogs/v21.4.2.10-prestable.md +++ b/docs/changelogs/v21.4.2.10-prestable.md @@ -226,7 +226,7 @@ sidebar_label: 2022 * Do not overlap zookeeper path for ReplicatedMergeTree in stateless *.sh tests [#21724](https://github.com/ClickHouse/ClickHouse/pull/21724) ([Azat Khuzhin](https://github.com/azat)). * make the fuzzer use sources from the CI [#21754](https://github.com/ClickHouse/ClickHouse/pull/21754) ([Alexander Kuzmenkov](https://github.com/akuzm)). * Add one more variant to memcpy benchmark [#21759](https://github.com/ClickHouse/ClickHouse/pull/21759) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* fix incorrect number of rows for Chunks with no columns in PartialSor… [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). +* fix incorrect number of rows for Chunks with no columns in PartialSor... [#21761](https://github.com/ClickHouse/ClickHouse/pull/21761) ([Alexander Kuzmenkov](https://github.com/akuzm)). * docs(fix): typo [#21775](https://github.com/ClickHouse/ClickHouse/pull/21775) ([Ali Demirci](https://github.com/depyronick)). * DDLWorker.cpp: fixed exceeded amount of tries typo [#21807](https://github.com/ClickHouse/ClickHouse/pull/21807) ([Eldar Nasyrov](https://github.com/3ldar-nasyrov)). * fix integration MaterializeMySQL test [#21819](https://github.com/ClickHouse/ClickHouse/pull/21819) ([TCeason](https://github.com/TCeason)). diff --git a/docs/changelogs/v22.6.1.1985-stable.md b/docs/changelogs/v22.6.1.1985-stable.md index c915d24fe00..7bd7038377a 100644 --- a/docs/changelogs/v22.6.1.1985-stable.md +++ b/docs/changelogs/v22.6.1.1985-stable.md @@ -160,7 +160,7 @@ sidebar_label: 2022 * fix toString error on DatatypeDate32. [#37775](https://github.com/ClickHouse/ClickHouse/pull/37775) ([LiuNeng](https://github.com/liuneng1994)). * The clickhouse-keeper setting `dead_session_check_period_ms` was transformed into microseconds (multiplied by 1000), which lead to dead sessions only being cleaned up after several minutes (instead of 500ms). [#37824](https://github.com/ClickHouse/ClickHouse/pull/37824) ([Michael Lex](https://github.com/mlex)). * Fix possible "No more packets are available" for distributed queries (in case of `async_socket_for_remote`/`use_hedged_requests` is disabled). [#37826](https://github.com/ClickHouse/ClickHouse/pull/37826) ([Azat Khuzhin](https://github.com/azat)). -* Do not drop the inner target table when executing `ALTER TABLE … MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)). +* Do not drop the inner target table when executing `ALTER TABLE ... MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)). * Fix directory ownership of coordination dir in clickhouse-keeper Docker image. Fixes [#37914](https://github.com/ClickHouse/ClickHouse/issues/37914). [#37915](https://github.com/ClickHouse/ClickHouse/pull/37915) ([James Maidment](https://github.com/jamesmaidment)). * Dictionaries fix custom query with update field and `{condition}`. Closes [#33746](https://github.com/ClickHouse/ClickHouse/issues/33746). [#37947](https://github.com/ClickHouse/ClickHouse/pull/37947) ([Maksim Kita](https://github.com/kitaisreal)). * Fix possible incorrect result of `SELECT ... 
WITH FILL` in the case when `ORDER BY` should be applied after `WITH FILL` result (e.g. for outer query). Incorrect result was caused by optimization for `ORDER BY` expressions ([#35623](https://github.com/ClickHouse/ClickHouse/issues/35623)). Closes [#37904](https://github.com/ClickHouse/ClickHouse/issues/37904). [#37959](https://github.com/ClickHouse/ClickHouse/pull/37959) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). @@ -180,7 +180,7 @@ sidebar_label: 2022 #### NO CL ENTRY * NO CL ENTRY: 'Revert "Fix mutations in tables with columns of type `Object`"'. [#37355](https://github.com/ClickHouse/ClickHouse/pull/37355) ([Alexander Tokmakov](https://github.com/tavplubix)). -* NO CL ENTRY: 'Revert "Remove height restrictions from the query div in play web tool, and m…"'. [#37501](https://github.com/ClickHouse/ClickHouse/pull/37501) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Remove height restrictions from the query div in play web tool, and m..."'. [#37501](https://github.com/ClickHouse/ClickHouse/pull/37501) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Revert "Add support for preprocessing ZooKeeper operations in `clickhouse-keeper`"'. [#37534](https://github.com/ClickHouse/ClickHouse/pull/37534) ([Antonio Andelic](https://github.com/antonio2368)). * NO CL ENTRY: 'Revert "(only with zero-copy replication, non-production experimental feature not recommended to use) fix possible deadlock during fetching part"'. [#37545](https://github.com/ClickHouse/ClickHouse/pull/37545) ([Alexander Tokmakov](https://github.com/tavplubix)). * NO CL ENTRY: 'Revert "RFC: Fix converting types for UNION queries (may produce LOGICAL_ERROR)"'. [#37582](https://github.com/ClickHouse/ClickHouse/pull/37582) ([Dmitry Novik](https://github.com/novikd)). diff --git a/docs/changelogs/v22.7.1.2484-stable.md b/docs/changelogs/v22.7.1.2484-stable.md index 7464b0449ee..c4a76c66e0c 100644 --- a/docs/changelogs/v22.7.1.2484-stable.md +++ b/docs/changelogs/v22.7.1.2484-stable.md @@ -410,7 +410,7 @@ sidebar_label: 2022 * Add test for [#39132](https://github.com/ClickHouse/ClickHouse/issues/39132) [#39173](https://github.com/ClickHouse/ClickHouse/pull/39173) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Suppression for BC check (`Cannot parse string 'Hello' as UInt64`) [#39176](https://github.com/ClickHouse/ClickHouse/pull/39176) ([Alexander Tokmakov](https://github.com/tavplubix)). * Fix 01961_roaring_memory_tracking test [#39187](https://github.com/ClickHouse/ClickHouse/pull/39187) ([Dmitry Novik](https://github.com/novikd)). -* Cleanup: done during [#38719](https://github.com/ClickHouse/ClickHouse/issues/38719) (SortingStep: deduce way to sort based on … [#39191](https://github.com/ClickHouse/ClickHouse/pull/39191) ([Igor Nikonov](https://github.com/devcrafter)). +* Cleanup: done during [#38719](https://github.com/ClickHouse/ClickHouse/issues/38719) (SortingStep: deduce way to sort based on ... [#39191](https://github.com/ClickHouse/ClickHouse/pull/39191) ([Igor Nikonov](https://github.com/devcrafter)). * Fix exception in AsynchronousMetrics for s390x [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)). * Optimize accesses to system.stack_trace (filter by name before sending signal) [#39212](https://github.com/ClickHouse/ClickHouse/pull/39212) ([Azat Khuzhin](https://github.com/azat)). 
* Enable warning "-Wdeprecated-dynamic-exception-spec" [#39213](https://github.com/ClickHouse/ClickHouse/pull/39213) ([Robert Schulze](https://github.com/rschu1ze)). diff --git a/docs/changelogs/v22.8.13.20-lts.md b/docs/changelogs/v22.8.13.20-lts.md index 0734f40bf3e..ad44fbfc5d6 100644 --- a/docs/changelogs/v22.8.13.20-lts.md +++ b/docs/changelogs/v22.8.13.20-lts.md @@ -20,4 +20,4 @@ sidebar_label: 2023 * Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un… [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)). +* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un... [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)). diff --git a/docs/changelogs/v23.11.1.2711-stable.md b/docs/changelogs/v23.11.1.2711-stable.md index e32dee41dc7..0bdee08f5c9 100644 --- a/docs/changelogs/v23.11.1.2711-stable.md +++ b/docs/changelogs/v23.11.1.2711-stable.md @@ -217,7 +217,7 @@ sidebar_label: 2023 * S3Queue minor fix [#56999](https://github.com/ClickHouse/ClickHouse/pull/56999) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix file path validation for DatabaseFileSystem [#57029](https://github.com/ClickHouse/ClickHouse/pull/57029) ([San](https://github.com/santrancisco)). * Fix `fuzzBits` with `ARRAY JOIN` [#57033](https://github.com/ClickHouse/ClickHouse/pull/57033) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix Nullptr dereference in partial merge join with joined_subquery_re… [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). +* Fix Nullptr dereference in partial merge join with joined_subquery_re... [#57048](https://github.com/ClickHouse/ClickHouse/pull/57048) ([vdimir](https://github.com/vdimir)). * Fix race condition in RemoteSource [#57052](https://github.com/ClickHouse/ClickHouse/pull/57052) ([Raúl Marín](https://github.com/Algunenano)). * Implement `bitHammingDistance` for big integers [#57073](https://github.com/ClickHouse/ClickHouse/pull/57073) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * S3-style links bug fix [#57075](https://github.com/ClickHouse/ClickHouse/pull/57075) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). diff --git a/docs/changelogs/v23.12.1.1368-stable.md b/docs/changelogs/v23.12.1.1368-stable.md index 1a322ae9c0f..cb8ba57100e 100644 --- a/docs/changelogs/v23.12.1.1368-stable.md +++ b/docs/changelogs/v23.12.1.1368-stable.md @@ -272,7 +272,7 @@ sidebar_label: 2023 * Bump Azure to v1.6.0 [#58052](https://github.com/ClickHouse/ClickHouse/pull/58052) ([Robert Schulze](https://github.com/rschu1ze)). * Correct values for randomization [#58058](https://github.com/ClickHouse/ClickHouse/pull/58058) ([Anton Popov](https://github.com/CurtizJ)). 
* Non post request should be readonly [#58060](https://github.com/ClickHouse/ClickHouse/pull/58060) ([San](https://github.com/santrancisco)). -* Revert "Merge pull request [#55710](https://github.com/ClickHouse/ClickHouse/issues/55710) from guoxiaolongzte/clickhouse-test… [#58066](https://github.com/ClickHouse/ClickHouse/pull/58066) ([Raúl Marín](https://github.com/Algunenano)). +* Revert "Merge pull request [#55710](https://github.com/ClickHouse/ClickHouse/issues/55710) from guoxiaolongzte/clickhouse-test... [#58066](https://github.com/ClickHouse/ClickHouse/pull/58066) ([Raúl Marín](https://github.com/Algunenano)). * fix typo in the test 02479 [#58072](https://github.com/ClickHouse/ClickHouse/pull/58072) ([Sema Checherinda](https://github.com/CheSema)). * Bump Azure to 1.7.2 [#58075](https://github.com/ClickHouse/ClickHouse/pull/58075) ([Robert Schulze](https://github.com/rschu1ze)). * Fix flaky test `02567_and_consistency` [#58076](https://github.com/ClickHouse/ClickHouse/pull/58076) ([Anton Popov](https://github.com/CurtizJ)). diff --git a/docs/changelogs/v23.3.1.2823-lts.md b/docs/changelogs/v23.3.1.2823-lts.md index 0c9be3601da..f81aba53ebe 100644 --- a/docs/changelogs/v23.3.1.2823-lts.md +++ b/docs/changelogs/v23.3.1.2823-lts.md @@ -520,7 +520,7 @@ sidebar_label: 2023 * Improve script for updating clickhouse-docs [#48135](https://github.com/ClickHouse/ClickHouse/pull/48135) ([Alexander Tokmakov](https://github.com/tavplubix)). * Fix stdlib compatibility issues [#48150](https://github.com/ClickHouse/ClickHouse/pull/48150) ([DimasKovas](https://github.com/DimasKovas)). * Make test test_disallow_concurrency less flaky [#48152](https://github.com/ClickHouse/ClickHouse/pull/48152) ([Vitaly Baranov](https://github.com/vitlibar)). -* Remove unused mockSystemDatabase from gtest_transform_query_for_exter… [#48162](https://github.com/ClickHouse/ClickHouse/pull/48162) ([Vladimir C](https://github.com/vdimir)). +* Remove unused mockSystemDatabase from gtest_transform_query_for_exter... [#48162](https://github.com/ClickHouse/ClickHouse/pull/48162) ([Vladimir C](https://github.com/vdimir)). * Update environmental-sensors.md [#48166](https://github.com/ClickHouse/ClickHouse/pull/48166) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Correctly handle NULL constants in logical optimizer for new analyzer [#48168](https://github.com/ClickHouse/ClickHouse/pull/48168) ([Antonio Andelic](https://github.com/antonio2368)). * Try making KeeperMap test more stable [#48170](https://github.com/ClickHouse/ClickHouse/pull/48170) ([Antonio Andelic](https://github.com/antonio2368)). diff --git a/docs/changelogs/v23.5.1.3174-stable.md b/docs/changelogs/v23.5.1.3174-stable.md index 2212eb6e893..4bdd4139afc 100644 --- a/docs/changelogs/v23.5.1.3174-stable.md +++ b/docs/changelogs/v23.5.1.3174-stable.md @@ -474,7 +474,7 @@ sidebar_label: 2023 * Fix flakiness of test_distributed_load_balancing test [#49921](https://github.com/ClickHouse/ClickHouse/pull/49921) ([Azat Khuzhin](https://github.com/azat)). * Add some logging [#49925](https://github.com/ClickHouse/ClickHouse/pull/49925) ([Kseniia Sumarokova](https://github.com/kssenii)). * Support hardlinking parts transactionally [#49931](https://github.com/ClickHouse/ClickHouse/pull/49931) ([Michael Kolupaev](https://github.com/al13n321)). -* Fix for analyzer: 02377_ optimize_sorting_by_input_stream_properties_e… [#49943](https://github.com/ClickHouse/ClickHouse/pull/49943) ([Igor Nikonov](https://github.com/devcrafter)). 
+* Fix for analyzer: 02377_ optimize_sorting_by_input_stream_properties_e... [#49943](https://github.com/ClickHouse/ClickHouse/pull/49943) ([Igor Nikonov](https://github.com/devcrafter)). * Follow up to [#49429](https://github.com/ClickHouse/ClickHouse/issues/49429) [#49964](https://github.com/ClickHouse/ClickHouse/pull/49964) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix flaky test_ssl_cert_authentication to use urllib3 [#49982](https://github.com/ClickHouse/ClickHouse/pull/49982) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). * Fix woboq codebrowser build with -Wno-poison-system-directories [#49992](https://github.com/ClickHouse/ClickHouse/pull/49992) ([Azat Khuzhin](https://github.com/azat)). diff --git a/docs/changelogs/v23.8.1.2992-lts.md b/docs/changelogs/v23.8.1.2992-lts.md index 7c224b19350..05385d9c52b 100644 --- a/docs/changelogs/v23.8.1.2992-lts.md +++ b/docs/changelogs/v23.8.1.2992-lts.md @@ -272,7 +272,7 @@ sidebar_label: 2023 * Add more checks into ThreadStatus ctor. [#42019](https://github.com/ClickHouse/ClickHouse/pull/42019) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Refactor Query Tree visitor [#46740](https://github.com/ClickHouse/ClickHouse/pull/46740) ([Dmitry Novik](https://github.com/novikd)). * Revert "Revert "Randomize JIT settings in tests"" [#48282](https://github.com/ClickHouse/ClickHouse/pull/48282) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix outdated cache configuration in s3 tests: s3_storage_policy_by_defau… [#48424](https://github.com/ClickHouse/ClickHouse/pull/48424) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix outdated cache configuration in s3 tests: s3_storage_policy_by_defau... [#48424](https://github.com/ClickHouse/ClickHouse/pull/48424) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix IN with decimal in analyzer [#48754](https://github.com/ClickHouse/ClickHouse/pull/48754) ([vdimir](https://github.com/vdimir)). * Some unclear change in StorageBuffer::reschedule() for something [#49723](https://github.com/ClickHouse/ClickHouse/pull/49723) ([DimasKovas](https://github.com/DimasKovas)). * MergeTree & SipHash checksum big-endian support [#50276](https://github.com/ClickHouse/ClickHouse/pull/50276) ([ltrk2](https://github.com/ltrk2)). diff --git a/docs/changelogs/v24.1.3.31-stable.md b/docs/changelogs/v24.1.3.31-stable.md index 046ca451fbc..e898fba5c87 100644 --- a/docs/changelogs/v24.1.3.31-stable.md +++ b/docs/changelogs/v24.1.3.31-stable.md @@ -13,7 +13,7 @@ sidebar_label: 2024 #### Bug Fix (user-visible misbehavior in an official stable release) -* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `ASTAlterCommand::formatImpl` in case of column specific settings... [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). * Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). * Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)). 
* Fix incorrect result of arrayElement / map[] on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)). diff --git a/docs/changelogs/v24.2.1.2248-stable.md b/docs/changelogs/v24.2.1.2248-stable.md index 6113dd51ab1..02affe12c43 100644 --- a/docs/changelogs/v24.2.1.2248-stable.md +++ b/docs/changelogs/v24.2.1.2248-stable.md @@ -130,7 +130,7 @@ sidebar_label: 2024 * Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)). * Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)). * Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)). -* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `ASTAlterCommand::formatImpl` in case of column specific settings... [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). * Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)). * Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)). * Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)). diff --git a/docs/changelogs/v24.3.1.2672-lts.md b/docs/changelogs/v24.3.1.2672-lts.md index e5d008680a8..006ab941203 100644 --- a/docs/changelogs/v24.3.1.2672-lts.md +++ b/docs/changelogs/v24.3.1.2672-lts.md @@ -526,7 +526,7 @@ sidebar_label: 2024 * No "please" [#61916](https://github.com/ClickHouse/ClickHouse/pull/61916) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Update version_date.tsv and changelogs after v23.12.6.19-stable [#61917](https://github.com/ClickHouse/ClickHouse/pull/61917) ([robot-clickhouse](https://github.com/robot-clickhouse)). * Update version_date.tsv and changelogs after v24.1.8.22-stable [#61918](https://github.com/ClickHouse/ClickHouse/pull/61918) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Fix flaky test_broken_projestions/test.py::test_broken_ignored_replic… [#61932](https://github.com/ClickHouse/ClickHouse/pull/61932) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky test_broken_projestions/test.py::test_broken_ignored_replic... [#61932](https://github.com/ClickHouse/ClickHouse/pull/61932) ([Kseniia Sumarokova](https://github.com/kssenii)). * Check is Rust avaiable for build, if not, suggest a way to disable Rust support [#61938](https://github.com/ClickHouse/ClickHouse/pull/61938) ([Azat Khuzhin](https://github.com/azat)). * CI: new ci menu in PR body [#61948](https://github.com/ClickHouse/ClickHouse/pull/61948) ([Max K.](https://github.com/maxknv)). * Remove flaky test `01193_metadata_loading` [#61961](https://github.com/ClickHouse/ClickHouse/pull/61961) ([Nikita Taranov](https://github.com/nickitat)). 
diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 77a550f2a0e..1444bc0e452 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -57,7 +57,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** Add spaces around binary operators (`+`, `-`, `*`, `/`, `%`, …) and the ternary operator `?:`. +**7.** Add spaces around binary operators (`+`, `-`, `*`, `/`, `%`, ...) and the ternary operator `?:`. ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -86,7 +86,7 @@ dst.ClickGoodEvent = click.GoodEvent; If necessary, the operator can be wrapped to the next line. In this case, the offset in front of it is increased. -**11.** Do not use a space to separate unary operators (`--`, `++`, `*`, `&`, …) from the argument. +**11.** Do not use a space to separate unary operators (`--`, `++`, `*`, `&`, ...) from the argument. **12.** Put a space after a comma, but not before it. The same rule goes for a semicolon inside a `for` expression. @@ -115,7 +115,7 @@ public: **16.** If the same `namespace` is used for the entire file, and there isn’t anything else significant, an offset is not necessary inside `namespace`. -**17.** If the block for an `if`, `for`, `while`, or other expression consists of a single `statement`, the curly brackets are optional. Place the `statement` on a separate line, instead. This rule is also valid for nested `if`, `for`, `while`, … +**17.** If the block for an `if`, `for`, `while`, or other expression consists of a single `statement`, the curly brackets are optional. Place the `statement` on a separate line, instead. This rule is also valid for nested `if`, `for`, `while`, ... But if the inner `statement` contains curly brackets or `else`, the external block should be written in curly brackets. diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index dbd1c270a4a..2749fa7e479 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -118,7 +118,7 @@ If the listing of files contains number ranges with leading zeros, use the const **Example** -Create table with files named `file000`, `file001`, … , `file999`: +Create table with files named `file000`, `file001`, ... , `file999`: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index dfa06801d04..cb1da1c8e68 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -178,7 +178,7 @@ If the listing of files contains number ranges with leading zeros, use the const **Example with wildcards 1** -Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Create table with files named `file-000.csv`, `file-001.csv`, ... 
, `file-999.csv`: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 23d98d4b20e..eda87fd06c1 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -71,7 +71,7 @@ WHERE table = 'visits' └───────────┴───────────────────┴────────┘ ``` -The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries. +The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER ... PARTITION](../../../sql-reference/statements/alter/partition.md) queries. The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7862eef69f8..a009c4a32f3 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -954,7 +954,7 @@ In the case of `MergeTree` tables, data is getting to disk in different ways: - As a result of an insert (`INSERT` query). - During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). - When downloading from another replica. -- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). +- As a result of partition freezing [ALTER TABLE ... FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition). In all these cases except for mutations and partition freezing, a part is stored on a volume and a disk according to the given storage policy: @@ -966,7 +966,7 @@ Under the hood, mutations and partition freezing make use of [hard links](https: In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file. Data is never transferred from the last one and into the first one. One may use system tables [system.part_log](/docs/en/operations/system-tables/part_log.md/#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](/docs/en/operations/system-tables/parts.md/#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs. -User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](/docs/en/sql-reference/statements/alter/partition.md/#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. 
User will get an error message if not enough free space is available or if any of the required conditions are not met. +User can force moving a part or a partition from one volume to another using the query [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](/docs/en/sql-reference/statements/alter/partition.md/#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met. Moving data does not interfere with data replication. Therefore, different storage policies can be specified for the same table on different replicas. diff --git a/docs/en/engines/table-engines/special/external-data.md b/docs/en/engines/table-engines/special/external-data.md index 7ea3f3e30d6..f6d6dae7eb6 100644 --- a/docs/en/engines/table-engines/special/external-data.md +++ b/docs/en/engines/table-engines/special/external-data.md @@ -29,7 +29,7 @@ Only a single table can be retrieved from stdin. The following parameters are optional: **–name**– Name of the table. If omitted, _data is used. **–format** – Data format in the file. If omitted, TabSeparated is used. -One of the following parameters is required:**–types** – A list of comma-separated column types. For example: `UInt64,String`. The columns will be named _1, _2, … +One of the following parameters is required:**–types** – A list of comma-separated column types. For example: `UInt64,String`. The columns will be named _1, _2, ... **–structure**– The table structure in the format`UserID UInt64`, `URL String`. Defines the column names and types. The files specified in ‘file’ will be parsed by the format specified in ‘format’, using the data types specified in ‘types’ or ‘structure’. The table will be uploaded to the server and accessible there as a temporary table with the name in ‘name’. diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index d86f18ff982..2a20e74e20f 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -303,7 +303,7 @@ What to do when the amount of data exceeds one of the limits: ‘throw’ or ‘ Limits the number of rows in the hash table that is used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. +This settings applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. If a query contains multiple joins, ClickHouse checks this setting for every intermediate result. @@ -320,7 +320,7 @@ Default value: 0. Limits the size in bytes of the hash table used when joining tables. -This setting applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). +This setting applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). If the query contains joins, ClickHouse checks this setting for every intermediate result. 
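The query-complexity hunk above touches the two join-limit settings; as a minimal illustrative sketch of how they are typically applied per query (the table names below are hypothetical, only the setting names and the throw/break overflow behaviour come from the surrounding text):

```sql
-- Illustrative only: cap the hash table built for the right-hand side of the join
-- and return a partial result instead of an exception when a cap is reached.
SELECT l.id, r.value
FROM left_table AS l
INNER JOIN right_table AS r ON l.id = r.id
SETTINGS
    max_rows_in_join = 1000000,
    max_bytes_in_join = 104857600,  -- 100 MiB
    join_overflow_mode = 'break';
```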
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 91b544c6a82..2b5cd11819a 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2248,7 +2248,7 @@ Default value: 0. ## count_distinct_implementation {#count_distinct_implementation} -Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction. +Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction. Possible values: diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 8981ac1f752..1dc89b8dcf9 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -82,7 +82,7 @@ FROM In this case, you should remember that you do not know the histogram bin borders. -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) Checks whether the sequence contains an event chain that matches the pattern. @@ -172,7 +172,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) +## sequenceCount(pattern)(time, cond1, cond2, ...) Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched. diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index e2a5bc53e32..856d447ac13 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 ## quantiles -Syntax: `quantiles(level1, level2, …)(x)` +Syntax: `quantiles(level1, level2, ...)(x)` All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDD`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values. diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index 87511a505dc..37f0d0e50ae 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -6,9 +6,9 @@ sidebar_label: AggregateFunction # AggregateFunction -Aggregate functions can have an implementation-defined intermediate state that can be serialized to an `AggregateFunction(…)` data type and stored in a table, usually, by means of [a materialized view](../../sql-reference/statements/create/view.md). The common way to produce an aggregate function state is by calling the aggregate function with the `-State` suffix. To get the final result of aggregation in the future, you must use the same aggregate function with the `-Merge`suffix. 
+Aggregate functions can have an implementation-defined intermediate state that can be serialized to an `AggregateFunction(...)` data type and stored in a table, usually, by means of [a materialized view](../../sql-reference/statements/create/view.md). The common way to produce an aggregate function state is by calling the aggregate function with the `-State` suffix. To get the final result of aggregation in the future, you must use the same aggregate function with the `-Merge`suffix. -`AggregateFunction(name, types_of_arguments…)` — parametric data type. +`AggregateFunction(name, types_of_arguments...)` — parametric data type. **Parameters** diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md index 0316df7fe34..0c021b28f74 100644 --- a/docs/en/sql-reference/data-types/fixedstring.md +++ b/docs/en/sql-reference/data-types/fixedstring.md @@ -21,8 +21,8 @@ The `FixedString` type is efficient when data has the length of precisely `N` by Examples of the values that can be efficiently stored in `FixedString`-typed columns: - The binary representation of IP addresses (`FixedString(16)` for IPv6). -- Language codes (ru_RU, en_US … ). -- Currency codes (USD, RUB … ). +- Language codes (ru_RU, en_US ... ). +- Currency codes (USD, RUB ... ). - Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256). To store UUID values, use the [UUID](../../sql-reference/data-types/uuid.md) data type. diff --git a/docs/en/sql-reference/data-types/nested-data-structures/index.md b/docs/en/sql-reference/data-types/nested-data-structures/index.md index d118170cd39..579ee9bfa8b 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/index.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/index.md @@ -6,7 +6,7 @@ sidebar_label: Nested(Name1 Type1, Name2 Type2, ...) # Nested -## Nested(name1 Type1, Name2 Type2, …) +## Nested(name1 Type1, Name2 Type2, ...) A nested data structure is like a table inside a cell. The parameters of a nested data structure – the column names and types – are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure. diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 39f8409c1e1..4fb74ac30e4 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -5,7 +5,7 @@ sidebar_label: SimpleAggregateFunction --- # SimpleAggregateFunction -`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we do not have to store and process any extra data. 
+`SimpleAggregateFunction(name, types_of_arguments...)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we do not have to store and process any extra data. The common way to produce an aggregate function value is by calling the aggregate function with the [-SimpleState](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-simplestate) suffix. diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 6d95f3dc358..8b8527acfdf 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -140,6 +140,60 @@ Same as `intDiv` but returns zero when dividing by zero or when dividing a minim intDivOrZero(a, b) ``` +## isFinite + +Returns 1 if the Float32 or Float64 argument not infinite and not a NaN, otherwise this function returns 0. + +**Syntax** + +```sql +isFinite(x) +``` + +## isInfinite + +Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. Note that 0 is returned for a NaN. + +**Syntax** + +```sql +isInfinite(x) +``` + +## ifNotFinite + +Checks whether a floating point value is finite. + +**Syntax** + +```sql +ifNotFinite(x,y) +``` + +**Arguments** + +- `x` — Value to check for infinity. [Float\*](../../sql-reference/data-types/float.md). +- `y` — Fallback value. [Float\*](../../sql-reference/data-types/float.md). + +**Returned value** + +- `x` if `x` is finite. +- `y` if `x` is not finite. + +**Example** + +Query: + + SELECT 1/0 as infimum, ifNotFinite(infimum,42) + +Result: + + ┌─infimum─┬─ifNotFinite(divide(1, 0), 42)─┐ + │ inf │ 42 │ + └─────────┴───────────────────────────────┘ + +You can get similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. + ## modulo Calculates the remainder of the division of two values `a` by `b`. diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 87e733a4b0c..f929ea00b8b 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -561,7 +561,7 @@ Result: └─────────────┴─────────────┴────────────────┴─────────────────┘ ``` -## array(x1, …), operator \[x1, …\] +## array(x1, ...), operator \[x1, ...\] Creates an array from the function arguments. The arguments must be constants and have types that have the smallest common type. At least one argument must be passed, because otherwise it isn’t clear which type of array to create. That is, you can’t use this function to create an empty array (to do that, use the ‘emptyArray\*’ function described above). @@ -768,9 +768,9 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) Elements set to `NULL` are handled as normal values. -## arrayCount(\[func,\] arr1, …) +## arrayCount(\[func,\] arr1, ...) 
-Returns the number of elements for which `func(arr1[i], …, arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array. +Returns the number of elements for which `func(arr1[i], ..., arrN[i])` returns something other than 0. If `func` is not specified, it returns the number of non-zero elements in the array. Note that the `arrayCount` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. @@ -847,7 +847,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) -Returns the array \[1, 2, 3, …, length (arr) \] +Returns the array \[1, 2, 3, ..., length (arr) \] This function is normally used with ARRAY JOIN. It allows counting something just once for each array after applying ARRAY JOIN. Example: @@ -887,7 +887,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) This function can also be used in higher-order functions. For example, you can use it to get array indexes for elements that match a condition. -## arrayEnumerateUniq(arr, …) +## arrayEnumerateUniq(arr, ...) Returns an array the same size as the source array, indicating for each element what its position is among elements with the same value. For example: arrayEnumerateUniq(\[10, 20, 10, 30\]) = \[1, 1, 2, 1\]. @@ -1206,7 +1206,7 @@ Result: └───────────────────┘ ``` -## arraySort(\[func,\] arr, …) {#sort} +## arraySort(\[func,\] arr, ...) {#sort} Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description. @@ -1307,11 +1307,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used. ::: -## arrayPartialSort(\[func,\] limit, arr, …) +## arrayPartialSort(\[func,\] limit, arr, ...) Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. -## arrayReverseSort(\[func,\] arr, …) {#reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#reverse-sort} Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description. @@ -1412,7 +1412,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayPartialReverseSort(\[func,\] limit, arr, …) +## arrayPartialReverseSort(\[func,\] limit, arr, ...) Same as `arrayReverseSort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in descending order. 
Remaining elements `(limit..N]` shall contain elements in unspecified order. @@ -1535,7 +1535,7 @@ Result: [3,9,1,4,5,6,7,8,2,10] ``` -## arrayUniq(arr, …) +## arrayUniq(arr, ...) If one argument is passed, it counts the number of different elements in the array. If multiple arguments are passed, it counts the number of different tuples of elements at corresponding positions in multiple arrays. @@ -2079,9 +2079,9 @@ Result: └───────────────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) +## arrayMap(func, arr1, ...) -Returns an array obtained from the original arrays by application of `func(arr1[i], …, arrN[i])` for each element. Arrays `arr1` … `arrN` must have the same number of elements. +Returns an array obtained from the original arrays by application of `func(arr1[i], ..., arrN[i])` for each element. Arrays `arr1` ... `arrN` must have the same number of elements. Examples: @@ -2109,9 +2109,9 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res Note that the `arrayMap` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayFilter(func, arr1, …) +## arrayFilter(func, arr1, ...) -Returns an array containing only the elements in `arr1` for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns an array containing only the elements in `arr1` for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Examples: @@ -2142,9 +2142,9 @@ SELECT Note that the `arrayFilter` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayFill(func, arr1, …) +## arrayFill(func, arr1, ...) -Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func(arr1[i], …, arrN[i])` returns 0. The first element of `arr1` will not be replaced. +Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func(arr1[i], ..., arrN[i])` returns 0. The first element of `arr1` will not be replaced. Examples: @@ -2160,9 +2160,9 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, Note that the `arrayFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayReverseFill(func, arr1, …) +## arrayReverseFill(func, arr1, ...) -Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func(arr1[i], …, arrN[i])` returns 0. The last element of `arr1` will not be replaced. +Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func(arr1[i], ..., arrN[i])` returns 0. The last element of `arr1` will not be replaced. Examples: @@ -2178,9 +2178,9 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, Note that the `arrayReverseFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arraySplit(func, arr1, …) +## arraySplit(func, arr1, ...) -Split `arr1` into multiple arrays. When `func(arr1[i], …, arrN[i])` returns something other than 0, the array will be split on the left hand side of the element. 
The array will not be split before the first element. +Split `arr1` into multiple arrays. When `func(arr1[i], ..., arrN[i])` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element. Examples: @@ -2196,9 +2196,9 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the `arraySplit` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayReverseSplit(func, arr1, …) +## arrayReverseSplit(func, arr1, ...) -Split `arr1` into multiple arrays. When `func(arr1[i], …, arrN[i])` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. +Split `arr1` into multiple arrays. When `func(arr1[i], ..., arrN[i])` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. Examples: @@ -2214,30 +2214,30 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Note that the `arrayReverseSplit` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayExists(\[func,\] arr1, …) +## arrayExists(\[func,\] arr1, ...) -Returns 1 if there is at least one element in `arr` for which `func(arr1[i], …, arrN[i])` returns something other than 0. Otherwise, it returns 0. +Returns 1 if there is at least one element in `arr` for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Otherwise, it returns 0. Note that the `arrayExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayAll(\[func,\] arr1, …) +## arrayAll(\[func,\] arr1, ...) -Returns 1 if `func(arr1[i], …, arrN[i])` returns something other than 0 for all the elements in arrays. Otherwise, it returns 0. +Returns 1 if `func(arr1[i], ..., arrN[i])` returns something other than 0 for all the elements in arrays. Otherwise, it returns 0. Note that the `arrayAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayFirst(func, arr1, …) +## arrayFirst(func, arr1, ...) -Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. ## arrayFirstOrNull -Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise it returns `NULL`. +Returns the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0, otherwise it returns `NULL`. **Syntax** ```sql -arrayFirstOrNull(func, arr1, …) +arrayFirstOrNull(func, arr1, ...) ``` **Parameters** @@ -2292,20 +2292,20 @@ Result: \N ``` -## arrayLast(func, arr1, …) +## arrayLast(func, arr1, ...) -Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. 
Note that the `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. ## arrayLastOrNull -Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise returns `NULL`. +Returns the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0, otherwise returns `NULL`. **Syntax** ```sql -arrayLastOrNull(func, arr1, …) +arrayLastOrNull(func, arr1, ...) ``` **Parameters** @@ -2348,15 +2348,15 @@ Result: \N ``` -## arrayFirstIndex(func, arr1, …) +## arrayFirstIndex(func, arr1, ...) -Returns the index of the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the index of the first element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayLastIndex(func, arr1, …) +## arrayLastIndex(func, arr1, ...) -Returns the index of the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +Returns the index of the last element in the `arr1` array for which `func(arr1[i], ..., arrN[i])` returns something other than 0. Note that the `arrayLastIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. @@ -2580,9 +2580,9 @@ Result: └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) +## arrayCumSum(\[func,\] arr1, ...) -Returns an array of the partial (running) sums of the elements in the source array `arr1`. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. +Returns an array of the partial (running) sums of the elements in the source array `arr1`. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], ..., arrN[i])`. **Syntax** @@ -2614,9 +2614,9 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res Note that the `arrayCumSum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayCumSumNonNegative(\[func,\] arr1, …) +## arrayCumSumNonNegative(\[func,\] arr1, ...) -Same as `arrayCumSum`, returns an array of the partial (running) sums of the elements in the source array. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. Unlike `arrayCumSum`, if the current running sum is smaller than `0`, it is replaced by `0`. +Same as `arrayCumSum`, returns an array of the partial (running) sums of the elements in the source array. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], ..., arrN[i])`. Unlike `arrayCumSum`, if the current running sum is smaller than `0`, it is replaced by `0`. 
**Syntax** diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 843f22e5a6f..1a56691ffc0 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1499,7 +1499,7 @@ This function returns the week number for date or datetime. The two-argument for The following table describes how the mode argument works. -| Mode | First day of week | Range | Week 1 is the first week … | +| Mode | First day of week | Range | Week 1 is the first week ... | |------|-------------------|-------|-------------------------------| | 0 | Sunday | 0-53 | with a Sunday in this year | | 1 | Monday | 0-53 | with 4 or more days this year | diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index e920ab82988..ba72b3cc6ed 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -386,7 +386,7 @@ SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1 SELECT isValidJSON('not a json') = 0 ``` -## JSONHas(json\[, indices_or_keys\]…) +## JSONHas(json\[, indices_or_keys\]...) If the value exists in the JSON document, `1` will be returned. @@ -419,7 +419,7 @@ SELECT JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' ``` -## JSONLength(json\[, indices_or_keys\]…) +## JSONLength(json\[, indices_or_keys\]...) Return the length of a JSON array or a JSON object. @@ -432,7 +432,7 @@ SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 ``` -## JSONType(json\[, indices_or_keys\]…) +## JSONType(json\[, indices_or_keys\]...) Return the type of a JSON value. @@ -446,13 +446,13 @@ SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' ``` -## JSONExtractUInt(json\[, indices_or_keys\]…) +## JSONExtractUInt(json\[, indices_or_keys\]...) -## JSONExtractInt(json\[, indices_or_keys\]…) +## JSONExtractInt(json\[, indices_or_keys\]...) -## JSONExtractFloat(json\[, indices_or_keys\]…) +## JSONExtractFloat(json\[, indices_or_keys\]...) -## JSONExtractBool(json\[, indices_or_keys\]…) +## JSONExtractBool(json\[, indices_or_keys\]...) Parses a JSON and extract a value. These functions are similar to `visitParam` functions. @@ -466,7 +466,7 @@ SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200 SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 ``` -## JSONExtractString(json\[, indices_or_keys\]…) +## JSONExtractString(json\[, indices_or_keys\]...) Parses a JSON and extract a string. This function is similar to `visitParamExtractString` functions. @@ -484,7 +484,7 @@ SELECT JSONExtractString('{"abc":"\\u263"}', 'abc') = '' SELECT JSONExtractString('{"abc":"hello}', 'abc') = '' ``` -## JSONExtract(json\[, indices_or_keys…\], Return_type) +## JSONExtract(json\[, indices_or_keys...\], Return_type) Parses a JSON and extract a value of the given ClickHouse data type. 
@@ -506,7 +506,7 @@ SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' ``` -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) Parses key-value pairs from a JSON where the values are of the given ClickHouse data type. @@ -554,7 +554,7 @@ text └────────────────────────────────────────────────────────────┘ ``` -## JSONExtractRaw(json\[, indices_or_keys\]…) +## JSONExtractRaw(json\[, indices_or_keys\]...) Returns a part of JSON as unparsed string. @@ -566,7 +566,7 @@ Example: SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = '[-100, 200.0, 300]'; ``` -## JSONExtractArrayRaw(json\[, indices_or_keys…\]) +## JSONExtractArrayRaw(json\[, indices_or_keys...\]) Returns an array with elements of JSON array, each represented as unparsed string. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 5b77f16027b..4501d1f43d3 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -172,7 +172,7 @@ Result: ## visibleWidth Calculates the approximate width when outputting values to the console in text format (tab-separated). -This function is used by the system to implement [Pretty formats](../formats.mdx). +This function is used by the system to implement [Pretty formats](../../interfaces/formats.md). `NULL` is represented as a string corresponding to `NULL` in `Pretty` formats. @@ -335,7 +335,7 @@ The argument is internally still evaluated. Useful e.g. for benchmarks. **Syntax** ```sql -ignore(…) +ignore(x) ``` ## sleep @@ -541,60 +541,6 @@ Result: └────────────────────┘ ``` -## isFinite - -Returns 1 if the Float32 or Float64 argument not infinite and not a NaN, otherwise this function returns 0. - -**Syntax** - -```sql -isFinite(x) -``` - -## isInfinite - -Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. Note that 0 is returned for a NaN. - -**Syntax** - -```sql -isInfinite(x) -``` - -## ifNotFinite - -Checks whether a floating point value is finite. - -**Syntax** - -```sql -ifNotFinite(x,y) -``` - -**Arguments** - -- `x` — Value to check for infinity. [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. [Float\*](../../sql-reference/data-types/float.md). - -**Returned value** - -- `x` if `x` is finite. -- `y` if `x` is not finite. - -**Example** - -Query: - - SELECT 1/0 as infimum, ifNotFinite(infimum,42) - -Result: - - ┌─infimum─┬─ifNotFinite(divide(1, 0), 42)─┐ - │ inf │ 42 │ - └─────────┴───────────────────────────────┘ - -You can get similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. - ## isNaN Returns 1 if the Float32 and Float64 argument is NaN, otherwise this function 0. @@ -2303,7 +2249,7 @@ Accepts a path to a catboost model and model arguments (features). Returns Float **Syntax** ```sql -catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n) +catboostEvaluate(path_to_model, feature_1, feature_2, ..., feature_n) ``` **Example** @@ -2351,7 +2297,7 @@ Throw an exception if argument `x` is true. 
**Syntax** ```sql -throwIf(x\[, message\[, error_code\]\]) +throwIf(x[, message[, error_code]]) ``` **Arguments** diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 0b761b62006..0e183626555 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -139,7 +139,7 @@ Format the `pattern` string with the values (strings, integers, etc.) listed in **Syntax** ```sql -format(pattern, s0, s1, …) +format(pattern, s0, s1, ...) ``` **Example** diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 9738c19bf3c..a6eb4a4ceff 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -799,7 +799,7 @@ If you only want to search multiple substrings in a string, you can use function **Syntax** ```sql -multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAnyIndex @@ -809,7 +809,7 @@ Like `multiMatchAny` but returns any index that matches the haystack. **Syntax** ```sql -multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAllIndices @@ -819,7 +819,7 @@ Like `multiMatchAny` but returns the array of all indices that match the haystac **Syntax** ```sql -multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAny @@ -833,7 +833,7 @@ Like `multiMatchAny` but returns 1 if any pattern matches the haystack within a **Syntax** ```sql -multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAnyIndex @@ -843,7 +843,7 @@ Like `multiFuzzyMatchAny` but returns any index that matches the haystack within **Syntax** ```sql -multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAllIndices @@ -853,7 +853,7 @@ Like `multiFuzzyMatchAny` but returns the array of all indices in any order that **Syntax** ```sql -multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## extract diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 64b1732597f..c2219bb3f90 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -7,15 +7,15 @@ sidebar_label: Tuples ## tuple A function that allows grouping multiple columns. -For columns with the types T1, T2, …, it returns a Tuple(T1, T2, …) type tuple containing these columns. There is no cost to execute the function. +For columns with the types T1, T2, ..., it returns a Tuple(T1, T2, ...) type tuple containing these columns. There is no cost to execute the function. Tuples are normally used as intermediate values for an argument of IN operators, or for creating a list of formal parameters of lambda functions. Tuples can’t be written to a table. 
-The function implements the operator `(x, y, …)`. +The function implements the operator `(x, y, ...)`. **Syntax** ``` sql -tuple(x, y, …) +tuple(x, y, ...) ``` ## tupleElement diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 377283bc006..6386b4d5b1d 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -589,7 +589,7 @@ mapApply(func, map) **Returned value** -- Returns a map obtained from the original map by application of `func(map1[i], …, mapN[i])` for each element. +- Returns a map obtained from the original map by application of `func(map1[i], ..., mapN[i])` for each element. **Example** @@ -629,7 +629,7 @@ mapFilter(func, map) **Returned value** -- Returns a map containing only the elements in `map` for which `func(map1[i], …, mapN[i])` returns something other than 0. +- Returns a map containing only the elements in `map` for which `func(map1[i], ..., mapN[i])` returns something other than 0. **Example** diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index a0b0170721c..6da82e689a9 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -16,7 +16,7 @@ If the relevant part isn’t present in a URL, an empty string is returned. Extracts the protocol from a URL. -Examples of typical returned values: http, https, ftp, mailto, tel, magnet… +Examples of typical returned values: http, https, ftp, mailto, tel, magnet... ### domain diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md index f6fb179d969..320828f0de9 100644 --- a/docs/en/sql-reference/statements/alter/comment.md +++ b/docs/en/sql-reference/statements/alter/comment.md @@ -4,7 +4,7 @@ sidebar_position: 51 sidebar_label: COMMENT --- -# ALTER TABLE … MODIFY COMMENT +# ALTER TABLE ... MODIFY COMMENT Adds, modifies, or removes comment to the table, regardless if it was set before or not. Comment change is reflected in both [system.tables](../../../operations/system-tables/tables.md) and `SHOW CREATE TABLE` query. diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index b6f45b67d52..af56bec7a11 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE Statement +# ALTER TABLE ... DELETE Statement ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 7961315c193..3cfb99cff83 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -42,7 +42,7 @@ These `ALTER` statements modify entities related to role-based access control: ## Mutations -`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). 
They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts.
+`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE ... DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE ... UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that produce new “mutated” versions of parts.
 
 For `*MergeTree` tables mutations execute by **rewriting whole data parts**. There is no atomicity - parts are substituted for mutated parts as soon as they are ready and a `SELECT` query that started executing during a mutation will see data from parts that have already been mutated along with data from parts that have not been mutated yet.
 
diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md
index ab7d0ca7378..0b300e5849a 100644
--- a/docs/en/sql-reference/statements/alter/update.md
+++ b/docs/en/sql-reference/statements/alter/update.md
@@ -4,7 +4,7 @@ sidebar_position: 40
 sidebar_label: UPDATE
 ---
 
-# ALTER TABLE … UPDATE Statements
+# ALTER TABLE ... UPDATE Statements
 
 ``` sql
 ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr
diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md
index e063b27424e..83e8e9311b4 100644
--- a/docs/en/sql-reference/statements/alter/view.md
+++ b/docs/en/sql-reference/statements/alter/view.md
@@ -4,9 +4,9 @@ sidebar_position: 50
 sidebar_label: VIEW
 ---
 
-# ALTER TABLE … MODIFY QUERY Statement
+# ALTER TABLE ... MODIFY QUERY Statement
 
-You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process.
+You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE ... MODIFY QUERY` statement without interrupting ingestion process.
 
 This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underlying storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause.
 
@@ -198,6 +198,6 @@ SELECT * FROM mv;
 
 `ALTER LIVE VIEW ... REFRESH` statement refreshes a [Live view](../create/view.md#live-view). See [Force Live View Refresh](../create/view.md#live-view-alter-refresh).
 
-## ALTER TABLE … MODIFY REFRESH Statement
+## ALTER TABLE ... MODIFY REFRESH Statement
 
 `ALTER TABLE ... MODIFY REFRESH` statement changes refresh parameters of a [Refreshable Materialized View](../create/view.md#refreshable-materialized-view). See [Changing Refresh Parameters](../create/view.md#changing-refresh-parameters).
diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 073a3c0d246..b526c94e508 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -306,7 +306,7 @@ CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTE
 
 Note that elements emitted by a late firing should be treated as updated results of a previous computation. Instead of firing at the end of windows, the window view will fire immediately when the late event arrives. Thus, it will result in multiple outputs for the same window. Users need to take these duplicated results into account or deduplicate them.
 
-You can modify `SELECT` query that was specified in the window view by using `ALTER TABLE … MODIFY QUERY` statement. The data structure resulting in a new `SELECT` query should be the same as the original `SELECT` query when with or without `TO [db.]name` clause. Note that the data in the current window will be lost because the intermediate state cannot be reused.
+You can modify `SELECT` query that was specified in the window view by using `ALTER TABLE ... MODIFY QUERY` statement. The data structure resulting in a new `SELECT` query should be the same as the original `SELECT` query, with or without the `TO [db.]name` clause. Note that the data in the current window will be lost because the intermediate state cannot be reused.
 
 ### Monitoring New Windows
 
diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md
index a76692cf291..f3dadabd25f 100644
--- a/docs/en/sql-reference/statements/insert-into.md
+++ b/docs/en/sql-reference/statements/insert-into.md
@@ -73,7 +73,7 @@ Data can be passed to the INSERT in any [format](../../interfaces/formats.md#for
 INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set
 ```
 
-For example, the following query format is identical to the basic version of INSERT … VALUES:
+For example, the following query format is identical to the basic version of INSERT ... VALUES:
 
 ``` sql
 INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ...
diff --git a/docs/en/sql-reference/statements/select/limit.md b/docs/en/sql-reference/statements/select/limit.md
index d61a5a44b58..58fdf988bf3 100644
--- a/docs/en/sql-reference/statements/select/limit.md
+++ b/docs/en/sql-reference/statements/select/limit.md
@@ -17,11 +17,11 @@ If there is no [ORDER BY](../../../sql-reference/statements/select/order-by.md)
 The number of rows in the result set can also depend on the [limit](../../../operations/settings/settings.md#limit) setting.
 :::
 
-## LIMIT … WITH TIES Modifier
+## LIMIT ... WITH TIES Modifier
 
 When you set `WITH TIES` modifier for `LIMIT n[,m]` and specify `ORDER BY expr_list`, you will get in result first `n` or `n,m` rows and all rows with same `ORDER BY` fields values equal to row at position `n` for `LIMIT n` and `m` for `LIMIT n,m`.
 
-This modifier also can be combined with [ORDER BY … WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill).
+This modifier also can be combined with [ORDER BY ... WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill).
For example, the following query diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index d6432a7b4f8..512a58d7cd9 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -283,7 +283,7 @@ In `MaterializedView`-engine tables the optimization works with views like `SELE ## ORDER BY Expr WITH FILL Modifier -This modifier also can be combined with [LIMIT … WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties). +This modifier also can be combined with [LIMIT ... WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties). `WITH FILL` modifier can be set after `ORDER BY expr` with optional `FROM expr`, `TO expr` and `STEP expr` parameters. All missed values of `expr` column will be filled sequentially and other columns will be filled as defaults. diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index 3a63811add6..f66178afbb2 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -169,7 +169,7 @@ If your listing of files contains number ranges with leading zeros, use the cons **Example** -Query the total number of rows in files named `file000`, `file001`, … , `file999`: +Query the total number of rows in files named `file000`, `file001`, ... , `file999`: ``` sql SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32'); diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 80077ecdb33..b891d88df31 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -130,7 +130,7 @@ FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefi If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: -Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql SELECT count(*) diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 92f904b8841..d65615e7588 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -85,7 +85,7 @@ If your listing of files contains number ranges with leading zeros, use the cons **Example** -Query the data from files named `file000`, `file001`, … , `file999`: +Query the data from files named `file000`, `file001`, ... , `file999`: ``` sql SELECT count(*) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 38d77a98749..cbef80371a3 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -137,7 +137,7 @@ FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/ If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. ::: -Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, ... 
, `file-999.csv`: ``` sql SELECT count(*) diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index cd1297504af..08fa7a1e603 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -57,7 +57,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** Вокруг бинарных операторов (`+`, `-`, `*`, `/`, `%`, …), а также тернарного оператора `?:` ставятся пробелы. +**7.** Вокруг бинарных операторов (`+`, `-`, `*`, `/`, `%`, ...), а также тернарного оператора `?:` ставятся пробелы. ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -86,7 +86,7 @@ dst.ClickGoodEvent = click.GoodEvent; При необходимости, оператор может быть перенесён на новую строку. В этом случае, перед ним увеличивается отступ. -**11.** Унарные операторы `--`, `++`, `*`, `&`, … не отделяются от аргумента пробелом. +**11.** Унарные операторы `--`, `++`, `*`, `&`, ... не отделяются от аргумента пробелом. **12.** После запятой ставится пробел, а перед — нет. Аналогично для точки с запятой внутри выражения `for`. @@ -115,7 +115,7 @@ public: **16.** Если на весь файл один `namespace` и кроме него ничего существенного нет, то отступ внутри `namespace` не нужен. -**17.** Если блок для выражения `if`, `for`, `while`, … состоит из одного `statement`, то фигурные скобки не обязательны. Вместо этого поместите `statement` на отдельную строку. Это правило справедливо и для вложенных `if`, `for`, `while`, … +**17.** Если блок для выражения `if`, `for`, `while`, ... состоит из одного `statement`, то фигурные скобки не обязательны. Вместо этого поместите `statement` на отдельную строку. Это правило справедливо и для вложенных `if`, `for`, `while`, ... Если внутренний `statement` содержит фигурные скобки или `else`, то внешний блок следует писать в фигурных скобках. @@ -266,7 +266,7 @@ void executeQuery( Пример взят с ресурса http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/. -**7.** Нельзя писать мусорные комментарии (автор, дата создания…) в начале каждого файла. +**7.** Нельзя писать мусорные комментарии (автор, дата создания...) в начале каждого файла. **8.** Однострочные комментарии начинаются с трёх слешей: `///` , многострочные с `/**`. Такие комментарии считаются «документирующими». diff --git a/docs/ru/engines/table-engines/integrations/hdfs.md b/docs/ru/engines/table-engines/integrations/hdfs.md index 72087b56652..cf43eef73e3 100644 --- a/docs/ru/engines/table-engines/integrations/hdfs.md +++ b/docs/ru/engines/table-engines/integrations/hdfs.md @@ -103,7 +103,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs **Example** -Создадим таблицу с именами `file000`, `file001`, … , `file999`: +Создадим таблицу с именами `file000`, `file001`, ... , `file999`: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md index 720aa589122..a1c69df4d0a 100644 --- a/docs/ru/engines/table-engines/integrations/s3.md +++ b/docs/ru/engines/table-engines/integrations/s3.md @@ -73,7 +73,7 @@ SELECT * FROM s3_engine_table LIMIT 2; **Пример подстановки 1** -Таблица содержит данные из файлов с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Таблица содержит данные из файлов с именами `file-000.csv`, `file-001.csv`, ... 
, `file-999.csv`: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md index 46597c94370..c3203804211 100644 --- a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -66,7 +66,7 @@ WHERE table = 'visits' └───────────┴───────────────────┴────────┘ ``` -Столбец `partition` содержит имена всех партиций таблицы. Таблица `visits` из нашего примера содержит две партиции: `201901` и `201902`. Используйте значения из этого столбца в запросах [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md). +Столбец `partition` содержит имена всех партиций таблицы. Таблица `visits` из нашего примера содержит две партиции: `201901` и `201902`. Используйте значения из этого столбца в запросах [ALTER ... PARTITION](../../../sql-reference/statements/alter/partition.md). Столбец `name` содержит названия кусков партиций. Значения из этого столбца можно использовать в запросах [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition). diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index faa492d4d85..49ba229b1d5 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -771,7 +771,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' - В результате вставки (запрос `INSERT`). - В фоновых операциях слияний и [мутаций](../../../sql-reference/statements/alter/index.md#mutations). - При скачивании данных с другой реплики. -- В результате заморозки партиций [ALTER TABLE … FREEZE PARTITION](../../../engines/table-engines/mergetree-family/mergetree.md#alter_freeze-partition). +- В результате заморозки партиций [ALTER TABLE ... FREEZE PARTITION](../../../engines/table-engines/mergetree-family/mergetree.md#alter_freeze-partition). Во всех случаях, кроме мутаций и заморозки партиций, при записи куска выбирается том и диск в соответствии с указанной конфигурацией хранилища: @@ -781,7 +781,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' Мутации и запросы заморозки партиций в реализации используют [жесткие ссылки](https://ru.wikipedia.org/wiki/%D0%96%D1%91%D1%81%D1%82%D0%BA%D0%B0%D1%8F_%D1%81%D1%81%D1%8B%D0%BB%D0%BA%D0%B0). Жесткие ссылки между различными дисками не поддерживаются, поэтому в случае таких операций куски размещаются на тех же дисках, что и исходные. В фоне куски перемещаются между томами на основе информации о занятом месте (настройка `move_factor`) по порядку, в котором указаны тома в конфигурации. Данные никогда не перемещаются с последнего тома и на первый том. Следить за фоновыми перемещениями можно с помощью системных таблиц [system.part_log](../../../engines/table-engines/mergetree-family/mergetree.md#system_tables-part-log) (поле `type = MOVE_PART`) и [system.parts](../../../engines/table-engines/mergetree-family/mergetree.md#system_tables-parts) (поля `path` и `disk`). Также подробная информация о перемещениях доступна в логах сервера. -С помощью запроса [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../engines/table-engines/mergetree-family/mergetree.md#alter_move-partition) пользователь может принудительно перенести кусок или партицию с одного раздела на другой. 
При этом учитываются все ограничения, указанные для фоновых операций. Запрос самостоятельно инициирует процесс перемещения не дожидаясь фоновых операций. В случае недостатка места или неудовлетворения ограничениям пользователь получит сообщение об ошибке. +С помощью запроса [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](../../../engines/table-engines/mergetree-family/mergetree.md#alter_move-partition) пользователь может принудительно перенести кусок или партицию с одного раздела на другой. При этом учитываются все ограничения, указанные для фоновых операций. Запрос самостоятельно инициирует процесс перемещения не дожидаясь фоновых операций. В случае недостатка места или неудовлетворения ограничениям пользователь получит сообщение об ошибке. Перемещения данных не взаимодействуют с репликацией данных, поэтому на разных репликах одной и той же таблицы могут быть указаны разные политики хранения. diff --git a/docs/ru/engines/table-engines/special/external-data.md b/docs/ru/engines/table-engines/special/external-data.md index 881566e5f34..3d9737096f5 100644 --- a/docs/ru/engines/table-engines/special/external-data.md +++ b/docs/ru/engines/table-engines/special/external-data.md @@ -31,7 +31,7 @@ ClickHouse позволяет отправить на сервер данные, - **--format** - формат данных в файле. Если не указано - используется TabSeparated. Должен быть указан один из следующих параметров: -- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … +- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, ... - **--structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. Файлы, указанные в file, будут разобраны форматом, указанным в format, с использованием типов данных, указанных в types или structure. Таблица будет загружена на сервер, и доступна там в качестве временной таблицы с именем name. diff --git a/docs/ru/faq/general/olap.md b/docs/ru/faq/general/olap.md index c9021f7c92e..bcfe9663381 100644 --- a/docs/ru/faq/general/olap.md +++ b/docs/ru/faq/general/olap.md @@ -9,13 +9,13 @@ sidebar_position: 100 [OLAP](https://ru.wikipedia.org/wiki/OLAP) (OnLine Analytical Processing) переводится как обработка данных в реальном времени. Это широкий термин, который можно рассмотреть с двух сторон: с технической и с точки зрения бизнеса. Для самого общего понимания можно просто прочитать его с конца: **Processing** - Обрабатываются некие исходные данные… + Обрабатываются некие исходные данные... **Analytical** -: … чтобы получить какие-то аналитические отчеты или новые знания… +: ... чтобы получить какие-то аналитические отчеты или новые знания... **OnLine** -: … в реальном времени, практически без задержек на обработку. +: ... в реальном времени, практически без задержек на обработку. ## OLAP с точки зрения бизнеса {#olap-from-the-business-perspective} diff --git a/docs/ru/getting-started/example-datasets/nyc-taxi.md b/docs/ru/getting-started/example-datasets/nyc-taxi.md index 12d0c18c3a1..a42033e7d41 100644 --- a/docs/ru/getting-started/example-datasets/nyc-taxi.md +++ b/docs/ru/getting-started/example-datasets/nyc-taxi.md @@ -196,7 +196,7 @@ real 75m56.214s (Импорт данных напрямую из Postgres также возможен с использованием `COPY ... TO PROGRAM`.) -К сожалению, все поля, связанные с погодой (precipitation…average_wind_speed) заполнены NULL. Из-за этого мы исключим их из финального набора данных. 
+К сожалению, все поля, связанные с погодой (precipitation...average_wind_speed) заполнены NULL. Из-за этого мы исключим их из финального набора данных. Для начала мы создадим таблицу на одном сервере. Позже мы сделаем таблицу распределенной. diff --git a/docs/ru/index.md b/docs/ru/index.md index 29f2bbe07fb..d551d492af5 100644 --- a/docs/ru/index.md +++ b/docs/ru/index.md @@ -15,7 +15,7 @@ ClickHouse — столбцовая система управления база | #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | | #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | | #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | +| #N | ... | ... | ... | ... | ... | То есть, значения, относящиеся к одной строке, физически хранятся рядом. @@ -26,11 +26,11 @@ ClickHouse — столбцовая система управления база | Строка: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | -| JavaEnable: | 1 | 0 | 1 | … | -| Title: | Investor Relations | Contact us | Mission | … | -| GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | В примерах изображён только порядок расположения данных. То есть значения из разных столбцов хранятся отдельно, а данные одного столбца — вместе. diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index d1d38a587c6..e82a5a008eb 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -260,7 +260,7 @@ FORMAT Null; Ограничивает количество строк в хэш-таблице, используемой при соединении таблиц. -Параметр применяется к операциям [SELECT… JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). +Параметр применяется к операциям [SELECT... JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). Если запрос содержит несколько `JOIN`, то ClickHouse проверяет значение настройки для каждого промежуточного результата. @@ -277,7 +277,7 @@ FORMAT Null; Ограничивает размер (в байтах) хэш-таблицы, используемой при объединении таблиц. -Параметр применяется к операциям [SELECT… JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). +Параметр применяется к операциям [SELECT... JOIN](../../sql-reference/statements/select/join.md#select-join) и к движку таблиц [Join](../../engines/table-engines/special/join.md). Если запрос содержит несколько `JOIN`, то ClickHouse проверяет значение настройки для каждого промежуточного результата. 
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 2b3607dcf08..3a70a0bac12 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1859,7 +1859,7 @@ SELECT * FROM test_table ## count_distinct_implementation {#settings-count_distinct_implementation} -Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count). +Задаёт, какая из функций `uniq*` используется при выполнении конструкции [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count). Возможные значения: diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index 6463f6bd95d..e6a61d9b381 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -82,7 +82,7 @@ FROM В этом случае необходимо помнить, что границы корзин гистограммы не известны. -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) {#function-sequencematch} +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} Проверяет, содержит ли последовательность событий цепочку, которая соответствует указанному шаблону. @@ -172,7 +172,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} +## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount} Вычисляет количество цепочек событий, соответствующих шаблону. Функция обнаруживает только непересекающиеся цепочки событий. Она начинает искать следующую цепочку только после того, как полностью совпала текущая цепочка событий. diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md index fed0f8b328b..a0a430f7a68 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 ## quantiles {#quantiles} -Синтаксис: `quantiles(level1, level2, …)(x)` +Синтаксис: `quantiles(level1, level2, ...)(x)` Все функции для вычисления квантилей имеют соответствующие функции для вычисления нескольких квантилей: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`, `quantilesBFloat16`. Эти функции вычисляют все квантили указанных уровней в один проход и возвращают массив с вычисленными значениями. diff --git a/docs/ru/sql-reference/data-types/aggregatefunction.md b/docs/ru/sql-reference/data-types/aggregatefunction.md index e42b467e4af..0481151c7e4 100644 --- a/docs/ru/sql-reference/data-types/aggregatefunction.md +++ b/docs/ru/sql-reference/data-types/aggregatefunction.md @@ -6,9 +6,9 @@ sidebar_label: AggregateFunction # AggregateFunction {#data-type-aggregatefunction} -Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(…), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). 
Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. +Агрегатные функции могут обладать определяемым реализацией промежуточным состоянием, которое может быть сериализовано в тип данных, соответствующий AggregateFunction(...), и быть записано в таблицу обычно посредством [материализованного представления](../../sql-reference/statements/create/view.md). Чтобы получить промежуточное состояние, обычно используются агрегатные функции с суффиксом `-State`. Чтобы в дальнейшем получить агрегированные данные необходимо использовать те же агрегатные функции с суффиксом `-Merge`. -`AggregateFunction(name, types_of_arguments…)` — параметрический тип данных. +`AggregateFunction(name, types_of_arguments...)` — параметрический тип данных. **Параметры** diff --git a/docs/ru/sql-reference/data-types/fixedstring.md b/docs/ru/sql-reference/data-types/fixedstring.md index d7a4e865903..56a5632f88d 100644 --- a/docs/ru/sql-reference/data-types/fixedstring.md +++ b/docs/ru/sql-reference/data-types/fixedstring.md @@ -21,8 +21,8 @@ sidebar_label: FixedString(N) Примеры значений, которые можно эффективно хранить в столбцах типа `FixedString`: - Двоичное представление IP-адреса (`FixedString(16)` для IPv6). -- Коды языков (ru_RU, en_US … ). -- Коды валют (USD, RUB … ). +- Коды языков (ru_RU, en_US ... ). +- Коды валют (USD, RUB ... ). - Двоичное представление хэшей (`FixedString(16)` для MD5, `FixedString(32)` для SHA256). Для хранения значений UUID используйте тип данных [UUID](uuid.md). diff --git a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md index 4ec8333d563..8fd293a0415 100644 --- a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md @@ -3,7 +3,7 @@ slug: /ru/sql-reference/data-types/nested-data-structures/nested --- # Nested {#nested} -## Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} +## Nested(Name1 Type1, Name2 Type2, ...) {#nestedname1-type1-name2-type2} Вложенная структура данных - это как будто вложенная таблица. Параметры вложенной структуры данных - имена и типы столбцов, указываются так же, как у запроса CREATE. Каждой строке таблицы может соответствовать произвольное количество строк вложенной структуры данных. diff --git a/docs/ru/sql-reference/data-types/tuple.md b/docs/ru/sql-reference/data-types/tuple.md index 8953134d154..9d86c26c563 100644 --- a/docs/ru/sql-reference/data-types/tuple.md +++ b/docs/ru/sql-reference/data-types/tuple.md @@ -4,7 +4,7 @@ sidebar_position: 54 sidebar_label: Tuple(T1, T2, ...) --- -# Tuple(T1, T2, …) {#tuplet1-t2} +# Tuple(T1, T2, ...) {#tuplet1-t2} Кортеж из элементов любого [типа](index.md#data_types). Элементы кортежа могут быть одного или разных типов. diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 1f06bdf264a..825e3f06be2 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -161,7 +161,7 @@ SELECT range(5), range(1, 5), range(1, 5, 2); ``` -## array(x1, …), оператор \[x1, …\] {#arrayx1-operator-x1} +## array(x1, ...), оператор \[x1, ...\] {#arrayx1-operator-x1} Создаёт массив из аргументов функции. 
Аргументы должны быть константами и иметь типы, для которых есть наименьший общий тип. Должен быть передан хотя бы один аргумент, так как иначе непонятно, какого типа создавать массив. То есть, с помощью этой функции невозможно создать пустой массив (для этого используйте функции emptyArray\*, описанные выше). @@ -308,7 +308,7 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) Элементы, равные `NULL`, обрабатываются как обычные значения. -## arrayCount(\[func,\] arr1, …) {#array-count} +## arrayCount(\[func,\] arr1, ...) {#array-count} Возвращает количество элементов массива `arr`, для которых функция `func` возвращает не 0. Если `func` не указана - возвращает количество ненулевых элементов массива. @@ -335,7 +335,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) {#array_functions-arrayenumerate} -Возвращает массив \[1, 2, 3, …, length(arr)\] +Возвращает массив \[1, 2, 3, ..., length(arr)\] Эта функция обычно используется совместно с ARRAY JOIN. Она позволяет, после применения ARRAY JOIN, посчитать что-либо только один раз для каждого массива. Пример: @@ -375,7 +375,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) Также эта функция может быть использована в функциях высшего порядка. Например, с её помощью можно достать индексы массива для элементов, удовлетворяющих некоторому условию. -## arrayEnumerateUniq(arr, …) {#arrayenumerateuniqarr} +## arrayEnumerateUniq(arr, ...) {#arrayenumerateuniqarr} Возвращает массив, такого же размера, как исходный, где для каждого элемента указано, какой он по счету среди элементов с таким же значением. Например: arrayEnumerateUniq(\[10, 20, 10, 30\]) = \[1, 1, 2, 1\]. @@ -597,7 +597,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res; Элементы массива равные `NULL` обрабатываются как обычные значения. -## arraySort(\[func,\] arr, …) {#array_functions-sort} +## arraySort(\[func,\] arr, ...) {#array_functions-sort} Возвращает массив `arr`, отсортированный в восходящем порядке. Если задана функция `func`, то порядок сортировки определяется результатом применения этой функции на элементы массива `arr`. Если `func` принимает несколько аргументов, то в функцию `arraySort` нужно передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания `arraySort`. @@ -698,11 +698,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0). ::: -## arrayPartialSort(\[func,\] limit, arr, …) {#array_functions-sort} +## arrayPartialSort(\[func,\] limit, arr, ...) {#array_functions-sort} То же, что и `arraySort` с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, как и исходный, в котором элементы `[1..limit]` отсортированы в возрастающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке. -## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#array_functions-reverse-sort} Возвращает массив `arr`, отсортированный в нисходящем порядке. Если указана функция `func`, то массив `arr` сначала сортируется в порядке, который определяется функцией `func`, а затем отсортированный массив переворачивается. 
Если функция `func` принимает несколько аргументов, то в функцию `arrayReverseSort` необходимо передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания функции `arrayReverseSort`. @@ -803,11 +803,11 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayPartialReverseSort(\[func,\] limit, arr, …) {#array_functions-sort} +## arrayPartialReverseSort(\[func,\] limit, arr, ...) {#array_functions-sort} То же, что и `arrayReverseSort` с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, как и исходный, в котором элементы `[1..limit]` отсортированы в убывающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке. -## arrayUniq(arr, …) {#array-functions-arrayuniq} +## arrayUniq(arr, ...) {#array-functions-arrayuniq} Если передан один аргумент, считает количество разных элементов в массиве. Если передано несколько аргументов, считает количество разных кортежей из элементов на соответствующих позициях в нескольких массивах. @@ -1174,7 +1174,7 @@ SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]); └──────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) {#array-map} +## arrayMap(func, arr1, ...) {#array-map} Возвращает массив, полученный на основе результатов применения функции `func` к каждому элементу массива `arr`. @@ -1204,7 +1204,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res; Функция `arrayMap` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFilter(func, arr1, …) {#array-filter} +## arrayFilter(func, arr1, ...) {#array-filter} Возвращает массив, содержащий только те элементы массива `arr1`, для которых функция `func` возвращает не 0. @@ -1237,7 +1237,7 @@ SELECT Функция `arrayFilter` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFill(func, arr1, …) {#array-fill} +## arrayFill(func, arr1, ...) {#array-fill} Перебирает `arr1` от первого элемента к последнему и заменяет `arr1[i]` на `arr1[i - 1]`, если `func` вернула 0. Первый элемент `arr1` остаётся неизменным. @@ -1255,7 +1255,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, Функция `arrayFill` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayReverseFill(func, arr1, …) {#array-reverse-fill} +## arrayReverseFill(func, arr1, ...) {#array-reverse-fill} Перебирает `arr1` от последнего элемента к первому и заменяет `arr1[i]` на `arr1[i + 1]`, если `func` вернула 0. Последний элемент `arr1` остаётся неизменным. @@ -1273,7 +1273,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, Функция `arrayReverseFill` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arraySplit(func, arr1, …) {#array-split} +## arraySplit(func, arr1, ...) {#array-split} Разделяет массив `arr1` на несколько. 
Если `func` возвращает не 0, то массив разделяется, а элемент помещается в левую часть. Массив не разбивается по первому элементу. @@ -1291,7 +1291,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Функция `arraySplit` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayReverseSplit(func, arr1, …) {#array-reverse-split} +## arrayReverseSplit(func, arr1, ...) {#array-reverse-split} Разделяет массив `arr1` на несколько. Если `func` возвращает не 0, то массив разделяется, а элемент помещается в правую часть. Массив не разбивается по последнему элементу. @@ -1309,25 +1309,25 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res Функция `arrayReverseSplit` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +## arrayExists(\[func,\] arr1, ...) {#arrayexistsfunc-arr1} Возвращает 1, если существует хотя бы один элемент массива `arr`, для которого функция func возвращает не 0. Иначе возвращает 0. Функция `arrayExists` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. -## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +## arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} Возвращает 1, если для всех элементов массива `arr`, функция `func` возвращает не 0. Иначе возвращает 0. Функция `arrayAll` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. -## arrayFirst(func, arr1, …) {#array-first} +## arrayFirst(func, arr1, ...) {#array-first} Возвращает первый элемент массива `arr1`, для которого функция func возвращает не 0. Функция `arrayFirst` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. -## arrayFirstIndex(func, arr1, …) {#array-first-index} +## arrayFirstIndex(func, arr1, ...) {#array-first-index} Возвращает индекс первого элемента массива `arr1`, для которого функция func возвращает не 0. @@ -1599,7 +1599,7 @@ SELECT arraySum(x -> x*x, [2, 3]) AS res; └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +## arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием. diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 56ae4359bf1..bcc5f807c32 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -559,7 +559,7 @@ SELECT Описание режимов (mode): -| Mode | Первый день недели | Диапазон | Неделя 1 это первая неделя … | +| Mode | Первый день недели | Диапазон | Неделя 1 это первая неделя ... 
| | ----------- | -------- | -------- | ------------------ | |0|Воскресенье|0-53|с воскресеньем в этом году |1|Понедельник|0-53|с 4-мя или более днями в этом году diff --git a/docs/ru/sql-reference/functions/json-functions.md b/docs/ru/sql-reference/functions/json-functions.md index 123f40ce05d..18f625bf80f 100644 --- a/docs/ru/sql-reference/functions/json-functions.md +++ b/docs/ru/sql-reference/functions/json-functions.md @@ -88,7 +88,7 @@ SELECT isValidJSON('{"a": "hello", "b": [-100, 200.0, 300]}') = 1 SELECT isValidJSON('not a json') = 0 ``` -## JSONHas(json\[, indices_or_keys\]…) {#jsonhasjson-indices-or-keys} +## JSONHas(json\[, indices_or_keys\]...) {#jsonhasjson-indices-or-keys} Если значение существует в документе JSON, то возвращается `1`. @@ -121,7 +121,7 @@ SELECT JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' SELECT JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' ``` -## JSONLength(json\[, indices_or_keys\]…) {#jsonlengthjson-indices-or-keys} +## JSONLength(json\[, indices_or_keys\]...) {#jsonlengthjson-indices-or-keys} Возвращает длину массива JSON или объекта JSON. @@ -134,7 +134,7 @@ SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 SELECT JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 ``` -## JSONType(json\[, indices_or_keys\]…) {#jsontypejson-indices-or-keys} +## JSONType(json\[, indices_or_keys\]...) {#jsontypejson-indices-or-keys} Возвращает тип значения JSON. @@ -148,13 +148,13 @@ SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' SELECT JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' ``` -## JSONExtractUInt(json\[, indices_or_keys\]…) {#jsonextractuintjson-indices-or-keys} +## JSONExtractUInt(json\[, indices_or_keys\]...) {#jsonextractuintjson-indices-or-keys} -## JSONExtractInt(json\[, indices_or_keys\]…) {#jsonextractintjson-indices-or-keys} +## JSONExtractInt(json\[, indices_or_keys\]...) {#jsonextractintjson-indices-or-keys} -## JSONExtractFloat(json\[, indices_or_keys\]…) {#jsonextractfloatjson-indices-or-keys} +## JSONExtractFloat(json\[, indices_or_keys\]...) {#jsonextractfloatjson-indices-or-keys} -## JSONExtractBool(json\[, indices_or_keys\]…) {#jsonextractbooljson-indices-or-keys} +## JSONExtractBool(json\[, indices_or_keys\]...) {#jsonextractbooljson-indices-or-keys} Парсит JSON и извлекает значение. Эти функции аналогичны функциям `visitParam`. @@ -168,7 +168,7 @@ SELECT JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200 SELECT JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 ``` -## JSONExtractString(json\[, indices_or_keys\]…) {#jsonextractstringjson-indices-or-keys} +## JSONExtractString(json\[, indices_or_keys\]...) {#jsonextractstringjson-indices-or-keys} Парсит JSON и извлекает строку. Эта функция аналогична функции `visitParamExtractString`. @@ -186,7 +186,7 @@ SELECT JSONExtractString('{"abc":"\\u263"}', 'abc') = '' SELECT JSONExtractString('{"abc":"hello}', 'abc') = '' ``` -## JSONExtract(json\[, indices_or_keys…\], Return_type) {#jsonextractjson-indices-or-keys-return-type} +## JSONExtract(json\[, indices_or_keys...\], Return_type) {#jsonextractjson-indices-or-keys-return-type} Парсит JSON и извлекает значение с заданным типом данных. 
@@ -207,7 +207,7 @@ SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' ``` -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} Разбор пар ключ-значение из JSON, где значение имеет тип данных ClickHouse. @@ -255,7 +255,7 @@ text └────────────────────────────────────────────────────────────┘ ``` -## JSONExtractRaw(json\[, indices_or_keys\]…) {#jsonextractrawjson-indices-or-keys} +## JSONExtractRaw(json\[, indices_or_keys\]...) {#jsonextractrawjson-indices-or-keys} Возвращает часть JSON в виде строки, содержащей неразобранную подстроку. @@ -267,7 +267,7 @@ text SELECT JSONExtractRaw('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = '[-100, 200.0, 300]'; ``` -## JSONExtractArrayRaw(json\[, indices_or_keys\]…) {#jsonextractarrayrawjson-indices-or-keys} +## JSONExtractArrayRaw(json\[, indices_or_keys\]...) {#jsonextractarrayrawjson-indices-or-keys} Возвращает массив из элементов JSON массива, каждый из которых представлен в виде строки с неразобранными подстроками из JSON. diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 835aed934d5..f7637cfa3f7 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -286,7 +286,7 @@ SELECT byteSize(NULL, 1, 0.3, ''); Превращает константу в полноценный столбец, содержащий только одно значение. В ClickHouse полноценные столбцы и константы представлены в памяти по-разному. Функции по-разному работают для аргументов-констант и обычных аргументов (выполняется разный код), хотя результат почти всегда должен быть одинаковым. Эта функция предназначена для отладки такого поведения. -## ignore(…) {#ignore} +## ignore(...) {#ignore} Принимает любые аргументы, в т.ч. `NULL`, всегда возвращает 0. При этом, аргумент всё равно вычисляется. Это может использоваться для бенчмарков. diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index eeb5752c626..fc258f7b4cf 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -358,7 +358,7 @@ SELECT repeat('abc', 10); Разворачивает последовательность кодовых точек Unicode, при допущении, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Иначе — что-то делает (не кидает исключение). -## format(pattern, s0, s1, …) {#format} +## format(pattern, s0, s1, ...) {#format} Форматирует константный шаблон со строками, перечисленными в аргументах. `pattern` — упрощенная версия шаблона в языке Python. Шаблон содержит «заменяющие поля», которые окружены фигурными скобками `{}`. Всё, что не содержится в скобках, интерпретируется как обычный текст и просто копируется. Если нужно использовать символ фигурной скобки, можно экранировать двойной скобкой `{{ '{{' }}` или `{{ '}}' }}`. Имя полей могут быть числами (нумерация с нуля) или пустыми (тогда они интерпретируются как последовательные числа). 
diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index 4f9ae4428a4..53da9a6e791 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -311,19 +311,19 @@ Result: Смотрите `multiSearchAllPositions`. -## multiSearchFirstPosition(haystack, \[needle1, needle2, …, needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen} +## multiSearchFirstPosition(haystack, \[needle1, needle2, ..., needlen\]) {#multisearchfirstpositionhaystack-needle1-needle2-needlen} Так же, как и `position`, только возвращает оффсет первого вхождения любого из needles. Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchFirstPositionCaseInsensitive, multiSearchFirstPositionUTF8, multiSearchFirstPositionCaseInsensitiveUTF8`. -## multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen} +## multiSearchFirstIndex(haystack, \[needle1, needle2, ..., needlen\]) {#multisearchfirstindexhaystack-needle1-needle2-needlen} Возвращает индекс `i` (нумерация с единицы) первой найденной строки needlei в строке `haystack` и 0 иначе. Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchFirstIndexCaseInsensitive, multiSearchFirstIndexUTF8, multiSearchFirstIndexCaseInsensitiveUTF8`. -## multiSearchAny(haystack, \[needle1, needle2, …, needlen\]) {#function-multisearchany} +## multiSearchAny(haystack, \[needle1, needle2, ..., needlen\]) {#function-multisearchany} Возвращает 1, если хотя бы одна подстрока needlei нашлась в строке `haystack` и 0 иначе. @@ -343,30 +343,30 @@ Result: Регулярное выражение работает со строкой как с набором байт. Регулярное выражение не может содержать нулевые байты. Для шаблонов на поиск подстроки в строке, лучше используйте LIKE или position, так как они работают существенно быстрее. -## multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyhaystack-pattern1-pattern2-patternn} +## multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchanyhaystack-pattern1-pattern2-patternn} То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. Используется библиотека [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск многих подстрок в строке, лучше используйте `multiSearchAny`, так как она работает существенно быстрее. :::note Примечание Длина любой строки из `haystack` должна быть меньше 232 байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API. ::: -## multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} +## multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, только возвращает любой индекс подходящего регулярного выражения. -## multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) {#multimatchallindiceshaystack-pattern1-pattern2-patternn} +## multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) {#multimatchallindiceshaystack-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке. 
-## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с non-fuzzy вариантами. -## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} То же, что и `multiFuzzyMatchAny`, только возвращает любой индекс подходящего регулярного выражения в пределах константного редакционного расстояния. -## multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchallindiceshaystack-distance-pattern1-pattern2-patternn} +## multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) {#multifuzzymatchallindiceshaystack-distance-pattern1-pattern2-patternn} То же, что и `multiFuzzyMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке в пределах константного редакционного расстояния. diff --git a/docs/ru/sql-reference/functions/tuple-functions.md b/docs/ru/sql-reference/functions/tuple-functions.md index c702e5d00b1..70ae44aa627 100644 --- a/docs/ru/sql-reference/functions/tuple-functions.md +++ b/docs/ru/sql-reference/functions/tuple-functions.md @@ -9,15 +9,15 @@ sidebar_label: Функции для работы с кортежами ## tuple {#tuple} Функция, позволяющая сгруппировать несколько столбцов. -Для столбцов, имеющих типы T1, T2, … возвращает кортеж типа Tuple(T1, T2, …), содержащий эти столбцы. Выполнение функции ничего не стоит. +Для столбцов, имеющих типы T1, T2, ... возвращает кортеж типа Tuple(T1, T2, ...), содержащий эти столбцы. Выполнение функции ничего не стоит. Кортежи обычно используются как промежуточное значение в качестве аргумента операторов IN, или для создания списка формальных параметров лямбда-функций. Кортежи не могут быть записаны в таблицу. -С помощью функции реализуется оператор `(x, y, …)`. +С помощью функции реализуется оператор `(x, y, ...)`. **Синтаксис** ``` sql -tuple(x, y, …) +tuple(x, y, ...) ``` ## tupleElement {#tupleelement} diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md index 3c6e6151ef8..087891f4347 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -14,7 +14,7 @@ sidebar_label: "Функции для работы с URL" ### protocol {#protocol} -Возвращает протокол. Примеры: http, ftp, mailto, magnet… +Возвращает протокол. Примеры: http, ftp, mailto, magnet... 
### domain {#domain} diff --git a/docs/ru/sql-reference/statements/alter/comment.md b/docs/ru/sql-reference/statements/alter/comment.md index 727af15d03e..f841c8540f3 100644 --- a/docs/ru/sql-reference/statements/alter/comment.md +++ b/docs/ru/sql-reference/statements/alter/comment.md @@ -4,7 +4,7 @@ sidebar_position: 51 sidebar_label: COMMENT --- -# ALTER TABLE … MODIFY COMMENT {#alter-modify-comment} +# ALTER TABLE ... MODIFY COMMENT {#alter-modify-comment} Добавляет, изменяет или удаляет комментарий к таблице, независимо от того, был ли он установлен раньше или нет. Изменение комментария отражается как в системной таблице [system.tables](../../../operations/system-tables/tables.md), так и в результате выполнения запроса `SHOW CREATE TABLE`. diff --git a/docs/ru/sql-reference/statements/alter/delete.md b/docs/ru/sql-reference/statements/alter/delete.md index dc968a17349..c91a79f5cdd 100644 --- a/docs/ru/sql-reference/statements/alter/delete.md +++ b/docs/ru/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE {#alter-mutations} +# ALTER TABLE ... DELETE {#alter-mutations} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/ru/sql-reference/statements/alter/index.md b/docs/ru/sql-reference/statements/alter/index.md index 07f5ff0a298..e8b8af39e11 100644 --- a/docs/ru/sql-reference/statements/alter/index.md +++ b/docs/ru/sql-reference/statements/alter/index.md @@ -46,7 +46,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN ### Мутации {#mutations} -Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов [ALTER TABLE … DELETE](../../../sql-reference/statements/alter/delete.md) и [ALTER TABLE … UPDATE](../../../sql-reference/statements/alter/update.md), рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице. Поддержана для движков таблиц семейства [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md), в том числе для движков с репликацией. +Мутации - разновидность запроса ALTER, позволяющая изменять или удалять данные в таблице. В отличие от стандартных запросов [ALTER TABLE ... DELETE](../../../sql-reference/statements/alter/delete.md) и [ALTER TABLE ... UPDATE](../../../sql-reference/statements/alter/update.md), рассчитанных на точечное изменение данных, область применения мутаций - достаточно тяжёлые изменения, затрагивающие много строк в таблице. Поддержана для движков таблиц семейства [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md), в том числе для движков с репликацией. Конвертировать существующие таблицы для работы с мутациями не нужно. Но после применения первой мутации формат данных таблицы становится несовместимым с предыдущими версиями и откатиться на предыдущую версию уже не получится. diff --git a/docs/ru/sql-reference/statements/alter/update.md b/docs/ru/sql-reference/statements/alter/update.md index b2032ac77d1..01574a8a9b7 100644 --- a/docs/ru/sql-reference/statements/alter/update.md +++ b/docs/ru/sql-reference/statements/alter/update.md @@ -4,7 +4,7 @@ sidebar_position: 40 sidebar_label: UPDATE --- -# ALTER TABLE … UPDATE {#alter-table-update-statements} +# ALTER TABLE ... UPDATE {#alter-table-update-statements} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] 
WHERE filter_expr diff --git a/docs/ru/sql-reference/statements/alter/view.md b/docs/ru/sql-reference/statements/alter/view.md index e6f6730ff99..53e295f6bbe 100644 --- a/docs/ru/sql-reference/statements/alter/view.md +++ b/docs/ru/sql-reference/statements/alter/view.md @@ -4,9 +4,9 @@ sidebar_position: 50 sidebar_label: VIEW --- -# Выражение ALTER TABLE … MODIFY QUERY {#alter-modify-query} +# Выражение ALTER TABLE ... MODIFY QUERY {#alter-modify-query} -Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE … MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. +Вы можете изменить запрос `SELECT`, который был задан при создании [материализованного представления](../create/view.md#materialized), с помощью запроса 'ALTER TABLE ... MODIFY QUERY'. Используйте его если при создании материализованного представления не использовалась секция `TO [db.]name`. Настройка `allow_experimental_alter_materialized_view_structure` должна быть включена. Если при создании материализованного представления использовалась конструкция `TO [db.]name`, то для изменения отсоедините представление с помощью [DETACH](../detach.md), измените таблицу с помощью [ALTER TABLE](index.md), а затем снова присоедините запрос с помощью [ATTACH](../attach.md). diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index 032bdc6e6d4..8fa30446bb3 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -60,7 +60,7 @@ AS SELECT ... Если указано `POPULATE`, то при создании представления в него будут добавлены данные, уже содержащиеся в исходной таблице, как если бы был сделан запрос `CREATE TABLE ... AS SELECT ...` . Если `POPULATE` не указано, представление будет содержать только данные, добавленные в таблицу после создания представления. Использовать `POPULATE` не рекомендуется, так как в представление не попадут данные, добавляемые в таблицу во время создания представления. -Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`. +Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`... Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`. Выполнение запросов [ALTER](../../../sql-reference/statements/alter/view.md) над материализованными представлениями имеет свои особенности, поэтому эти запросы могут быть неудобными для использования. 
Если материализованное представление использует конструкцию `TO [db.]name`, то можно выполнить `DETACH` представления, `ALTER` для целевой таблицы и последующий `ATTACH` ранее отсоединенного (`DETACH`) представления. diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 747e36b8809..309d4852b11 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -73,7 +73,7 @@ INSERT INTO insert_select_testtable VALUES (1, DEFAULT, 1) ; INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set ``` -Например, следующий формат запроса идентичен базовому варианту INSERT … VALUES: +Например, следующий формат запроса идентичен базовому варианту INSERT ... VALUES: ``` sql INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 5331cf00728..546a674d41a 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -116,7 +116,7 @@ SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UIn **Пример** -Запрос данных из файлов с именами `file000`, `file001`, … , `file999`: +Запрос данных из файлов с именами `file000`, `file001`, ... , `file999`: ``` sql SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32'); diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index fe40cb0c507..2847a95bf19 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -108,7 +108,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. ::: -Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: +Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, ... , `file-999.csv`: ``` sql SELECT count(*) diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md index 7afcc07c6fb..c91d8bcf4d1 100644 --- a/docs/zh/changelog/index.md +++ b/docs/zh/changelog/index.md @@ -190,7 +190,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" - 如果在获取系统数据时发生了zookeeper异常。副本,将其显示在单独的列中。 这实现了 [#9137](https://github.com/ClickHouse/ClickHouse/issues/9137) [#9138](https://github.com/ClickHouse/ClickHouse/pull/9138) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 原子删除destroy上的MergeTree数据部分。 [#8402](https://github.com/ClickHouse/ClickHouse/pull/8402) ([Vladimir Chebotarev](https://github.com/excitoon)) - 支持分布式表的行级安全性。 [#8926](https://github.com/ClickHouse/ClickHouse/pull/8926) ([伊万](https://github.com/abyss7)) -- Now we recognize suffix (like KB, KiB…) in settings values. [#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([米哈伊尔\*科罗托夫](https://github.com/millb)) +- Now we recognize suffix (like KB, KiB...) in settings values. [#8072](https://github.com/ClickHouse/ClickHouse/pull/8072) ([米哈伊尔\*科罗托夫](https://github.com/millb)) - 在构建大型连接的结果时防止内存不足。 [#8637](https://github.com/ClickHouse/ClickHouse/pull/8637) ([Artem Zuikov](https://github.com/4ertus2)) - 在交互模式下为建议添加群集名称 `clickhouse-client`. 
[#8709](https://github.com/ClickHouse/ClickHouse/pull/8709) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - Initialize query profiler for all threads in a group, e.g. it allows to fully profile insert-queries [#8820](https://github.com/ClickHouse/ClickHouse/pull/8820) ([伊万](https://github.com/abyss7)) @@ -523,7 +523,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" - 现在后台在磁盘之间移动,运行它的seprate线程池。 [#7670](https://github.com/ClickHouse/ClickHouse/pull/7670) ([Vladimir Chebotarev](https://github.com/excitoon)) - `SYSTEM RELOAD DICTIONARY` 现在同步执行。 [#8240](https://github.com/ClickHouse/ClickHouse/pull/8240) ([维塔利\*巴拉诺夫](https://github.com/vitlibar)) - 堆栈跟踪现在显示物理地址(对象文件中的偏移量),而不是虚拟内存地址(加载对象文件的位置)。 这允许使用 `addr2line` 当二进制独立于位置并且ASLR处于活动状态时。 这修复 [#8360](https://github.com/ClickHouse/ClickHouse/issues/8360). [#8387](https://github.com/ClickHouse/ClickHouse/pull/8387) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) -- 支持行级安全筛选器的新语法: `…
`. 修复 [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([伊万](https://github.com/abyss7)) +- 支持行级安全筛选器的新语法: `...
`. 修复 [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779). [#8381](https://github.com/ClickHouse/ClickHouse/pull/8381) ([伊万](https://github.com/abyss7)) - 现在 `cityHash` 功能可以与工作 `Decimal` 和 `UUID` 类型。 修复 [#5184](https://github.com/ClickHouse/ClickHouse/issues/5184). [#7693](https://github.com/ClickHouse/ClickHouse/pull/7693) ([米哈伊尔\*科罗托夫](https://github.com/millb)) - 从系统日志中删除了固定的索引粒度(它是1024),因为它在实现自适应粒度之后已经过时。 [#7698](https://github.com/ClickHouse/ClickHouse/pull/7698) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 当ClickHouse在没有SSL的情况下编译时,启用MySQL兼容服务器。 [#7852](https://github.com/ClickHouse/ClickHouse/pull/7852) ([尤里\*巴拉诺夫](https://github.com/yurriy)) diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index c0a08291e02..724b22ad461 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -53,7 +53,7 @@ memcpy(&buf[place_value], &x, sizeof(x)); for (size_t i = 0; i < rows; i += storage.index_granularity) ``` -**7.** 在二元运算符(`+`,`-`,`*`,`/`,`%`,…)和三元运算符 `?:` 周围添加空格。 +**7.** 在二元运算符(`+`,`-`,`*`,`/`,`%`,...)和三元运算符 `?:` 周围添加空格。 ``` cpp UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -82,7 +82,7 @@ dst.ClickGoodEvent = click.GoodEvent; 如有必要,运算符可以包裹到下一行。 在这种情况下,它前面的偏移量增加。 -**11.** 不要使用空格来分开一元运算符 (`--`, `++`, `*`, `&`, …) 和参数。 +**11.** 不要使用空格来分开一元运算符 (`--`, `++`, `*`, `&`, ...) 和参数。 **12.** 在逗号后面加一个空格,而不是在之前。同样的规则也适合 `for` 循环中的分号。 @@ -111,7 +111,7 @@ public: **16.** 如果对整个文件使用相同的 `namespace`,并且没有其他重要的东西,则 `namespace` 中不需要偏移量。 -**17.** 在 `if`, `for`, `while` 中包裹的代码块中,若代码是一个单行的 `statement`,那么大括号是可选的。 可以将 `statement` 放到一行中。这个规则同样适用于嵌套的 `if`, `for`, `while`, … +**17.** 在 `if`, `for`, `while` 中包裹的代码块中,若代码是一个单行的 `statement`,那么大括号是可选的。 可以将 `statement` 放到一行中。这个规则同样适用于嵌套的 `if`, `for`, `while`, ... 但是如果内部 `statement` 包含大括号或 `else`,则外部块应该用大括号括起来。 @@ -262,7 +262,7 @@ void executeQuery( 这个示例来源于 http://home.tamk.fi/~jaalto/course/coding-style/doc/unmaintainable-code/。 -**7.** 不要在每个文件的开头写入垃圾注释(作者,创建日期…)。 +**7.** 不要在每个文件的开头写入垃圾注释(作者,创建日期...)。 **8.** 单行注释用三个斜杆: `///` ,多行注释以 `/**`开始。 这些注释会当做文档。 diff --git a/docs/zh/engines/table-engines/integrations/hdfs.md b/docs/zh/engines/table-engines/integrations/hdfs.md index 55648afe407..be673b6ce92 100644 --- a/docs/zh/engines/table-engines/integrations/hdfs.md +++ b/docs/zh/engines/table-engines/integrations/hdfs.md @@ -103,7 +103,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs **示例** -创建具有名为文件的表 `file000`, `file001`, … , `file999`: +创建具有名为文件的表 `file000`, `file001`, ... , `file999`: ``` sql CREARE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV') diff --git a/docs/zh/engines/table-engines/integrations/s3.md b/docs/zh/engines/table-engines/integrations/s3.md index f2585decabf..f18814675c3 100644 --- a/docs/zh/engines/table-engines/integrations/s3.md +++ b/docs/zh/engines/table-engines/integrations/s3.md @@ -109,7 +109,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https: **示例** -使用文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`来创建表: +使用文件`file-000.csv`, `file-001.csv`, ... , `file-999.csv`来创建表: ``` sql CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); @@ -202,7 +202,7 @@ ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_p !!! warning "Warning" 如果文件列表中包含有从0开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`. -4. 
从文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`创建表: +4. 从文件`file-000.csv`, `file-001.csv`, ... , `file-999.csv`创建表: ``` sql CREATE TABLE big_table (name String, value UInt32) diff --git a/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md index 4fecf4e5669..e283a4c7510 100644 --- a/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/zh/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -59,7 +59,7 @@ WHERE table = 'visits' └───────────┴────────────────┴────────┘ ``` -`partition` 列存储分区的名称。此示例中有两个分区:`201901` 和 `201902`。在 [ALTER … PARTITION](#alter_manipulations-with-partitions) 语句中你可以使用该列值来指定分区名称。 +`partition` 列存储分区的名称。此示例中有两个分区:`201901` 和 `201902`。在 [ALTER ... PARTITION](#alter_manipulations-with-partitions) 语句中你可以使用该列值来指定分区名称。 `name` 列为分区中数据片段的名称。在 [ALTER ATTACH PART](#alter_attach-partition) 语句中你可以使用此列值中来指定片段名称。 diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index bfa69338657..67bd681269b 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -702,7 +702,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' - 插入(`INSERT`查询) - 后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations) - 从另一个副本下载 -- [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 +- [ALTER TABLE ... FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 除了数据变异和冻结分区以外的情况下,数据按照以下逻辑存储到卷或磁盘上: @@ -713,7 +713,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' 在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。具体细节可以通过服务器日志查看。 -用户可以通过 [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 +用户可以通过 [ALTER TABLE ... MOVE PART\|PARTITION ... TO VOLUME\|DISK ...](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 数据移动不会妨碍到数据复制。也就是说,同一张表的不同副本可以指定不同的存储策略。 diff --git a/docs/zh/engines/table-engines/special/external-data.md b/docs/zh/engines/table-engines/special/external-data.md index 688e25402ab..06c6331b4f3 100644 --- a/docs/zh/engines/table-engines/special/external-data.md +++ b/docs/zh/engines/table-engines/special/external-data.md @@ -26,7 +26,7 @@ ClickHouse 允许向服务器发送处理查询所需的数据以及 SELECT 查 以下的参数是可选的:**–name** – 表的名称,如果省略,则采用 _data。 **–format** – 文件中的数据格式。 如果省略,则使用 TabSeparated。 -以下的参数必选一个:**–types** – 逗号分隔列类型的列表。例如:`UInt64,String`。列将被命名为 _1,_2,… +以下的参数必选一个:**–types** – 逗号分隔列类型的列表。例如:`UInt64,String`。列将被命名为 _1,_2,... 
**–structure**– 表结构的格式 `UserID UInt64`,`URL String`。定义列的名字以及类型。 在 «file» 中指定的文件将由 «format» 中指定的格式解析,使用在 «types» 或 «structure» 中指定的数据类型。该表将被上传到服务器,并在作为名称为 «name»临时表。 diff --git a/docs/zh/faq/general/olap.md b/docs/zh/faq/general/olap.md index b014419578b..c4b36b138fa 100644 --- a/docs/zh/faq/general/olap.md +++ b/docs/zh/faq/general/olap.md @@ -10,13 +10,13 @@ sidebar_position: 100 [OLAP](https://en.wikipedia.org/wiki/Online_analytical_processing) stands for Online Analytical Processing. It is a broad term that can be looked at from two perspectives: technical and business. But at the very high level, you can just read these words backward: Processing -: Some source data is processed… +: Some source data is processed... Analytical -: …to produce some analytical reports and insights… +: ...to produce some analytical reports and insights... Online -: …in real-time. +: ...in real-time. ## OLAP from the Business Perspective {#olap-from-the-business-perspective} diff --git a/docs/zh/getting-started/example-datasets/nyc-taxi.md b/docs/zh/getting-started/example-datasets/nyc-taxi.md index 9c487140df3..ceeb6fbb9e0 100644 --- a/docs/zh/getting-started/example-datasets/nyc-taxi.md +++ b/docs/zh/getting-started/example-datasets/nyc-taxi.md @@ -196,7 +196,7 @@ real 75m56.214s (也可以直接使用`COPY ... TO PROGRAM`从Postgres中导入数据) -数据中所有与天气相关的字段(precipitation……average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们 +数据中所有与天气相关的字段(precipitation...average_wind_speed)都填充了NULL。 所以,我们将从最终数据集中删除它们 首先,我们使用单台服务器创建表,后面我们将在多台节点上创建这些表。 diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx index ecfdcddbbe2..7d4c299b919 100644 --- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx +++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx @@ -212,7 +212,7 @@ ORDER BY year └──────┴─────────┴───────────────────────────────────────────────────────┘ ``` -2020 年房价出事了!但这并不令人意外…… +2020 年房价出事了!但这并不令人意外... ### 查询 3. 最昂贵的社区 {#most-expensive-neighborhoods} diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index 758992e4084..975d5eb764c 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -371,7 +371,7 @@ UserID.bin,URL.bin,和EventTime.bin是UserID :::note - 最后一个索引条目(上图中的“mark 1082”)存储了上图中颗粒1082的主键列的最大值。 -- 索引条目(索引标记)不是基于表中的特定行,而是基于颗粒。例如,对于上图中的索引条目‘mark 0’,在我们的表中没有UserID为240.923且URL为“goal://metry=10000467796a411…”的行,相反,对于该表,有一个颗粒0,在该颗粒中,最小UserID值是240.923,最小URL值是“goal://metry=10000467796a411…”,这两个值来自不同的行。 +- 索引条目(索引标记)不是基于表中的特定行,而是基于颗粒。例如,对于上图中的索引条目‘mark 0’,在我们的表中没有UserID为240.923且URL为“goal://metry=10000467796a411...”的行,相反,对于该表,有一个颗粒0,在该颗粒中,最小UserID值是240.923,最小URL值是“goal://metry=10000467796a411...”,这两个值来自不同的行。 - 主索引文件完全加载到主内存中。如果文件大于可用的空闲内存空间,则ClickHouse将发生错误。 ::: diff --git a/docs/zh/index.md b/docs/zh/index.md index fab00dbcd1b..ec4b6dce1f8 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -16,7 +16,7 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) | #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | | #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | | #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | +| #N | ... | ... | ... | ... | ... 
| 处于同一行中的数据总是被物理的存储在一起。 @@ -26,11 +26,11 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) | Row: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | -| JavaEnable: | 1 | 0 | 1 | … | -| Title: | Investor Relations | Contact us | Mission | … | -| GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | 这些示例只显示了数据的排列顺序。来自不同列的值被单独存储,来自同一列的数据被存储在一起。 diff --git a/docs/zh/operations/settings/query-complexity.md b/docs/zh/operations/settings/query-complexity.md index 124d5fa5d1a..b1b5ca75018 100644 --- a/docs/zh/operations/settings/query-complexity.md +++ b/docs/zh/operations/settings/query-complexity.md @@ -196,7 +196,7 @@ Restrictions on the «maximum amount of something» can take the value 0, which Limits the number of rows in the hash table that is used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. +This settings applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and the [Join](../../engines/table-engines/special/join.md) table engine. If a query contains multiple joins, ClickHouse checks this setting for every intermediate result. @@ -213,7 +213,7 @@ Default value: 0. Limits the size in bytes of the hash table used when joining tables. -This settings applies to [SELECT … JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). +This settings applies to [SELECT ... JOIN](../../sql-reference/statements/select/join.md#select-join) operations and [Join table engine](../../engines/table-engines/special/join.md). If the query contains joins, ClickHouse checks this setting for every intermediate result. diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index c3b4194ed44..5e59196f56c 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1002,7 +1002,7 @@ ClickHouse生成异常 ## count_distinct_implementation {#settings-count_distinct_implementation} -指定其中的 `uniq*` 函数应用于执行 [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) 建筑。 +指定其中的 `uniq*` 函数应用于执行 [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) 建筑。 可能的值: diff --git a/docs/zh/operations/system-tables/dictionaries.md b/docs/zh/operations/system-tables/dictionaries.md index 0cf91e45e86..c7b1bdd04be 100644 --- a/docs/zh/operations/system-tables/dictionaries.md +++ b/docs/zh/operations/system-tables/dictionaries.md @@ -21,7 +21,7 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. - `origin` ([字符串](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. - `type` ([字符串](../../sql-reference/data-types/string.md)) — Type of dictionary allocation. 
[在内存中存储字典](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md). -- `key` — [密钥类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):数字键 ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, …, type n)”. +- `key` — [密钥类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-key):数字键 ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) or Сomposite key ([字符串](../../sql-reference/data-types/string.md)) — form “(type 1, type 2, ..., type n)”. - `attribute.names` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Array of [属性名称](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 由字典提供。 - `attribute.types` ([阵列](../../sql-reference/data-types/array.md)([字符串](../../sql-reference/data-types/string.md))) — Corresponding array of [属性类型](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md#ext_dict_structure-attributes) 这是由字典提供。 - `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. diff --git a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md index cb1dcc35f5c..27d3375aebb 100644 --- a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md @@ -80,7 +80,7 @@ FROM 在这种情况下,您应该记住您不知道直方图bin边界。 -## sequenceMatch(pattern)(timestamp, cond1, cond2, …) {#function-sequencematch} +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} 检查序列是否包含与模式匹配的事件链。 @@ -167,7 +167,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM - [sequenceCount](#function-sequencecount) -## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} +## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount} 计算与模式匹配的事件链的数量。该函数搜索不重叠的事件链。当前链匹配后,它开始搜索下一个链。 diff --git a/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md b/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md index 4dce65af1ed..253eb9ef82d 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/quantiles.md @@ -7,7 +7,7 @@ sidebar_position: 201 **语法** ``` sql -quantiles(level1, level2, …)(x) +quantiles(level1, level2, ...)(x) ``` 所有分位数函数(quantile)也有相应的分位数(quantiles)函数: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`。 这些函数一次计算所列的级别的所有分位数, 并返回结果值的数组。 diff --git a/docs/zh/sql-reference/data-types/aggregatefunction.md b/docs/zh/sql-reference/data-types/aggregatefunction.md index e8f28b367a5..80648eb165b 100644 --- a/docs/zh/sql-reference/data-types/aggregatefunction.md +++ b/docs/zh/sql-reference/data-types/aggregatefunction.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/aggregatefunction --- -# AggregateFunction(name, types_of_arguments…) {#data-type-aggregatefunction} +# AggregateFunction(name, types_of_arguments...) 
{#data-type-aggregatefunction} 聚合函数的中间状态,可以通过聚合函数名称加`-State`后缀的形式得到它。与此同时,当您需要访问该类型的最终状态数据时,您需要以相同的聚合函数名加`-Merge`后缀的形式来得到最终状态数据。 diff --git a/docs/zh/sql-reference/data-types/domains/index.md b/docs/zh/sql-reference/data-types/domains/index.md index c123b10f6fe..9f12018732b 100644 --- a/docs/zh/sql-reference/data-types/domains/index.md +++ b/docs/zh/sql-reference/data-types/domains/index.md @@ -19,9 +19,9 @@ Domain类型是特定实现的类型,它总是与某个现存的基础类型 ### Domains的额外特性 {#domainsde-e-wai-te-xing} - 在执行SHOW CREATE TABLE 或 DESCRIBE TABLE时,其对应的列总是展示为Domain类型的名称 -- 在INSERT INTO domain_table(domain_column) VALUES(…)中输入数据总是以更人性化的格式进行输入 +- 在INSERT INTO domain_table(domain_column) VALUES(...)中输入数据总是以更人性化的格式进行输入 - 在SELECT domain_column FROM domain_table中数据总是以更人性化的格式输出 -- 在INSERT INTO domain_table FORMAT CSV …中,实现外部源数据以更人性化的格式载入 +- 在INSERT INTO domain_table FORMAT CSV ...中,实现外部源数据以更人性化的格式载入 ### Domains类型的限制 {#domainslei-xing-de-xian-zhi} diff --git a/docs/zh/sql-reference/data-types/fixedstring.md b/docs/zh/sql-reference/data-types/fixedstring.md index 633307938a9..d454e935fe7 100644 --- a/docs/zh/sql-reference/data-types/fixedstring.md +++ b/docs/zh/sql-reference/data-types/fixedstring.md @@ -18,8 +18,8 @@ slug: /zh/sql-reference/data-types/fixedstring 可以有效存储在`FixedString`类型的列中的值的示例: - 二进制表示的IP地址(IPv6使用`FixedString(16)`) -- 语言代码(ru_RU, en_US … ) -- 货币代码(USD, RUB … ) +- 语言代码(ru_RU, en_US ... ) +- 货币代码(USD, RUB ... ) - 二进制表示的哈希值(MD5使用`FixedString(16)`,SHA256使用`FixedString(32)`) 请使用[UUID](uuid.md)数据类型来存储UUID值,。 diff --git a/docs/zh/sql-reference/data-types/nested-data-structures/nested.md b/docs/zh/sql-reference/data-types/nested-data-structures/nested.md index 5ef8256b483..57b30de0881 100644 --- a/docs/zh/sql-reference/data-types/nested-data-structures/nested.md +++ b/docs/zh/sql-reference/data-types/nested-data-structures/nested.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/nested-data-structures/nested --- -# Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2} +# Nested(Name1 Type1, Name2 Type2, ...) 
{#nestedname1-type1-name2-type2} 嵌套数据结构类似于嵌套表。嵌套数据结构的参数(列名和类型)与 CREATE 查询类似。每个表可以包含任意多行嵌套数据结构。 diff --git a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md index 601cb602a78..fbaa76365ec 100644 --- a/docs/zh/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/zh/sql-reference/data-types/simpleaggregatefunction.md @@ -3,7 +3,7 @@ slug: /zh/sql-reference/data-types/simpleaggregatefunction --- # SimpleAggregateFunction {#data-type-simpleaggregatefunction} -`SimpleAggregateFunction(name, types_of_arguments…)` 数据类型存储聚合函数的当前值, 并不像 [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) 那样存储其全部状态。这种优化可以应用于具有以下属性函数: 将函数 `f` 应用于行集合 `S1 UNION ALL S2` 的结果,可以通过将 `f` 分别应用于行集合的部分, 然后再将 `f` 应用于结果来获得: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`。 这个属性保证了部分聚合结果足以计算出合并的结果,所以我们不必存储和处理任何额外的数据。 +`SimpleAggregateFunction(name, types_of_arguments...)` 数据类型存储聚合函数的当前值, 并不像 [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) 那样存储其全部状态。这种优化可以应用于具有以下属性函数: 将函数 `f` 应用于行集合 `S1 UNION ALL S2` 的结果,可以通过将 `f` 分别应用于行集合的部分, 然后再将 `f` 应用于结果来获得: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`。 这个属性保证了部分聚合结果足以计算出合并的结果,所以我们不必存储和处理任何额外的数据。 支持以下聚合函数: diff --git a/docs/zh/sql-reference/data-types/tuple.md b/docs/zh/sql-reference/data-types/tuple.md index 004c80ff916..38813701c70 100644 --- a/docs/zh/sql-reference/data-types/tuple.md +++ b/docs/zh/sql-reference/data-types/tuple.md @@ -1,7 +1,7 @@ --- slug: /zh/sql-reference/data-types/tuple --- -# Tuple(T1, T2, …) {#tuplet1-t2} +# Tuple(T1, T2, ...) {#tuplet1-t2} 元组,其中每个元素都有单独的 [类型](index.md#data_types)。 diff --git a/docs/zh/sql-reference/functions/array-functions.md b/docs/zh/sql-reference/functions/array-functions.md index d150b94b8af..69db34e4a36 100644 --- a/docs/zh/sql-reference/functions/array-functions.md +++ b/docs/zh/sql-reference/functions/array-functions.md @@ -152,7 +152,7 @@ SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2); └─────────────┴─────────────┴────────────────┴─────────────────┘ ``` -## array(x1, …), operator \[x1, …\] {#arrayx1-operator-x1} +## array(x1, ...), operator \[x1, ...\] {#arrayx1-operator-x1} 使用函数的参数作为数组元素创建一个数组。 参数必须是常量,并且具有最小公共类型的类型。必须至少传递一个参数,否则将不清楚要创建哪种类型的数组。也就是说,你不能使用这个函数来创建一个空数组(为此,使用上面描述的’emptyArray  \*’函数)。 @@ -337,7 +337,7 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) 设置为«NULL»的元素将作为普通的元素值处理。 -## arrayCount(\[func,\] arr1, …) {#array-count} +## arrayCount(\[func,\] arr1, ...) {#array-count} `func`将arr数组作为参数,其返回结果为非零值的数量。如果未指定“func”,则返回数组中非零元素的数量。 @@ -363,7 +363,7 @@ SELECT countEqual([1, 2, NULL, NULL], NULL) ## arrayEnumerate(arr) {#array_functions-arrayenumerate} -返回 Array \[1, 2, 3, …, length (arr) \] +返回 Array \[1, 2, 3, ..., length (arr) \] 此功能通常与ARRAY JOIN一起使用。它允许在应用ARRAY JOIN后为每个数组计算一次。例如: @@ -403,7 +403,7 @@ WHERE (CounterID = 160656) AND notEmpty(GoalsReached) 此功能也可用于高阶函数。例如,您可以使用它来获取与条件匹配的元素的数组索引。 -## arrayEnumerateUniq(arr, …) {#arrayenumerateuniqarr} +## arrayEnumerateUniq(arr, ...) {#arrayenumerateuniqarr} 返回与源数组大小相同的数组,其中每个元素表示与其下标对应的源数组元素在源数组中出现的次数。 例如:arrayEnumerateUniq( \[10,20,10,30 \])=  \[1,1,2,1 \]。 @@ -621,7 +621,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res 设置为«NULL»的数组元素作为普通的数组元素值处理。 -## arraySort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arraySort(\[func,\] arr, ...) 
{#array_functions-reverse-sort} 以升序对`arr`数组的元素进行排序。如果指定了`func`函数,则排序顺序由`func`函数的调用结果决定。如果`func`接受多个参数,那么`arraySort`函数也将解析与`func`函数参数相同数量的数组参数。更详细的示例在`arraySort`的末尾。 @@ -721,7 +721,7 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; !!! 注意 "注意" 为了提高排序效率, 使用了[施瓦茨变换](https://en.wikipedia.org/wiki/Schwartzian_transform)。 -## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arrayReverseSort(\[func,\] arr, ...) {#array_functions-reverse-sort} 以降序对`arr`数组的元素进行排序。如果指定了`func`函数,则排序顺序由`func`函数的调用结果决定。如果`func`接受多个参数,那么`arrayReverseSort`函数也将解析与`func`函数参数相同数量的数组作为参数。更详细的示例在`arrayReverseSort`的末尾。 @@ -822,7 +822,7 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` -## arrayUniq(arr, …) {#arrayuniqarr} +## arrayUniq(arr, ...) {#arrayuniqarr} 如果传递一个参数,则计算数组中不同元素的数量。 如果传递了多个参数,则它计算多个数组中相应位置的不同元素元组的数量。 @@ -1221,7 +1221,7 @@ select arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1]); └───────────────────────────────────────────────┘ ``` -## arrayMap(func, arr1, …) {#array-map} +## arrayMap(func, arr1, ...) {#array-map} 将从 `func` 函数的原始应用中获得的数组返回给 `arr` 数组中的每个元素。 @@ -1251,7 +1251,7 @@ SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res 请注意,`arrayMap` 是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFilter(func, arr1, …) {#array-filter} +## arrayFilter(func, arr1, ...) {#array-filter} 返回一个仅包含 `arr1` 中的元素的数组,其中 `func` 返回的值不是 0。 @@ -1284,7 +1284,7 @@ SELECT 请注意,`arrayFilter`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFill(func, arr1, …) {#array-fill} +## arrayFill(func, arr1, ...) {#array-fill} 从第一个元素到最后一个元素扫描`arr1`,如果`func`返回0,则用`arr1[i - 1]`替换`arr1[i]`。`arr1`的第一个元素不会被替换。 @@ -1302,7 +1302,7 @@ SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, 请注意,`arrayFill` 是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayReverseFill(func, arr1, …) {#array-reverse-fill} +## arrayReverseFill(func, arr1, ...) {#array-reverse-fill} 从最后一个元素到第一个元素扫描`arr1`,如果`func`返回0,则用`arr1[i + 1]`替换`arr1[i]`。`arr1`的最后一个元素不会被替换。 @@ -1320,7 +1320,7 @@ SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 请注意,`arrayReverseFill`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arraySplit(func, arr1, …) {#array-split} +## arraySplit(func, arr1, ...) {#array-split} 将 `arr1` 拆分为多个数组。当 `func` 返回 0 以外的值时,数组将在元素的左侧拆分。数组不会在第一个元素之前被拆分。 @@ -1338,7 +1338,7 @@ SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res 请注意,`arraySplit`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayReverseSplit(func, arr1, …) {#array-reverse-split} +## arrayReverseSplit(func, arr1, ...) {#array-reverse-split} 将 `arr1` 拆分为多个数组。当 `func` 返回 0 以外的值时,数组将在元素的右侧拆分。数组不会在最后一个元素之后被拆分。 @@ -1356,37 +1356,37 @@ SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res 请注意,`arrayReverseSplit`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。 您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +## arrayExists(\[func,\] arr1, ...) 
{#arrayexistsfunc-arr1} 如果 `arr` 中至少有一个元素 `func` 返回 0 以外的值,则返回 1。否则,它返回 0。 请注意,`arrayExists`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +## arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} 如果 `func` 为 `arr` 中的所有元素返回 0 以外的值,则返回 1。否则,它返回 0。 请注意,`arrayAll`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您可以将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFirst(func, arr1, …) {#array-first} +## arrayFirst(func, arr1, ...) {#array-first} 返回 `arr1` 数组中 `func` 返回非 0 的值的第一个元素。 请注意,`arrayFirst`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayLast(func, arr1, …) {#array-last} +## arrayLast(func, arr1, ...) {#array-last} 返回 `arr1` 数组中的最后一个元素,其中 `func` 返回的值不是 0。 请注意,`arrayLast`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayFirstIndex(func, arr1, …) {#array-first-index} +## arrayFirstIndex(func, arr1, ...) {#array-first-index} 返回 `arr1` 数组中第一个元素的索引,其中 `func` 返回的值不是 0。 请注意,`arrayFirstIndex`是一个[高阶函数](../../sql-reference/functions/index.md#higher-order-functions)。您必须将 lambda 函数作为第一个参数传递给它,并且不能省略。 -## arrayLastIndex(func, arr1, …) {#array-last-index} +## arrayLastIndex(func, arr1, ...) {#array-last-index} 返回 `arr1` 数组中最后一个元素的索引,其中 `func` 返回的值不是 0。 @@ -1612,7 +1612,7 @@ SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; └─────┘ ``` -## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +## arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} 返回源数组中元素的部分和的数组(运行总和)。如果指定了 func 函数,则数组元素的值在求和之前由该函数转换。 diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index d6493ffe605..18b9f3495c0 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -443,7 +443,7 @@ SELECT toStartOfSecond(dt64, 'Asia/Istanbul'); `toISOWeek()`是一个兼容函数,等效于`toWeek(date,3)`。 下表描述了mode参数的工作方式。 -| Mode | First day of week | Range | Week 1 is the first week … | +| Mode | First day of week | Range | Week 1 is the first week ... | |------|-------------------|-------|-------------------------------| | 0 | Sunday | 0-53 | with a Sunday in this year | | 1 | Monday | 0-53 | with 4 or more days this year | diff --git a/docs/zh/sql-reference/functions/higher-order-functions.md b/docs/zh/sql-reference/functions/higher-order-functions.md index 929dc6f3ea7..0e08f88bba1 100644 --- a/docs/zh/sql-reference/functions/higher-order-functions.md +++ b/docs/zh/sql-reference/functions/higher-order-functions.md @@ -15,13 +15,13 @@ slug: /zh/sql-reference/functions/higher-order-functions 除了’arrayMap’和’arrayFilter’以外的所有其他函数,都可以省略第一个参数(lambda函数)。在这种情况下,默认返回数组元素本身。 -### arrayMap(func, arr1, …) {#higher_order_functions-array-map} +### arrayMap(func, arr1, ...) {#higher_order_functions-array-map} 将arr 将从’func’函数的原始应用程序获得的数组返回到’arr’数组中的每个元素。 返回从原始应用程序获得的数组 ‘func’ 函数中的每个元素 ‘arr’ 阵列。 -### arrayFilter(func, arr1, …) {#arrayfilterfunc-arr1} +### arrayFilter(func, arr1, ...) {#arrayfilterfunc-arr1} 返回一个仅包含以下元素的数组 ‘arr1’ 对于哪个 ‘func’ 返回0以外的内容。 @@ -48,31 +48,31 @@ SELECT │ [2] │ └─────┘ -### arrayCount(\[func,\] arr1, …) {#arraycountfunc-arr1} +### arrayCount(\[func,\] arr1, ...) {#arraycountfunc-arr1} 返回数组arr中非零元素的数量,如果指定了’func’,则通过’func’的返回值确定元素是否为非零元素。 -### arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} +### arrayExists(\[func,\] arr1, ...) 
{#arrayexistsfunc-arr1} 返回数组’arr’中是否存在非零元素,如果指定了’func’,则使用’func’的返回值确定元素是否为非零元素。 -### arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} +### arrayAll(\[func,\] arr1, ...) {#arrayallfunc-arr1} 返回数组’arr’中是否存在为零的元素,如果指定了’func’,则使用’func’的返回值确定元素是否为零元素。 -### arraySum(\[func,\] arr1, …) {#arraysumfunc-arr1} +### arraySum(\[func,\] arr1, ...) {#arraysumfunc-arr1} 计算arr数组的总和,如果指定了’func’,则通过’func’的返回值计算数组的总和。 -### arrayFirst(func, arr1, …) {#arrayfirstfunc-arr1} +### arrayFirst(func, arr1, ...) {#arrayfirstfunc-arr1} 返回数组中第一个匹配的元素,函数使用’func’匹配所有元素,直到找到第一个匹配的元素。 -### arrayFirstIndex(func, arr1, …) {#arrayfirstindexfunc-arr1} +### arrayFirstIndex(func, arr1, ...) {#arrayfirstindexfunc-arr1} 返回数组中第一个匹配的元素的下标索引,函数使用’func’匹配所有元素,直到找到第一个匹配的元素。 -### arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} +### arrayCumSum(\[func,\] arr1, ...) {#arraycumsumfunc-arr1} 返回源数组部分数据的总和,如果指定了`func`函数,则使用`func`的返回值计算总和。 @@ -98,7 +98,7 @@ SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res │ [1,2,0,1] │ └───────────┘ -### arraySort(\[func,\] arr1, …) {#arraysortfunc-arr1} +### arraySort(\[func,\] arr1, ...) {#arraysortfunc-arr1} 返回升序排序`arr1`的结果。如果指定了`func`函数,则排序顺序由`func`的结果决定。 @@ -124,7 +124,7 @@ SELECT arraySort([1, nan, 2, NULL, 3, nan, 4, NULL]) │ [1,2,3,4,nan,nan,NULL,NULL] │ └───────────────────────────────────────────────┘ -### arrayReverseSort(\[func,\] arr1, …) {#arrayreversesortfunc-arr1} +### arrayReverseSort(\[func,\] arr1, ...) {#arrayreversesortfunc-arr1} 返回降序排序`arr1`的结果。如果指定了`func`函数,则排序顺序由`func`的结果决定。 diff --git a/docs/zh/sql-reference/functions/in-functions.md b/docs/zh/sql-reference/functions/in-functions.md index 346e076310e..9858159a495 100644 --- a/docs/zh/sql-reference/functions/in-functions.md +++ b/docs/zh/sql-reference/functions/in-functions.md @@ -10,10 +10,10 @@ sidebar_label: IN 运算符 请参阅[IN 运算符](../../sql-reference/operators/in.md#select-in-operators)部分。 -## tuple(x, y, …), 运算符 (x, y, …) {#tuplex-y-operator-x-y} +## tuple(x, y, ...), 运算符 (x, y, ...) {#tuplex-y-operator-x-y} 函数用于对多个列进行分组。 -对于具有类型T1,T2,…的列,它返回包含这些列的元组(T1,T2,…)。 执行该函数没有任何成本。 +对于具有类型T1,T2,...的列,它返回包含这些列的元组(T1,T2,...)。 执行该函数没有任何成本。 元组通常用作IN运算符的中间参数值,或用于创建lambda函数的形参列表。 元组不能写入表。 ## tupleElement(tuple, n), 运算符 x.N {#tupleelementtuple-n-operator-x-n} diff --git a/docs/zh/sql-reference/functions/json-functions.md b/docs/zh/sql-reference/functions/json-functions.md index 52ec0ed1535..f07de564847 100644 --- a/docs/zh/sql-reference/functions/json-functions.md +++ b/docs/zh/sql-reference/functions/json-functions.md @@ -56,7 +56,7 @@ slug: /zh/sql-reference/functions/json-functions 以下函数基于[simdjson](https://github.com/lemire/simdjson),专为更复杂的JSON解析要求而设计。但上述假设2仍然适用。 -## JSONHas(json\[, indices_or_keys\]…) {#jsonhasjson-indices-or-keys} +## JSONHas(json\[, indices_or_keys\]...) {#jsonhasjson-indices-or-keys} 如果JSON中存在该值,则返回`1`。 @@ -83,7 +83,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractKey('{"a": "hello", "b": [-100, 200.0, 300]}', -2) = 'a' select JSONExtractString('{"a": "hello", "b": [-100, 200.0, 300]}', 1) = 'hello' -## JSONLength(json\[, indices_or_keys\]…) {#jsonlengthjson-indices-or-keys} +## JSONLength(json\[, indices_or_keys\]...) {#jsonlengthjson-indices-or-keys} 返回JSON数组或JSON对象的长度。 @@ -94,7 +94,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 3 select JSONLength('{"a": "hello", "b": [-100, 200.0, 300]}') = 2 -## JSONType(json\[, indices_or_keys\]…) {#jsontypejson-indices-or-keys} +## JSONType(json\[, indices_or_keys\]...) 
{#jsontypejson-indices-or-keys} 返回JSON值的类型。 @@ -106,13 +106,13 @@ slug: /zh/sql-reference/functions/json-functions select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'a') = 'String' select JSONType('{"a": "hello", "b": [-100, 200.0, 300]}', 'b') = 'Array' -## JSONExtractUInt(json\[, indices_or_keys\]…) {#jsonextractuintjson-indices-or-keys} +## JSONExtractUInt(json\[, indices_or_keys\]...) {#jsonextractuintjson-indices-or-keys} -## JSONExtractInt(json\[, indices_or_keys\]…) {#jsonextractintjson-indices-or-keys} +## JSONExtractInt(json\[, indices_or_keys\]...) {#jsonextractintjson-indices-or-keys} -## JSONExtractFloat(json\[, indices_or_keys\]…) {#jsonextractfloatjson-indices-or-keys} +## JSONExtractFloat(json\[, indices_or_keys\]...) {#jsonextractfloatjson-indices-or-keys} -## JSONExtractBool(json\[, indices_or_keys\]…) {#jsonextractbooljson-indices-or-keys} +## JSONExtractBool(json\[, indices_or_keys\]...) {#jsonextractbooljson-indices-or-keys} 解析JSON并提取值。这些函数类似于`visitParam*`函数。 @@ -124,7 +124,7 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractFloat('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 2) = 200.0 select JSONExtractUInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', -1) = 300 -## JSONExtractString(json\[, indices_or_keys\]…) {#jsonextractstringjson-indices-or-keys} +## JSONExtractString(json\[, indices_or_keys\]...) {#jsonextractstringjson-indices-or-keys} 解析JSON并提取字符串。此函数类似于`visitParamExtractString`函数。 @@ -140,11 +140,11 @@ slug: /zh/sql-reference/functions/json-functions select JSONExtractString('{"abc":"\\u263"}', 'abc') = '' select JSONExtractString('{"abc":"hello}', 'abc') = '' -## JSONExtract(json\[, indices_or_keys…\], Return_type) {#jsonextractjson-indices-or-keys-return-type} +## JSONExtract(json\[, indices_or_keys...\], Return_type) {#jsonextractjson-indices-or-keys-return-type} 解析JSON并提取给定ClickHouse数据类型的值。 -这是以前的`JSONExtract函数的变体。 这意味着`JSONExtract(…, ‘String’)`返回与`JSONExtractString()`返回完全相同。`JSONExtract(…, ‘Float64’)`返回于`JSONExtractFloat()\`返回完全相同。 +这是以前的`JSONExtract函数的变体。 这意味着`JSONExtract(..., ‘String’)`返回与`JSONExtractString()`返回完全相同。`JSONExtract(..., ‘Float64’)`返回于`JSONExtractFloat()\`返回完全相同。 示例: @@ -156,7 +156,7 @@ slug: /zh/sql-reference/functions/json-functions SELECT JSONExtract('{"day": "Thursday"}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Thursday' SELECT JSONExtract('{"day": 5}', 'day', 'Enum8(\'Sunday\' = 0, \'Monday\' = 1, \'Tuesday\' = 2, \'Wednesday\' = 3, \'Thursday\' = 4, \'Friday\' = 5, \'Saturday\' = 6)') = 'Friday' -## JSONExtractKeysAndValues(json\[, indices_or_keys…\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} +## JSONExtractKeysAndValues(json\[, indices_or_keys...\], Value_type) {#jsonextractkeysandvaluesjson-indices-or-keys-value-type} 从JSON中解析键值对,其中值是给定的ClickHouse数据类型。 @@ -164,7 +164,7 @@ slug: /zh/sql-reference/functions/json-functions SELECT JSONExtractKeysAndValues('{"x": {"a": 5, "b": 7, "c": 11}}', 'x', 'Int8') = [('a',5),('b',7),('c',11)]; -## JSONExtractRaw(json\[, indices_or_keys\]…) {#jsonextractrawjson-indices-or-keys} +## JSONExtractRaw(json\[, indices_or_keys\]...) 
{#jsonextractrawjson-indices-or-keys} 返回JSON的部分。 diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index 2eeaad63694..9c28ff867c5 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -90,7 +90,7 @@ SELECT 'some-file-name' AS a, basename(a) 将一个常量列变为一个非常量列。 在ClickHouse中,非常量列和常量列在内存中的表示方式不同。尽管函数对于常量列和非常量总是返回相同的结果,但它们的工作方式可能完全不同(执行不同的代码)。此函数用于调试这种行为。 -## ignore(…) {#ignore} +## ignore(...) {#ignore} 接受任何参数,包括`NULL`。始终返回0。 但是,函数的参数总是被计算的。该函数可以用于基准测试。 diff --git a/docs/zh/sql-reference/functions/string-functions.md b/docs/zh/sql-reference/functions/string-functions.md index d1914839d7c..c28735c7dc7 100644 --- a/docs/zh/sql-reference/functions/string-functions.md +++ b/docs/zh/sql-reference/functions/string-functions.md @@ -95,7 +95,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b') 以Unicode字符为单位反转UTF-8编码的字符串。如果字符串不是UTF-8编码,则可能获取到一个非预期的结果(不会抛出异常)。 -## format(pattern, s0, s1, …) {#formatpattern-s0-s1} +## format(pattern, s0, s1, ...) {#formatpattern-s0-s1} 使用常量字符串`pattern`格式化其他参数。`pattern`字符串中包含由大括号`{}`包围的«替换字段»。 未被包含在大括号中的任何内容都被视为文本内容,它将原样保留在返回值中。 如果你需要在文本内容中包含一个大括号字符,它可以通过加倍来转义:`{{ '{{' }}`和`{{ '{{' }} '}}' }}`。 字段名称可以是数字(从零开始)或空(然后将它们视为连续数字) @@ -113,11 +113,11 @@ SELECT format('{} {}', 'Hello', 'World') └───────────────────────────────────┘ ``` -## concat(s1, s2, …) {#concat-s1-s2} +## concat(s1, s2, ...) {#concat-s1-s2} 将参数中的多个字符串拼接,不带分隔符。 -## concatAssumeInjective(s1, s2, …) {#concatassumeinjectives1-s2} +## concatAssumeInjective(s1, s2, ...) {#concatassumeinjectives1-s2} 与[concat](#concat-s1-s2)相同,区别在于,你需要保证concat(s1, s2, s3) -\> s4是单射的,它将用于GROUP BY的优化。 diff --git a/docs/zh/sql-reference/functions/string-search-functions.md b/docs/zh/sql-reference/functions/string-search-functions.md index 972fd84e2a1..8ada76eeeda 100644 --- a/docs/zh/sql-reference/functions/string-search-functions.md +++ b/docs/zh/sql-reference/functions/string-search-functions.md @@ -204,7 +204,7 @@ SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']); **语法** ```sql -multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN]) +multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) ``` ## multiSearchFirstIndex @@ -216,7 +216,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, …, needleN]) **语法** ```sql -multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) +multiSearchFirstIndex(haystack, \[needle1, needle2, ..., needlen\]) ``` ## multiSearchAny {#multisearchany} @@ -229,7 +229,7 @@ multiSearchFirstIndex(haystack, \[needle1, needle2, …, n **语法** ```sql -multiSearchAny(haystack, [needle1, needle2, …, needleN]) +multiSearchAny(haystack, [needle1, needle2, ..., needleN]) ``` ## match {#match} @@ -273,7 +273,7 @@ Hyperscan 通常容易受到正则表达式拒绝服务 (ReDoS) 攻击。有关 **语法** ```sql -multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAny(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAnyIndex @@ -283,7 +283,7 @@ multiMatchAny(haystack, \[pattern1, pattern2, …, pattern **语法** ```sql -multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAnyIndex(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiMatchAllIndices @@ -293,7 +293,7 @@ multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, pa **语法** ```sql -multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\]) +multiMatchAllIndices(haystack, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAny @@ -307,7 +307,7 
@@ multiMatchAllIndices(haystack, \[pattern1, pattern2, …, **语法** ```sql -multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAnyIndex @@ -317,7 +317,7 @@ multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern21, pattern2, …, patternn\]) +multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## multiFuzzyMatchAllIndices @@ -327,7 +327,7 @@ multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2 **语法** ```sql -multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\]) +multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, ..., patternn\]) ``` ## extract diff --git a/docs/zh/sql-reference/functions/url-functions.md b/docs/zh/sql-reference/functions/url-functions.md index 44880b6ca1a..e7a0354c0bf 100644 --- a/docs/zh/sql-reference/functions/url-functions.md +++ b/docs/zh/sql-reference/functions/url-functions.md @@ -11,7 +11,7 @@ slug: /zh/sql-reference/functions/url-functions ### 协议 {#protocol} -返回URL的协议。例如: http、ftp、mailto、magnet… +返回URL的协议。例如: http、ftp、mailto、magnet... ### 域 {#domain} diff --git a/docs/zh/sql-reference/statements/alter/delete.md b/docs/zh/sql-reference/statements/alter/delete.md index 5eb77c35a93..f0b41c4e214 100644 --- a/docs/zh/sql-reference/statements/alter/delete.md +++ b/docs/zh/sql-reference/statements/alter/delete.md @@ -4,7 +4,7 @@ sidebar_position: 39 sidebar_label: DELETE --- -# ALTER TABLE … DELETE 语句 {#alter-mutations} +# ALTER TABLE ... DELETE 语句 {#alter-mutations} ``` sql ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr diff --git a/docs/zh/sql-reference/statements/alter/index.md b/docs/zh/sql-reference/statements/alter/index.md index e173837a16c..2286dcccd13 100644 --- a/docs/zh/sql-reference/statements/alter/index.md +++ b/docs/zh/sql-reference/statements/alter/index.md @@ -38,7 +38,7 @@ sidebar_label: ALTER ## Mutations 突变 {#mutations} -用来操作表数据的ALTER查询是通过一种叫做“突变”的机制来实现的,最明显的是[ALTER TABLE … DELETE](../../../sql-reference/statements/alter/delete.md)和[ALTER TABLE … UPDATE](../../../sql-reference/statements/alter/update.md)。它们是异步的后台进程,类似于[MergeTree](../../../engines/table-engines/mergetree-family/index.md)表的合并,产生新的“突变”版本的部件。 +用来操作表数据的ALTER查询是通过一种叫做“突变”的机制来实现的,最明显的是[ALTER TABLE ... DELETE](../../../sql-reference/statements/alter/delete.md)和[ALTER TABLE ... UPDATE](../../../sql-reference/statements/alter/update.md)。它们是异步的后台进程,类似于[MergeTree](../../../engines/table-engines/mergetree-family/index.md)表的合并,产生新的“突变”版本的部件。 diff --git a/docs/zh/sql-reference/statements/alter/update.md b/docs/zh/sql-reference/statements/alter/update.md index 97b2b43d889..7cf37401dc5 100644 --- a/docs/zh/sql-reference/statements/alter/update.md +++ b/docs/zh/sql-reference/statements/alter/update.md @@ -4,7 +4,7 @@ sidebar_position: 40 sidebar_label: UPDATE --- -# ALTER TABLE … UPDATE 语句 {#alter-table-update-statements} +# ALTER TABLE ... UPDATE 语句 {#alter-table-update-statements} ``` sql ALTER TABLE [db.]table UPDATE column1 = expr1 [, ...] WHERE filter_expr diff --git a/docs/zh/sql-reference/statements/alter/view.md b/docs/zh/sql-reference/statements/alter/view.md index 34a612803c1..a19d918612a 100644 --- a/docs/zh/sql-reference/statements/alter/view.md +++ b/docs/zh/sql-reference/statements/alter/view.md @@ -4,9 +4,9 @@ sidebar_position: 50 sidebar_label: VIEW --- -# ALTER TABLE … MODIFY QUERY 语句 {#alter-modify-query} +# ALTER TABLE ... 
MODIFY QUERY 语句 {#alter-modify-query} -当使用`ALTER TABLE … MODIFY QUERY`语句创建一个[物化视图](../create/view.md#materialized)时,可以修改`SELECT`查询。当物化视图在没有 `TO [db.]name` 的情况下创建时使用它。必须启用 `allow_experimental_alter_materialized_view_structure`设置。 +当使用`ALTER TABLE ... MODIFY QUERY`语句创建一个[物化视图](../create/view.md#materialized)时,可以修改`SELECT`查询。当物化视图在没有 `TO [db.]name` 的情况下创建时使用它。必须启用 `allow_experimental_alter_materialized_view_structure`设置。 如果一个物化视图使用`TO [db.]name`,你必须先 [DETACH](../detach.mdx) 视图。用[ALTER TABLE](index.md)修改目标表,然后 [ATTACH](../attach.mdx)之前分离的(`DETACH`)视图。 diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index bce0994ecd2..49a1d66bdf1 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -55,7 +55,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中 如果指定`POPULATE`,则在创建视图时将现有表数据插入到视图中,就像创建一个`CREATE TABLE ... AS SELECT ...`一样。 否则,查询仅包含创建视图后插入表中的数据。 我们**不建议**使用POPULATE,因为在创建视图期间插入表中的数据不会插入其中。 -`SELECT` 查询可以包含`DISTINCT`、`GROUP BY`、`ORDER BY`、`LIMIT`……请注意,相应的转换是在每个插入数据块上独立执行的。 例如,如果设置了`GROUP BY`,则在插入期间聚合数据,但仅在插入数据的单个数据包内。 数据不会被进一步聚合。 例外情况是使用独立执行数据聚合的`ENGINE`,例如`SummingMergeTree`。 +`SELECT` 查询可以包含`DISTINCT`、`GROUP BY`、`ORDER BY`、`LIMIT`...请注意,相应的转换是在每个插入数据块上独立执行的。 例如,如果设置了`GROUP BY`,则在插入期间聚合数据,但仅在插入数据的单个数据包内。 数据不会被进一步聚合。 例外情况是使用独立执行数据聚合的`ENGINE`,例如`SummingMergeTree`。 在物化视图上执行[ALTER](../../../sql-reference/statements/alter/index.md)查询有局限性,因此可能不方便。 如果物化视图使用构造`TO [db.]name`,你可以`DETACH`视图,为目标表运行`ALTER`,然后`ATTACH`先前分离的(`DETACH`)视图。 diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index f80c0a8a8ea..a08a78b6f1d 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -68,7 +68,7 @@ SELECT * FROM insert_select_testtable; INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set ``` -例如,下面的查询所使用的输入格式就与上面INSERT … VALUES的中使用的输入格式相同: +例如,下面的查询所使用的输入格式就与上面INSERT ... VALUES的中使用的输入格式相同: ``` sql INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... diff --git a/docs/zh/sql-reference/statements/select/limit.md b/docs/zh/sql-reference/statements/select/limit.md index 2bbf2949707..795f3f4ecd1 100644 --- a/docs/zh/sql-reference/statements/select/limit.md +++ b/docs/zh/sql-reference/statements/select/limit.md @@ -13,11 +13,11 @@ sidebar_label: LIMIT 如果没有 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句显式排序结果,结果的行选择可能是任意的和非确定性的。 -## LIMIT … WITH TIES 修饰符 {#limit-with-ties} +## LIMIT ... WITH TIES 修饰符 {#limit-with-ties} 如果为 `LIMIT n[,m]` 设置了 `WITH TIES` ,并且声明了 `ORDER BY expr_list`, 除了得到无修饰符的结果(正常情况下的 `limit n`, 前n行数据), 还会返回与第`n`行具有相同排序字段的行(即如果第n+1行的字段与第n行 拥有相同的排序字段,同样返回该结果. -此修饰符可以与: [ORDER BY … WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill) 组合使用. +此修饰符可以与: [ORDER BY ... WITH FILL modifier](../../../sql-reference/statements/select/order-by.md#orderby-with-fill) 组合使用. 例如以下查询: diff --git a/docs/zh/sql-reference/statements/select/order-by.md b/docs/zh/sql-reference/statements/select/order-by.md index 3286fc9f9e7..2f2d9a4959c 100644 --- a/docs/zh/sql-reference/statements/select/order-by.md +++ b/docs/zh/sql-reference/statements/select/order-by.md @@ -89,7 +89,7 @@ SELECT a, b, c FROM t ORDER BY a, b, c ## ORDER BY Expr WITH FILL Modifier {#orderby-with-fill} -此修饰符可以与 [LIMIT … WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties) 进行组合使用. 
+此修饰符可以与 [LIMIT ... WITH TIES modifier](../../../sql-reference/statements/select/limit.md#limit-with-ties) 进行组合使用. 可以在`ORDER BY expr`之后用可选的`FROM expr`,`TO expr`和`STEP expr`参数来设置`WITH FILL`修饰符。 所有`expr`列的缺失值将被顺序填充,而其他列将被填充为默认值。 diff --git a/docs/zh/sql-reference/table-functions/file.md b/docs/zh/sql-reference/table-functions/file.md index 28682255738..fa1ec12f7df 100644 --- a/docs/zh/sql-reference/table-functions/file.md +++ b/docs/zh/sql-reference/table-functions/file.md @@ -114,7 +114,7 @@ FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32') **示例** -从名为 `file000`, `file001`, … , `file999`的文件中查询数据: +从名为 `file000`, `file001`, ... , `file999`的文件中查询数据: ``` sql SELECT count(*) diff --git a/docs/zh/sql-reference/table-functions/hdfs.md b/docs/zh/sql-reference/table-functions/hdfs.md index b10b10ae2d2..f8320d8d0bb 100644 --- a/docs/zh/sql-reference/table-functions/hdfs.md +++ b/docs/zh/sql-reference/table-functions/hdfs.md @@ -84,7 +84,7 @@ FROM hdfs('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV', 'name String, value U **示例** -从名为 `file000`, `file001`, … , `file999`的文件中查询数据: +从名为 `file000`, `file001`, ... , `file999`的文件中查询数据: ``` sql SELECT count(*) diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md index f7384a7526e..4f2c7299d95 100644 --- a/docs/zh/sql-reference/table-functions/s3.md +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -99,7 +99,7 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi !!! warning "Warning" 如果文件列表中包含有从零开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`。 -计算名为 `file-000.csv`, `file-001.csv`, … , `file-999.csv` 文件的总行数: +计算名为 `file-000.csv`, `file-001.csv`, ... , `file-999.csv` 文件的总行数: ``` sql SELECT count(*) From 713764f62fa92db1fab04dcb426682b4859d6de1 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 14:01:00 +0200 Subject: [PATCH 277/392] Add missing space before link --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 4501d1f43d3..829d46df9fa 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -37,7 +37,7 @@ getMacro(name); **Returned value** -- Value of the specified macro.[String](../../sql-reference/data-types/string.md). +- Value of the specified macro. [String](../../sql-reference/data-types/string.md). 
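+
+For instance, assuming a macro named `test` is defined in the server's `<macros>` configuration, its value could be read like this:
+
+```sql
+SELECT getMacro('test') AS macro_value;
+```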
**Example** From dd7f3d1ba23bf2e18545ece2675f9836d84d7f69 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 May 2024 14:11:30 +0200 Subject: [PATCH 278/392] Fix test --- tests/integration/test_storage_s3/test.py | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index dc929b7db46..09b27fff1e8 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1816,27 +1816,13 @@ def test_schema_inference_cache(started_cluster): check_cache(instance, []) run_describe_query(instance, files, storage_name, started_cluster, bucket) - check_cache_misses( - instance, - files, - storage_name, - started_cluster, - bucket, - 4 if storage_name == "url" else 1, - ) + check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4) instance.query("system drop schema cache") check_cache(instance, []) run_describe_query(instance, files, storage_name, started_cluster, bucket) - check_cache_misses( - instance, - files, - storage_name, - started_cluster, - bucket, - 4 if storage_name == "url" else 1, - ) + check_cache_misses(instance, files, storage_name, started_cluster, bucket, 4) instance.query("system drop schema cache") From 147516f1626f656da5fc4dcc0d9254202a8de860 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 16 Apr 2024 13:05:07 +0000 Subject: [PATCH 279/392] Fix AST fuzzer failure --- src/Functions/FunctionHelpers.cpp | 2 ++ src/Functions/splitByRegexp.cpp | 10 ++++------ .../0_stateless/01866_split_by_regexp.reference | 1 + tests/queries/0_stateless/01866_split_by_regexp.sql | 3 +++ 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index d85bb0e7060..3b057779ffe 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -21,6 +21,8 @@ namespace ErrorCodes const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * column) { + if (!column) + return {}; if (!isColumnConst(*column)) return {}; diff --git a/src/Functions/splitByRegexp.cpp b/src/Functions/splitByRegexp.cpp index e28fe9c38bb..042db97794d 100644 --- a/src/Functions/splitByRegexp.cpp +++ b/src/Functions/splitByRegexp.cpp @@ -164,6 +164,7 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return SplitByRegexpImpl::getNumberOfArguments(); } bool isVariadic() const override { return SplitByRegexpImpl::isVariadic(); } + /// ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return SplitByRegexpImpl::getArgumentsThatAreAlwaysConstant(); } FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { @@ -182,14 +183,11 @@ public: private: bool patternIsTrivialChar(const ColumnsWithTypeAndName & arguments) const { + if (!arguments[0].column.get()) + return false; const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get()); if (!col) - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of first argument of function {}. 
" - "Must be constant string.", - arguments[0].column->getName(), - getName()); + return false; String pattern = col->getValue(); if (pattern.size() == 1) diff --git a/tests/queries/0_stateless/01866_split_by_regexp.reference b/tests/queries/0_stateless/01866_split_by_regexp.reference index 62939940545..552d4d1f96a 100644 --- a/tests/queries/0_stateless/01866_split_by_regexp.reference +++ b/tests/queries/0_stateless/01866_split_by_regexp.reference @@ -17,3 +17,4 @@ Test fallback of splitByRegexp to splitByChar if regexp is trivial ['a','b','c'] ['a|b|c'] ['a\\b\\c'] +AST Fuzzer failure diff --git a/tests/queries/0_stateless/01866_split_by_regexp.sql b/tests/queries/0_stateless/01866_split_by_regexp.sql index 570bd1ba5c0..bc25d3e1093 100644 --- a/tests/queries/0_stateless/01866_split_by_regexp.sql +++ b/tests/queries/0_stateless/01866_split_by_regexp.sql @@ -20,3 +20,6 @@ select splitByRegexp('{', 'a{b{c'); select splitByRegexp('}', 'a}b}c'); select splitByRegexp('|', 'a|b|c'); select splitByRegexp('\\', 'a\\b\\c'); + +SELECT 'AST Fuzzer failure'; +SELECT splitByRegexp(materialize(1), NULL, 3) -- { serverError ILLEGAL_COLUMN } From b1fe9ab5f0aa24408321382e9651517f7808a478 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 23 May 2024 15:33:21 +0200 Subject: [PATCH 280/392] CI: dependency fix for changelog.py #do_not_test --- tests/ci/ci.py | 3 ++- tests/ci/github_helper.py | 10 +++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index be922a306e1..99555b06bbf 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -45,6 +45,7 @@ from env_helper import ( S3_BUILDS_BUCKET, TEMP_PATH, GITHUB_RUN_ID, + GITHUB_REPOSITORY, ) from get_robot_token import get_best_robot_token from git_helper import GIT_PREFIX, Git @@ -1913,7 +1914,7 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int) -> None: print(f"ERROR: FIX IT: Run id has not been found PR [{pr_number}]!") else: print(f"Canceling PR workflow run_id: [{run_id}], pr: [{pr_number}]") - GitHub.cancel_wf(run_id) + GitHub.cancel_wf(GITHUB_REPOSITORY, get_best_robot_token(), run_id) def main() -> int: diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index 81603c66bae..eb0f6c24527 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -22,9 +22,6 @@ from github.NamedUser import NamedUser as NamedUser from github.PullRequest import PullRequest as PullRequest from github.Repository import Repository as Repository -from env_helper import GITHUB_REPOSITORY -from get_robot_token import get_best_robot_token - # pylint: enable=useless-import-alias CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache") @@ -265,12 +262,11 @@ class GitHub(github.Github): assert isinstance(value, int) self._retries = value - # minimalistic static methods not using pygithub + # static methods not using pygithub @staticmethod - def cancel_wf(run_id, strict=False): - token = get_best_robot_token() + def cancel_wf(repo, run_id, token, strict=False): headers = {"Authorization": f"token {token}"} - url = f"https://api.github.com/repos/{GITHUB_REPOSITORY}/actions/runs/{run_id}/cancel" + url = f"https://api.github.com/repos/{repo}/actions/runs/{run_id}/cancel" try: response = requests.post(url, headers=headers, timeout=10) response.raise_for_status() From 6e3a609907192d7cc378fb209d0e2431b8859eb0 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 15:43:17 +0200 Subject: [PATCH 281/392] Fix formatting in ru/index.md --- docs/ru/index.md | 
20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/ru/index.md b/docs/ru/index.md index d551d492af5..02be8912b94 100644 --- a/docs/ru/index.md +++ b/docs/ru/index.md @@ -12,10 +12,10 @@ ClickHouse — столбцовая система управления база | Строка | WatchID | JavaEnable | Title | GoodEvent | EventTime | |--------|-------------|------------|--------------------|-----------|---------------------| -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | ... | ... | ... | ... | ... | +| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | +| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | +| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | +| #N | ... | ... | ... | ... | ... | То есть, значения, относящиеся к одной строке, физически хранятся рядом. @@ -24,13 +24,13 @@ ClickHouse — столбцовая система управления база В столбцовых СУБД данные хранятся в таком порядке: -| Строка: | #0 | #1 | #2 | #N | +| Строка: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | -| JavaEnable: | 1 | 0 | 1 | ... | -| Title: | Investor Relations | Contact us | Mission | ... | -| GoodEvent: | 1 | 1 | 1 | ... | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | В примерах изображён только порядок расположения данных. То есть значения из разных столбцов хранятся отдельно, а данные одного столбца — вместе. From e24253c097ed2f0325c9be77fc87ebbe8f086a5c Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 15:45:26 +0200 Subject: [PATCH 282/392] Fix formatting in zh/index.md --- docs/zh/index.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/zh/index.md b/docs/zh/index.md index ec4b6dce1f8..c092f296722 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -13,10 +13,10 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) | Row | WatchID | JavaEnable | Title | GoodEvent | EventTime | |-----|-------------|------------|--------------------|-----------|---------------------| -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | ... | ... | ... | ... | ... | +| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | +| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | +| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | +| #N | ... | ... | ... | ... | ... | 处于同一行中的数据总是被物理的存储在一起。 @@ -24,13 +24,13 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) 在列式数据库系统中,数据按如下的顺序存储: -| Row: | #0 | #1 | #2 | #N | +| Row: | #0 | #1 | #2 | #N | |-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | -| JavaEnable: | 1 | 0 | 1 | ... | -| Title: | Investor Relations | Contact us | Mission | ... 
| -| GoodEvent: | 1 | 1 | 1 | ... | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | +| WatchID: | 89354350662 | 90329509958 | 89953706054 | ... | +| JavaEnable: | 1 | 0 | 1 | ... | +| Title: | Investor Relations | Contact us | Mission | ... | +| GoodEvent: | 1 | 1 | 1 | ... | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | ... | 这些示例只显示了数据的排列顺序。来自不同列的值被单独存储,来自同一列的数据被存储在一起。 From 87b4d43a3f93864c122f7fe2451c696720207809 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 15:48:20 +0200 Subject: [PATCH 283/392] Update return type formatting --- .../functions/arithmetic-functions.md | 8 +- .../functions/array-functions.md | 86 +++---- .../sql-reference/functions/bit-functions.md | 24 +- .../functions/bitmap-functions.md | 22 +- .../functions/date-time-functions.md | 222 +++++------------- .../functions/distance-functions.md | 58 ++--- .../functions/encoding-functions.md | 38 +-- .../functions/ext-dict-functions.md | 24 +- .../sql-reference/functions/hash-functions.md | 134 +++-------- .../sql-reference/functions/introspection.md | 29 +-- .../functions/ip-address-functions.md | 20 +- .../sql-reference/functions/json-functions.md | 22 +- .../sql-reference/functions/math-functions.md | 4 +- .../functions/other-functions.md | 140 ++++------- .../functions/random-functions.md | 56 ++--- .../functions/rounding-functions.md | 2 +- .../functions/splitting-merging-functions.md | 57 +++-- .../functions/string-functions.md | 100 ++------ .../functions/string-search-functions.md | 64 ++--- .../functions/time-series-functions.md | 14 +- .../functions/time-window-functions.md | 8 +- .../functions/tuple-functions.md | 36 +-- .../functions/tuple-map-functions.md | 16 +- .../functions/type-conversion-functions.md | 8 +- .../sql-reference/functions/ulid-functions.md | 4 +- .../sql-reference/functions/url-functions.md | 36 +-- .../sql-reference/functions/uuid-functions.md | 8 +- 27 files changed, 369 insertions(+), 871 deletions(-) diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 6d95f3dc358..aef4150ff50 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -320,9 +320,7 @@ multiplyDecimal(a, b[, result_scale]) **Returned value** -- The result of multiplication with given scale. - -Type: [Decimal256](../../sql-reference/data-types/decimal.md). +- The result of multiplication with given scale. [Decimal256](../../sql-reference/data-types/decimal.md). **Example** @@ -396,9 +394,7 @@ divideDecimal(a, b[, result_scale]) **Returned value** -- The result of division with given scale. - -Type: [Decimal256](../../sql-reference/data-types/decimal.md). +- The result of division with given scale. [Decimal256](../../sql-reference/data-types/decimal.md). **Example** diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 87e733a4b0c..512874d20b7 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -30,9 +30,7 @@ The function also works for [strings](string-functions.md#empty) or [UUID](uuid- **Returned value** -- Returns `1` for an empty array or `0` for a non-empty array. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty array or `0` for a non-empty array. [UInt8](../data-types/int-uint.md). 
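+
+For instance, checking an empty and a non-empty array side by side might look like this:
+
+```sql
+SELECT empty([]) AS is_empty, empty([1, 2, 3]) AS is_not_empty;
+```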
**Example** @@ -74,9 +72,7 @@ The function also works for [strings](string-functions.md#notempty) or [UUID](uu **Returned value** -- Returns `1` for a non-empty array or `0` for an empty array. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for a non-empty array or `0` for an empty array. [UInt8](../data-types/int-uint.md). **Example** @@ -797,9 +793,11 @@ The sizes of the two vectors must be equal. Arrays and Tuples may also contain m **Returned value** -- The dot product of the two vectors. +- The dot product of the two vectors. [Numeric](https://clickhouse.com/docs/en/native-protocol/columns#numeric-types). -Type: numeric - determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype. +:::note +The return type is determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype. +::: **Examples** @@ -1186,9 +1184,7 @@ arrayShingles(array, length) **Returned value** -- An array of generated shingles. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array of generated shingles. [Array](../../sql-reference/data-types/array.md). **Examples** @@ -1562,9 +1558,7 @@ arrayDifference(array) **Returned values** -Returns an array of differences between adjacent array elements. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +Returns an array of differences between adjacent array elements. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). **Example** @@ -1841,9 +1835,7 @@ arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) **Returned value** -- Array containing results of the aggregate function over specified ranges. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing results of the aggregate function over specified ranges. [Array](../../sql-reference/data-types/array.md). **Example** @@ -1986,9 +1978,7 @@ arrayCompact(arr) **Returned value** -The array without duplicate. - -Type: `Array`. +The array without duplicate. [Array](../data-types/array.md). **Example** @@ -2024,9 +2014,7 @@ The function can take any number of arrays of different types. All the input arr **Returned value** -- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. [Array](../../sql-reference/data-types/array.md). **Example** @@ -2383,7 +2371,8 @@ arrayMin([func,] arr) - The minimum of function values (or the array minimum). -Type: if `func` is specified, matches `func` return value type, else matches the array elements type. +:::note +If `func` is specified, then the return type matches the return value type of `func`, otherwise it matches the type of the array elements. 
+:::
**Examples** @@ -2438,7 +2427,9 @@ arrayMax([func,] arr) - The maximum of function values (or the array maximum). -Type: if `func` is specified, matches `func` return value type, else matches the array elements type. +:::note +if `func` is specified then the return type matches the return value type of `func`, otherwise it matches the type of the array elements. +::: **Examples** @@ -2493,7 +2484,14 @@ arraySum([func,] arr) - The sum of the function values (or the array sum). -Type: for decimal numbers in source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). +:::note +Return type: + +- For decimal numbers in the source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md). +- For floating point numbers — [Float64](../../sql-reference/data-types/float.md). +- For numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md). +- For numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). +::: **Examples** @@ -2546,9 +2544,7 @@ arrayAvg([func,] arr) **Returned value** -- The average of function values (or the array average). - -Type: [Float64](../../sql-reference/data-types/float.md). +- The average of function values (or the array average). [Float64](../../sql-reference/data-types/float.md). **Examples** @@ -2596,9 +2592,7 @@ arrayCumSum(arr) **Returned value** -- Returns an array of the partial sums of the elements in the source array. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +- Returns an array of the partial sums of the elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). Example: @@ -2630,9 +2624,7 @@ arrayCumSumNonNegative(arr) **Returned value** -- Returns an array of non-negative partial sums of elements in the source array. - -Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). +- Returns an array of non-negative partial sums of elements in the source array. [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). ``` sql SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res @@ -2662,9 +2654,7 @@ arrayProduct(arr) **Returned value** -- A product of array's elements. - -Type: [Float64](../../sql-reference/data-types/float.md). +- A product of array's elements. [Float64](../../sql-reference/data-types/float.md). **Examples** @@ -2714,9 +2704,7 @@ arrayRotateLeft(arr, n) **Returned value** -- An array rotated to the left by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array rotated to the left by the specified number of elements. [Array](../../sql-reference/data-types/array.md). 
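+
+As a quick illustration, rotating a five-element array two positions to the left moves the first two elements to the end:
+
+```sql
+SELECT arrayRotateLeft([1, 2, 3, 4, 5], 2) AS res; -- expected: [3, 4, 5, 1, 2]
+```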
**Examples** @@ -2780,9 +2768,7 @@ arrayRotateRight(arr, n) **Returned value** -- An array rotated to the right by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array rotated to the right by the specified number of elements. [Array](../../sql-reference/data-types/array.md). **Examples** @@ -2848,9 +2834,7 @@ arrayShiftLeft(arr, n[, default]) **Returned value** -- An array shifted to the left by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array shifted to the left by the specified number of elements. [Array](../../sql-reference/data-types/array.md). **Examples** @@ -2944,9 +2928,7 @@ arrayShiftRight(arr, n[, default]) **Returned value** -- An array shifted to the right by the specified number of elements. - -Type: [Array](../../sql-reference/data-types/array.md). +- An array shifted to the right by the specified number of elements. [Array](../../sql-reference/data-types/array.md). **Examples** @@ -3038,9 +3020,7 @@ arrayRandomSample(arr, samples) **Returned Value** -- An array containing a random sample of elements from the input array. - -Type: [Array](../data-types/array.md). +- An array containing a random sample of elements from the input array. [Array](../data-types/array.md). **Examples** diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 0951c783aae..709f438d67f 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -188,9 +188,7 @@ SELECT bitTest(number, index) **Returned values** -Returns a value of bit at specified position. - -Type: `UInt8`. +Returns a value of bit at specified position. [UInt8](../data-types/int-uint.md). **Example** @@ -253,9 +251,7 @@ SELECT bitTestAll(number, index1, index2, index3, index4, ...) **Returned values** -Returns result of logical conjuction. - -Type: `UInt8`. +Returns result of logical conjuction. [UInt8](../data-types/int-uint.md). **Example** @@ -318,9 +314,7 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...) **Returned values** -Returns result of logical disjunction. - -Type: `UInt8`. +Returns result of logical disjunction. [UInt8](../data-types/int-uint.md). **Example** @@ -372,11 +366,11 @@ bitCount(x) **Returned value** -- Number of bits set to one in the input number. +- Number of bits set to one in the input number. [UInt8](../data-types/int-uint.md). -The function does not convert input value to a larger type ([sign extension](https://en.wikipedia.org/wiki/Sign_extension)). So, for example, `bitCount(toUInt8(-1)) = 8`. - -Type: `UInt8`. +:::note +The function does not convert the input value to a larger type ([sign extension](https://en.wikipedia.org/wiki/Sign_extension)). So, for example, `bitCount(toUInt8(-1)) = 8`. +::: **Example** @@ -413,9 +407,7 @@ bitHammingDistance(int1, int2) **Returned value** -- The Hamming distance. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- The Hamming distance. [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index 379be302881..e546de039da 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -75,8 +75,8 @@ bitmapSubsetInRange(bitmap, range_start, range_end) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). 
-- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `range_end` – End of the range (exclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_end` – End of the range (exclusive). [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -105,8 +105,8 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – Maximum cardinality of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../../sql-reference/data-types/int-uint.md). +- `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -134,9 +134,9 @@ subBitmap(bitmap, offset, cardinality_limit) **Arguments** -- `bitmap` – The bitmap. Type: [Bitmap object](#bitmap_functions-bitmapbuild). -- `offset` – The position of the first element of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – The maximum number of elements in the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `bitmap` – The bitmap. [Bitmap object](#bitmap_functions-bitmapbuild). +- `offset` – The position of the first element of the subset. [UInt32](../../sql-reference/data-types/int-uint.md). +- `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -163,14 +163,12 @@ bitmapContains(bitmap, needle) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `needle` – Searched bit value. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `needle` – Searched bit value. [UInt32](../../sql-reference/data-types/int-uint.md). **Returned values** -- 0 — If `bitmap` does not contain `needle`. -- 1 — If `bitmap` contains `needle`. - -Type: `UInt8`. +- 0 — If `bitmap` does not contain `needle`. [UInt8](../data-types/int-uint.md). +- 1 — If `bitmap` contains `needle`. [UInt8](../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 843f22e5a6f..7de402d2349 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -50,9 +50,7 @@ Alias: **Returned value** -- A date created from the arguments. - -Type: [Date](../../sql-reference/data-types/date.md). +- A date created from the arguments. [Date](../../sql-reference/data-types/date.md). **Example** @@ -109,9 +107,7 @@ makeDateTime(year, month, day, hour, minute, second[, timezone]) **Returned value** -- A date with time created from the arguments. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- A date with time created from the arguments. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -152,7 +148,7 @@ Alias: `TIMESTAMP` **Arguments** -- `expr` - Date or date with time. Type: [String](../../sql-reference/data-types/string.md). +- `expr` - Date or date with time. [String](../../sql-reference/data-types/string.md). - `expr_time` - Optional parameter. Time to add. 
[String](../../sql-reference/data-types/string.md). **Examples** @@ -200,9 +196,7 @@ Alias: `timezone`. **Returned value** -- Timezone. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../../sql-reference/data-types/string.md). **Example** @@ -237,9 +231,7 @@ Alias: `serverTimezone`. **Returned value** -- Timezone. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../../sql-reference/data-types/string.md). **Example** @@ -278,9 +270,7 @@ Alias: `toTimezone`. **Returned value** -- Date and time. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Date and time. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -336,9 +326,7 @@ Alias: `timezoneOf`. **Returned value** -- Timezone name. - -Type: [String](../../sql-reference/data-types/string.md). +- Timezone name. [String](../../sql-reference/data-types/string.md). **Example** @@ -373,9 +361,7 @@ Alias: `timezoneOffset`. **Returned value** -- Offset from UTC in seconds. - -Type: [Int32](../../sql-reference/data-types/int-uint.md). +- Offset from UTC in seconds. [Int32](../../sql-reference/data-types/int-uint.md). **Example** @@ -410,9 +396,7 @@ Alias: `YEAR` **Returned value** -- The year of the given date/time - -Type: `UInt16` +- The year of the given date/time. [UInt16](../data-types/int-uint.md). **Example** @@ -446,9 +430,7 @@ Alias: `QUARTER` **Returned value** -- The quarter of the year (1, 2, 3 or 4) of the given date/time - -Type: `UInt8` +- The quarter of the year (1, 2, 3 or 4) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -482,9 +464,7 @@ Alias: `MONTH` **Returned value** -- The month of the year (1 - 12) of the given date/time - -Type: `UInt8` +- The month of the year (1 - 12) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -518,9 +498,7 @@ Alias: `DAYOFYEAR` **Returned value** -- The day of the year (1 - 366) of the given date/time - -Type: `UInt16` +- The day of the year (1 - 366) of the given date/time. [UInt16](../data-types/int-uint.md). **Example** @@ -554,9 +532,7 @@ Aliases: `DAYOFMONTH`, `DAY` **Returned value** -- The day of the month (1 - 31) of the given date/time - -Type: `UInt8` +- The day of the month (1 - 31) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -643,9 +619,7 @@ Alias: `HOUR` **Returned value** -- The hour of the day (0 - 23) of the given date/time - -Type: `UInt8` +- The hour of the day (0 - 23) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -679,9 +653,7 @@ Alias: `MINUTE` **Returned value** -- The minute of the hour (0 - 59) of the given date/time - -Type: `UInt8` +- The minute of the hour (0 - 59) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -715,9 +687,7 @@ Alias: `SECOND` **Returned value** -- The second in the minute (0 - 59) of the given date/time - -Type: `UInt8` +- The second in the minute (0 - 59) of the given date/time. [UInt8](../data-types/int-uint.md). **Example** @@ -763,9 +733,7 @@ Result: **Returned value** -- The millisecond in the minute (0 - 59) of the given date/time - -Type: `UInt16` +- The millisecond in the minute (0 - 59) of the given date/time. [UInt16](../data-types/int-uint.md). ## toUnixTimestamp @@ -782,9 +750,7 @@ toUnixTimestamp(str, [timezone]) **Returned value** -- Returns the unix timestamp. - -Type: `UInt32`. +- Returns the unix timestamp. [UInt32](../data-types/int-uint.md). 
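+
+For example, converting a date-time string in an explicit time zone (the exact value depends on the time zone used):
+
+```sql
+SELECT toUnixTimestamp('2017-11-05 08:07:47', 'UTC') AS unix_ts;
+```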
**Example** @@ -842,9 +808,7 @@ toStartOfYear(value) **Returned value** -- The first day of the year of the input date/time - -Type: `Date` +- The first day of the year of the input date/time. [Date](../data-types/date.md). **Example** @@ -876,9 +840,7 @@ toStartOfISOYear(value) **Returned value** -- The first day of the year of the input date/time - -Type: `Date` +- The first day of the year of the input date/time. [Date](../data-types/date.md). **Example** @@ -911,9 +873,7 @@ toStartOfQuarter(value) **Returned value** -- The first day of the quarter of the given date/time - -Type: `Date` +- The first day of the quarter of the given date/time. [Date](../data-types/date.md). **Example** @@ -945,9 +905,7 @@ toStartOfMonth(value) **Returned value** -- The first day of the month of the given date/time - -Type: `Date` +- The first day of the month of the given date/time. [Date](../data-types/date.md). **Example** @@ -985,9 +943,7 @@ Alias: `LAST_DAY` **Returned value** -- The last day of the month of the given date/time - -Type: `Date` +- The last day of the month of the given date/time=. [Date](../data-types/date.md). **Example** @@ -1019,9 +975,7 @@ toMonday(value) **Returned value** -- The date of the nearest Monday on or prior to the given date - -Type: `Date` +- The date of the nearest Monday on or prior to the given date. [Date](../data-types/date.md). **Example** @@ -1057,9 +1011,7 @@ toStartOfWeek(t[, mode[, timezone]]) **Returned value** -- The date of the nearest Sunday or Monday on or prior to the given date, depending on the mode - -Type: `Date` +- The date of the nearest Sunday or Monday on or prior to the given date, depending on the mode. [Date](../data-types/date.md). **Example** @@ -1102,9 +1054,7 @@ toLastDayOfWeek(t[, mode[, timezone]]) **Returned value** -- The date of the nearest Sunday or Monday on or after the given date, depending on the mode - -Type: `Date` +- The date of the nearest Sunday or Monday on or after the given date, depending on the mode. [Date](../data-types/date.md). **Example** @@ -1144,9 +1094,7 @@ toStartOfDay(value) **Returned value** -- The start of the day of the given date/time - -Type: `DateTime` +- The start of the day of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1178,9 +1126,7 @@ toStartOfHour(value) **Returned value** -- The start of the hour of the given date/time - -Type: `DateTime` +- The start of the hour of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1214,9 +1160,7 @@ toStartOfMinute(value) **Returned value** -- The start of the minute of the given date/time - -Type: `DateTime` +- The start of the minute of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1253,9 +1197,7 @@ toStartOfSecond(value, [timezone]) **Returned value** -- Input value without sub-seconds. - -Type: [DateTime64](../../sql-reference/data-types/datetime64.md). +- Input value without sub-seconds. [DateTime64](../../sql-reference/data-types/datetime64.md). **Examples** @@ -1309,9 +1251,7 @@ toStartOfFiveMinutes(value) **Returned value** -- The start of the five-minute interval of the given date/time - -Type: `DateTime` +- The start of the five-minute interval of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1349,9 +1289,7 @@ toStartOfTenMinutes(value) **Returned value** -- The start of the ten-minute interval of the given date/time - -Type: `DateTime` +- The start of the ten-minute interval of the given date/time. [DateTime](../data-types/datetime.md). 
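+
+For instance, any time between 10:20:00 and 10:29:59 is rounded down to 10:20:00:
+
+```sql
+SELECT toStartOfTenMinutes(toDateTime('2023-04-21 10:23:00')) AS res; -- expected: 2023-04-21 10:20:00
+```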
**Example** @@ -1389,9 +1327,7 @@ toStartOfFifteenMinutes(value) **Returned value** -- The start of the fifteen-minute interval of the given date/time - -Type: `DateTime` +- The start of the fifteen-minute interval of the given date/time. [DateTime](../data-types/datetime.md). **Example** @@ -1603,9 +1539,7 @@ Alias: `TO_DAYS` **Returned value** -The number of days passed since date 0000-01-01. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +The number of days passed since date 0000-01-01. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -1645,9 +1579,7 @@ Alias: `FROM_DAYS` **Returned value** -The date corresponding to the number of days passed since year zero. - -Type: [Date](../../sql-reference/data-types/date.md). +The date corresponding to the number of days passed since year zero. [Date](../../sql-reference/data-types/date.md). **Example** @@ -1709,9 +1641,7 @@ age('unit', startdate, enddate, [timezone]) **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../../sql-reference/data-types/int-uint.md). **Example** @@ -1787,9 +1717,7 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../../sql-reference/data-types/int-uint.md). **Example** @@ -1858,9 +1786,7 @@ Alias: `dateTrunc`. **Returned value** -- Value, truncated to the specified part of date. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Value, truncated to the specified part of date. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -1935,9 +1861,7 @@ Aliases: `dateAdd`, `DATE_ADD`. **Returned value** -Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2012,9 +1936,7 @@ Aliases: `dateSub`, `DATE_SUB`. **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2079,9 +2001,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Returned value** -Date or date with time with the specified `value` expressed in `unit` added to `date`. 
- -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time with the specified `value` expressed in `unit` added to `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2130,9 +2050,7 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2167,9 +2085,7 @@ addDate(date, interval) **Returned value** -Date or date with time obtained by adding `interval` to `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `interval` to `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2210,9 +2126,7 @@ subDate(date, interval) **Returned value** -Date or date with time obtained by subtracting `interval` from `date`. - -Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `interval` from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2252,9 +2166,7 @@ now([timezone]) **Returned value** -- Current date and time. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date and time. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -2303,9 +2215,7 @@ now64([scale], [timezone]) **Returned value** -- Current date and time with sub-second precision. - -Type: [DateTime64](../../sql-reference/data-types/datetime64.md). +- Current date and time with sub-second precision. [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -2339,9 +2249,7 @@ nowInBlock([timezone]) **Returned value** -- Current date and time at the moment of processing of each block of data. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date and time at the moment of processing of each block of data. [DateTime](../../sql-reference/data-types/datetime.md). 
**Example** @@ -2381,9 +2289,7 @@ today() **Returned value** -- Current date - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- Current date. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -2491,9 +2397,7 @@ YYYYMMDDToDate(yyyymmdd); **Returned value** -- a date created from the arguments. - -Type: [Date](../../sql-reference/data-types/date.md). +- a date created from the arguments. [Date](../../sql-reference/data-types/date.md). **Example** @@ -2534,9 +2438,7 @@ YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]); **Returned value** -- a date with time created from the arguments. - -Type: [DateTime](../../sql-reference/data-types/datetime.md). +- a date with time created from the arguments. [DateTime](../../sql-reference/data-types/datetime.md). **Example** @@ -3743,9 +3645,7 @@ dateName(date_part, date) **Returned value** -- The specified part of date. - -Type: [String](../../sql-reference/data-types/string.md#string) +- The specified part of date. [String](../../sql-reference/data-types/string.md#string) **Example** @@ -3781,9 +3681,7 @@ monthName(date) **Returned value** -- The name of the month. - -Type: [String](../../sql-reference/data-types/string.md#string) +- The name of the month. [String](../../sql-reference/data-types/string.md#string) **Example** @@ -3878,9 +3776,7 @@ toModifiedJulianDay(date) **Returned value** -- Modified Julian Day number. - -Type: [Int32](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Int32](../../sql-reference/data-types/int-uint.md). **Example** @@ -3912,9 +3808,7 @@ toModifiedJulianDayOrNull(date) **Returned value** -- Modified Julian Day number. - -Type: [Nullable(Int32)](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Nullable(Int32)](../../sql-reference/data-types/int-uint.md). **Example** @@ -3946,9 +3840,7 @@ fromModifiedJulianDay(day) **Returned value** -- Date in text form. - -Type: [String](../../sql-reference/data-types/string.md) +- Date in text form. [String](../../sql-reference/data-types/string.md) **Example** @@ -3980,9 +3872,7 @@ fromModifiedJulianDayOrNull(day) **Returned value** -- Date in text form. - -Type: [Nullable(String)](../../sql-reference/data-types/string.md) +- Date in text form. [Nullable(String)](../../sql-reference/data-types/string.md) **Example** diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index 5f3514049c7..9fda491ac50 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -24,9 +24,7 @@ Alias: `normL1`. **Returned value** -- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. - -Type: [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). **Examples** @@ -62,9 +60,7 @@ Alias: `normL2`. **Returned value** -- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). - -Type: [Float](../../sql-reference/data-types/float.md). +- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). [Float](../../sql-reference/data-types/float.md). 
**Example** @@ -99,9 +95,7 @@ Alias: `normL2Squared`. **Returned value** -- L2-norm squared. - -Type: [Float](../../sql-reference/data-types/float.md). +- L2-norm squared. [Float](../../sql-reference/data-types/float.md). **Example** @@ -137,9 +131,7 @@ Alias: `normLinf`. **Returned value** -- Linf-norm or the maximum absolute value. - -Type: [Float](../../sql-reference/data-types/float.md). +- Linf-norm or the maximum absolute value. [Float](../../sql-reference/data-types/float.md). **Example** @@ -176,9 +168,7 @@ Alias: `normLp`. **Returned value** -- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm) - -Type: [Float](../../sql-reference/data-types/float.md). +- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm). [Float](../../sql-reference/data-types/float.md). **Example** @@ -215,9 +205,7 @@ Alias: `distanceL1`. **Returned value** -- 1-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- 1-norm distance. [Float](../../sql-reference/data-types/float.md). **Example** @@ -254,9 +242,7 @@ Alias: `distanceL2`. **Returned value** -- 2-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- 2-norm distance. [Float](../../sql-reference/data-types/float.md). **Example** @@ -293,7 +279,7 @@ Alias: `distanceL2Squared`. **Returned value** -Type: [Float](../../sql-reference/data-types/float.md). +- Sum of the squares of the difference between the corresponding elements of two vectors. [Float](../../sql-reference/data-types/float.md). **Example** @@ -330,9 +316,7 @@ Alias: `distanceLinf`. **Returned value** -- Infinity-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- Infinity-norm distance. [Float](../../sql-reference/data-types/float.md). **Example** @@ -370,9 +354,7 @@ Alias: `distanceLp`. **Returned value** -- p-norm distance. - -Type: [Float](../../sql-reference/data-types/float.md). +- p-norm distance. [Float](../../sql-reference/data-types/float.md). **Example** @@ -409,9 +391,7 @@ Alias: `normalizeL1`. **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). **Example** @@ -447,9 +427,7 @@ Alias: `normalizeL1`. **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). **Example** @@ -485,9 +463,7 @@ Alias: `normalizeLinf `. **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). **Example** @@ -524,9 +500,7 @@ Alias: `normalizeLp `. **Returned value** -- Unit vector. - -Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). **Example** @@ -561,9 +535,7 @@ cosineDistance(vector1, vector2) **Returned value** -- Cosine of the angle between two vectors subtracted from one. - -Type: [Float](../../sql-reference/data-types/float.md). +- Cosine of the angle between two vectors subtracted from one. 
[Float](../../sql-reference/data-types/float.md). **Examples** diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 4f6da764b3c..bc64fdea427 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -22,9 +22,7 @@ char(number_1, [number_2, ..., number_n]); **Returned value** -- a string of given bytes. - -Type: `String`. +- a string of given bytes. [String](../data-types/string.md). **Example** @@ -102,9 +100,7 @@ Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order str **Returned value** -- A string with the hexadecimal representation of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string with the hexadecimal representation of the argument. [String](../../sql-reference/data-types/string.md). **Examples** @@ -185,15 +181,13 @@ unhex(arg) **Arguments** -- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). +- `arg` — A string containing any number of hexadecimal digits. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). Supports both uppercase and lowercase letters `A-F`. The number of hexadecimal digits does not have to be even. If it is odd, the last digit is interpreted as the least significant half of the `00-0F` byte. If the argument string contains anything other than hexadecimal digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of hex(N) is not performed by unhex(). **Returned value** -- A binary string (BLOB). - -Type: [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../../sql-reference/data-types/string.md). **Example** @@ -251,9 +245,7 @@ Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order str **Returned value** -- A string with the binary representation of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string with the binary representation of the argument. [String](../../sql-reference/data-types/string.md). **Examples** @@ -342,9 +334,7 @@ Supports binary digits `0` and `1`. The number of binary digits does not have to **Returned value** -- A binary string (BLOB). - -Type: [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../../sql-reference/data-types/string.md). **Examples** @@ -400,9 +390,7 @@ bitPositionsToArray(arg) **Returned value** -- An array containing a list of positions of bits that equal `1`, in ascending order. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- An array containing a list of positions of bits that equal `1`, in ascending order. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). **Example** @@ -458,9 +446,7 @@ mortonEncode(args) **Returned value** -- A UInt64 code - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. [UInt64](../../sql-reference/data-types/int-uint.md) **Example** @@ -500,9 +486,7 @@ Note: when using columns for `args` the provided `range_mask` tuple should still **Returned value** -- A UInt64 code - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. 
[UInt64](../../sql-reference/data-types/int-uint.md) **Example** @@ -621,9 +605,7 @@ mortonDecode(tuple_size, code) **Returned value** -- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md) +- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. [UInt64](../../sql-reference/data-types/int-uint.md) **Example** diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 4149afce044..41657aafbbe 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -243,10 +243,8 @@ dictHas('dict_name', id_expr) **Returned value** -- 0, if there is no key. -- 1, if there is a key. - -Type: `UInt8`. +- 0, if there is no key. [UInt8](../data-types/int-uint.md). +- 1, if there is a key. [UInt8](../data-types/int-uint.md). ## dictGetHierarchy @@ -265,9 +263,7 @@ dictGetHierarchy('dict_name', key) **Returned value** -- Parents for the key. - -Type: [Array(UInt64)](../../sql-reference/data-types/array.md). +- Parents for the key. [Array(UInt64)](../../sql-reference/data-types/array.md). ## dictIsIn @@ -285,10 +281,8 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr) **Returned value** -- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. -- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. - -Type: `UInt8`. +- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. [UInt8](../data-types/int-uint.md). +- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. [UInt8](../data-types/int-uint.md). ## dictGetChildren @@ -307,9 +301,7 @@ dictGetChildren(dict_name, key) **Returned values** -- First-level descendants for the key. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- First-level descendants for the key. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). **Example** @@ -357,9 +349,7 @@ dictGetDescendants(dict_name, key, level) **Returned values** -- Descendants for the key. - -Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- Descendants for the key. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). **Example** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 1cd7eeb7c83..89b95888f85 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -341,9 +341,7 @@ Even in these cases, we recommend applying the function offline and pre-calculat **Returned value** -- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). - -Type: [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). **Example** @@ -381,9 +379,7 @@ This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust **Return value** -- BLAKE3 hash as a byte array with type FixedString(32). 
-
-Type: [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).
+- BLAKE3 hash as a byte array with type FixedString(32). [FixedString](/docs/en/sql-reference/data-types/fixedstring.md).

**Example**

@@ -540,9 +536,7 @@ This is just [JavaHash](#javahash) with zeroed out sign bit. This function is us

**Returned value**

-A `Int32` data type hash value.
-
-Type: `hiveHash`.
+- `hiveHash` hash value. [Int32](../data-types/int-uint.md).

**Example**

@@ -679,9 +673,7 @@ gccMurmurHash(par1, ...)

**Returned value**

-- Calculated hash value.
-
-Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
+- Calculated hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md).

**Example**

@@ -718,9 +710,7 @@ MurmurHash(par1, ...)

**Returned value**

-- Calculated hash value.
-
-Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md).
+- Calculated hash value. [UInt32](/docs/en/sql-reference/data-types/int-uint.md).

**Example**

@@ -786,9 +776,7 @@ murmurHash3_128(expr)

**Returned value**

-A 128-bit `MurmurHash3` hash value.
-
-Type: [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
+A 128-bit `MurmurHash3` hash value. [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).

**Example**

@@ -822,9 +810,7 @@ xxh3(expr)

**Returned value**

-A 64-bit `xxh3` hash value.
-
-Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
+A 64-bit `xxh3` hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md).

**Example**

@@ -856,9 +842,11 @@ SELECT xxHash64('')

**Returned value**

-A `UInt32` or `UInt64` data type hash value.
+- Hash value. [UInt32/64](../data-types/int-uint.md).

-Type: `UInt32` for `xxHash32` and `UInt64` for `xxHash64`.
+:::note
+The return type will be `UInt32` for `xxHash32` and `UInt64` for `xxHash64`.
+:::

**Example**

@@ -899,9 +887,7 @@ ngramSimHash(string[, ngramsize])

**Returned value**

-- Hash value.
-
-Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
+- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md).

**Example**

@@ -938,9 +924,7 @@ ngramSimHashCaseInsensitive(string[, ngramsize])

**Returned value**

-- Hash value.
-
-Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
+- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md).

**Example**

@@ -977,9 +961,7 @@ ngramSimHashUTF8(string[, ngramsize])

**Returned value**

-- Hash value.
-
-Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
+- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md).

**Example**

@@ -1016,9 +998,7 @@ ngramSimHashCaseInsensitiveUTF8(string[, ngramsize])

**Returned value**

-- Hash value.
-
-Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
+- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md).

**Example**

@@ -1055,9 +1035,7 @@ wordShingleSimHash(string[, shinglesize])

**Returned value**

-- Hash value.
-
-Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
+- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md).

**Example**

@@ -1094,9 +1072,7 @@ wordShingleSimHashCaseInsensitive(string[, shinglesize])

**Returned value**

-- Hash value.
-
-Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
+- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md).

**Example**

@@ -1133,9 +1109,7 @@ wordShingleSimHashUTF8(string[, shinglesize])

**Returned value**

-- Hash value.
-
-Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md).
+- Hash value. 
[UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -1172,9 +1146,7 @@ wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -1208,9 +1180,7 @@ wyHash64(string) **Returned value** -- Hash value. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -1248,9 +1218,7 @@ ngramMinHash(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1288,9 +1256,7 @@ ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1328,9 +1294,7 @@ ngramMinHashUTF8(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1368,9 +1332,7 @@ ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1406,9 +1368,7 @@ ngramMinHashArg(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). 
**Example** @@ -1444,9 +1404,7 @@ ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** @@ -1482,9 +1440,7 @@ ngramMinHashArgUTF8(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** @@ -1520,9 +1476,7 @@ ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** @@ -1560,9 +1514,7 @@ wordShingleMinHash(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1600,9 +1552,7 @@ wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). 
**Example** @@ -1640,9 +1590,7 @@ wordShingleMinHashUTF8(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1680,9 +1628,7 @@ wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two hashes — the minimum and the maximum. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). **Example** @@ -1718,9 +1664,7 @@ wordShingleMinHashArg(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** @@ -1756,9 +1700,7 @@ wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** @@ -1794,9 +1736,7 @@ wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). 
**Example** @@ -1832,9 +1772,7 @@ wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. - -Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). **Example** diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 1025b8bdc3d..be8a2956d41 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -40,15 +40,10 @@ addressToLine(address_of_binary_instruction) **Returned value** -- Source code filename and the line number in this file delimited by colon. - - For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. - -- Name of a binary, if the function couldn’t find the debug information. - -- Empty string, if the address is not valid. - -Type: [String](../../sql-reference/data-types/string.md). +- Source code filename and the line number in this file delimited by colon. [String](../../sql-reference/data-types/string.md). + - For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. +- Name of a binary, if the function couldn’t find the debug information. [String](../../sql-reference/data-types/string.md). +- Empty string, if the address is not valid. [String](../../sql-reference/data-types/string.md). **Example** @@ -137,9 +132,7 @@ addressToLineWithInlines(address_of_binary_instruction) - Array with single element which is name of a binary, if the function couldn’t find the debug information. -- Empty array, if the address is not valid. - -Type: [Array(String)](../../sql-reference/data-types/array.md). +- Empty array, if the address is not valid. [Array(String)](../../sql-reference/data-types/array.md). **Example** @@ -236,10 +229,8 @@ addressToSymbol(address_of_binary_instruction) **Returned value** -- Symbol from ClickHouse object files. -- Empty string, if the address is not valid. - -Type: [String](../../sql-reference/data-types/string.md). +- Symbol from ClickHouse object files. [String](../../sql-reference/data-types/string.md). +- Empty string, if the address is not valid. [String](../../sql-reference/data-types/string.md). **Example** @@ -333,10 +324,8 @@ demangle(symbol) **Returned value** -- Name of the C++ function. -- Empty string if a symbol is not valid. - -Type: [String](../../sql-reference/data-types/string.md). +- Name of the C++ function. [String](../../sql-reference/data-types/string.md). +- Empty string if a symbol is not valid. [String](../../sql-reference/data-types/string.md). 
**Example** diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index be20e02d77e..21beffbd0a8 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -151,9 +151,7 @@ IPv6StringToNum(string) **Returned value** -- IPv6 address in binary format. - -Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md). +- IPv6 address in binary format. [FixedString(16)](../../sql-reference/data-types/fixedstring.md). **Example** @@ -313,9 +311,7 @@ toIPv6(string) **Returned value** -- IP address. - -Type: [IPv6](../../sql-reference/data-types/ipv6.md). +- IP address. [IPv6](../../sql-reference/data-types/ipv6.md). **Examples** @@ -374,9 +370,7 @@ isIPv4String(string) **Returned value** -- `1` if `string` is IPv4 address, `0` otherwise. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv4 address, `0` otherwise. [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** @@ -412,9 +406,7 @@ isIPv6String(string) **Returned value** -- `1` if `string` is IPv6 address, `0` otherwise. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv6 address, `0` otherwise. [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** @@ -454,9 +446,7 @@ This function accepts both IPv4 and IPv6 addresses (and networks) represented as **Returned value** -- `1` or `0`. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` or `0`. [UInt8](../../sql-reference/data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index e920ab82988..fa02dca07db 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -533,9 +533,7 @@ JSONExtractKeys(json[, a, b, c...]) **Returned value** -Array with the keys of the JSON. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Array with the keys of the JSON. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). **Example** @@ -595,10 +593,8 @@ JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) **Returned values** -- Array with `('key', 'value')` tuples. Both tuple members are strings. -- Empty array if the requested object does not exist, or input JSON is invalid. - -Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). +- Array with `('key', 'value')` tuples. Both tuple members are strings. [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). +- Empty array if the requested object does not exist, or input JSON is invalid. [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). **Examples** @@ -739,9 +735,7 @@ toJSONString(value) **Returned value** -- JSON representation of the value. - -Type: [String](../../sql-reference/data-types/string.md). +- JSON representation of the value. [String](../../sql-reference/data-types/string.md). 
**Example** @@ -786,9 +780,7 @@ Alias: `JSON_ARRAY_LENGTH(json)`. **Returned value** -- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. - -Type: [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md). +- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md). **Example** @@ -819,9 +811,7 @@ jsonMergePatch(json1, json2, ...) **Returned value** -- If JSON object strings are valid, return the merged JSON object string. - -Type: [String](../../sql-reference/data-types/string.md). +- If JSON object strings are valid, return the merged JSON object string. [String](../../sql-reference/data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 945166056af..eb0de410f28 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -842,9 +842,7 @@ degrees(x) **Returned value** -- Value in degrees. - -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +- Value in degrees. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 11ee471d709..2b4f888d06f 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -27,9 +27,7 @@ getMacro(name); **Returned value** -- Value of the specified macro. - -Type: [String](../../sql-reference/data-types/string.md). +- Value of the specified macro. [String](../../sql-reference/data-types/string.md). **Example** @@ -82,9 +80,7 @@ This function is case-insensitive. **Returned value** -- String with the fully qualified domain name. - -Type: `String`. +- String with the fully qualified domain name. [String](../data-types/string.md). **Example** @@ -207,9 +203,7 @@ byteSize(argument [, ...]) **Returned value** -- Estimation of byte size of the arguments in memory. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- Estimation of byte size of the arguments in memory. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -409,10 +403,8 @@ Aliases: `user()`, `USER()`, `current_user()`. Aliases are case insensitive. **Returned values** -- The name of the current user. -- In distributed queries, the login of the user who initiated the query. - -Type: `String`. +- The name of the current user. [String](../data-types/string.md). +- In distributed queries, the login of the user who initiated the query. [String](../data-types/string.md). **Example** @@ -448,10 +440,8 @@ isConstant(x) **Returned values** -- `1` if `x` is constant. -- `0` if `x` is non-constant. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `x` is constant. [UInt8](../../sql-reference/data-types/int-uint.md). +- `0` if `x` is non-constant. [UInt8](../../sql-reference/data-types/int-uint.md). **Examples** @@ -517,8 +507,8 @@ ifNotFinite(x,y) **Arguments** -- `x` — Value to check for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). +- `x` — Value to check for infinity. [Float\*](../../sql-reference/data-types/float.md). +- `y` — Fallback value. [Float\*](../../sql-reference/data-types/float.md). 
**Returned value**

@@ -924,9 +914,7 @@ uptime()

**Returned value**

-- Time value of seconds.
-
-Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md).
+- Time value of seconds. [UInt32](/docs/en/sql-reference/data-types/int-uint.md).

**Example**

@@ -971,7 +959,7 @@ None.

**Returned value**

-Type: [String](../data-types/string)
+- Current version of ClickHouse. [String](../data-types/string.md).

**Implementation details**

@@ -1041,7 +1029,9 @@ To prevent that you can create a subquery with [ORDER BY](../../sql-reference/st

 - Value of `column` with `offset` distance from current row, if `offset` is not outside the block boundaries.
 - The default value of `column` or `default_value` (if given), if `offset` is outside the block boundaries.

-Type: type of data blocks affected or default value type.
+:::note
+The return type will be that of the data blocks affected or the default value type.
+:::

**Example**

@@ -1238,9 +1228,7 @@ runningConcurrency(start, end)

**Returned values**

-- The number of concurrent events at each event start time.
-
-Type: [UInt32](../../sql-reference/data-types/int-uint.md)
+- The number of concurrent events at each event start time. [UInt32](../../sql-reference/data-types/int-uint.md)

**Example**

@@ -1535,7 +1523,7 @@ SELECT * FROM table WHERE indexHint()

**Returned value**

-Type: [Uint8](https://clickhouse.com/docs/en/data_types/int_uint/#diapazony-uint).
+- `1`. [UInt8](../data-types/int-uint.md).

**Example**

@@ -1638,9 +1626,7 @@ SELECT replicate(x, arr);

**Returned value**

-An array of the lame length as `arr` filled with value `x`.
-
-Type: `Array`.
+An array of the same length as `arr` filled with value `x`. [Array](../data-types/array.md).

**Example**

@@ -1670,9 +1656,7 @@ filesystemAvailable()

**Returned value**

-- The amount of remaining space available in bytes.
-
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+- The amount of remaining space available in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).

**Example**

@@ -1702,9 +1686,7 @@ filesystemFree()

**Returned value**

-- The amount of free space in bytes.
-
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+- The amount of free space in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).

**Example**

@@ -1734,9 +1716,7 @@ filesystemCapacity()

**Returned value**

-- Capacity of the filesystem in bytes.
-
-Type: [UInt64](../../sql-reference/data-types/int-uint.md).
+- Capacity of the filesystem in bytes. [UInt64](../../sql-reference/data-types/int-uint.md).

**Example**

@@ -1847,7 +1827,9 @@ finalizeAggregation(state)

 - Value/values that was aggregated.

-Type: Value of any types that was aggregated.
+:::note
+The return type is equal to that of any types which were aggregated.
+:::

**Examples**

@@ -2284,9 +2266,7 @@ countDigits(x)

**Returned value**

-Number of digits.
-
-Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
+Number of digits. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).

:::note
For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow).
:::

@@ -2310,9 +2290,7 @@ Result:

## errorCodeToName

-Returns the textual name of an error code. 
- -Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). +Returns the textual name of an error code. [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). **Syntax** @@ -2343,9 +2321,7 @@ tcpPort() **Returned value** -- The TCP port number. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The TCP port number. [UInt16](../../sql-reference/data-types/int-uint.md). **Example** @@ -2381,9 +2357,7 @@ currentProfiles() **Returned value** -- List of the current user settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the current user settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## enabledProfiles @@ -2397,9 +2371,7 @@ enabledProfiles() **Returned value** -- List of the enabled settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## defaultProfiles @@ -2413,9 +2385,7 @@ defaultProfiles() **Returned value** -- List of the default settings profiles. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## currentRoles @@ -2429,9 +2399,7 @@ currentRoles() **Returned value** -- A list of the current roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- A list of the current roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## enabledRoles @@ -2445,9 +2413,7 @@ enabledRoles() **Returned value** -- List of the enabled roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## defaultRoles @@ -2461,9 +2427,7 @@ defaultRoles() **Returned value** -- List of the default roles for the current user. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). ## getServerPort @@ -2492,9 +2456,7 @@ getServerPort(port_name) **Returned value** -- The number of the server port. - -Type: [UInt16](../../sql-reference/data-types/int-uint.md). +- The number of the server port. [UInt16](../../sql-reference/data-types/int-uint.md). **Example** @@ -2526,9 +2488,7 @@ queryID() **Returned value** -- The ID of the current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the current query. [String](../../sql-reference/data-types/string.md) **Example** @@ -2562,9 +2522,7 @@ initialQueryID() **Returned value** -- The ID of the initial current query. - -Type: [String](../../sql-reference/data-types/string.md) +- The ID of the initial current query. 
[String](../../sql-reference/data-types/string.md) **Example** @@ -2597,9 +2555,7 @@ shardNum() **Returned value** -- Shard index or constant `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Shard index or constant `0`. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -2639,9 +2595,7 @@ shardCount() **Returned value** -- Total number of shards or `0`. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Total number of shards or `0`. [UInt32](../../sql-reference/data-types/int-uint.md). **See Also** @@ -2663,9 +2617,7 @@ getOSKernelVersion() **Returned value** -- The current OS kernel version. - -Type: [String](../../sql-reference/data-types/string.md). +- The current OS kernel version. [String](../../sql-reference/data-types/string.md). **Example** @@ -2699,9 +2651,7 @@ zookeeperSessionUptime() **Returned value** -- Uptime of the current ZooKeeper session in seconds. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- Uptime of the current ZooKeeper session in seconds. [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -2738,9 +2688,7 @@ All arguments must be constant. **Returned value** -- Randomly generated table structure. - -Type: [String](../../sql-reference/data-types/string.md). +- Randomly generated table structure. [String](../../sql-reference/data-types/string.md). **Examples** @@ -2807,9 +2755,7 @@ structureToCapnProtoSchema(structure) **Returned value** -- CapnProto schema - -Type: [String](../../sql-reference/data-types/string.md). +- CapnProto schema. [String](../../sql-reference/data-types/string.md). **Examples** @@ -2908,9 +2854,7 @@ structureToProtobufSchema(structure) **Returned value** -- Protobuf schema - -Type: [String](../../sql-reference/data-types/string.md). +- Protobuf schema. [String](../../sql-reference/data-types/string.md). **Examples** diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 2d7752ed022..a7866c6d12e 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -204,9 +204,7 @@ randNormal(mean, variance) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -243,9 +241,7 @@ randLogNormal(mean, variance) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -282,9 +278,7 @@ randBinomial(experiments, probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -321,9 +315,7 @@ randNegativeBinomial(experiments, probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -359,9 +351,7 @@ randPoisson(n) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -397,9 +387,7 @@ randBernoulli(probability) **Returned value** -- Random number. - -Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. 
[UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** @@ -435,9 +423,7 @@ randExponential(lambda) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -473,9 +459,7 @@ randChiSquared(degree_of_freedom) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -511,9 +495,7 @@ randStudentT(degree_of_freedom) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -550,9 +532,7 @@ randFisherF(d1, d2) **Returned value** -- Random number. - -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). **Example** @@ -588,9 +568,7 @@ randomString(length) **Returned value** -- String filled with random bytes. - -Type: [String](../../sql-reference/data-types/string.md). +- String filled with random bytes. [String](../../sql-reference/data-types/string.md). **Example** @@ -630,9 +608,7 @@ randomFixedString(length); **Returned value(s)** -- String filled with random bytes. - -Type: [FixedString](../../sql-reference/data-types/fixedstring.md). +- String filled with random bytes. [FixedString](../../sql-reference/data-types/fixedstring.md). **Example** @@ -667,9 +643,7 @@ randomPrintableASCII(length) **Returned value** -- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. - -Type: [String](../../sql-reference/data-types/string.md) +- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [String](../../sql-reference/data-types/string.md) **Example** @@ -701,9 +675,7 @@ randomStringUTF8(length); **Returned value(s)** -- UTF-8 random string. - -Type: [String](../../sql-reference/data-types/string.md). +- UTF-8 random string. [String](../../sql-reference/data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index afec43cd6f4..6cbcc4e4ef3 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -336,7 +336,7 @@ roundAge(num) - Returns `45`, for $45 \leq age \leq 54$. - Returns `55`, for $age \geq 55$. -Type: [UInt8](../data-types/int-uint.md). +Type: [UInt8](../data-types/int-uint.md) in all cases. **Example** diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 8e50637cf30..77563713605 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -25,13 +25,15 @@ splitByChar(separator, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). + +:::note + Empty substrings may be selected when: - A separator occurs at the beginning or end of the string; - There are multiple consecutive separators; - The original string `s` is empty. 
- -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +::: :::note The behavior of parameter `max_substrings` changed starting with ClickHouse v22.11. In versions older than that, `max_substrings > 0` meant that `max_substring`-many splits were performed and that the remainder of the string was returned as the final element of the list. @@ -76,15 +78,17 @@ splitByString(separator, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +:::note +Empty substrings may be selected when: - A non-empty separator occurs at the beginning or end of the string; - There are multiple consecutive non-empty separators; - The original string `s` is empty while the separator is not empty. Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -131,15 +135,17 @@ splitByRegexp(regexp, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. Empty substrings may be selected when: +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). + +:::note +Empty substrings may be selected when: - A non-empty regular expression match occurs at the beginning or end of the string; - There are multiple consecutive non-empty regular expression matches; - The original string `s` is empty while the regular expression is not empty. -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). - Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -186,11 +192,11 @@ splitByWhitespace(s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). - +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). + +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -225,11 +231,11 @@ splitByNonAlpha(s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). 
+:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -287,11 +293,11 @@ Alias: `splitByAlpha` **Returned value(s)** -Returns an array of selected substrings. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. +::: **Example** @@ -322,11 +328,8 @@ extractAllGroups(text, regexp) **Returned values** -- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). - -- If there is no matching group, returns an empty array. - -Type: [Array](../data-types/array.md). +- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). [Array](../data-types/array.md). +- If there is no matching group, returns an empty array. [Array](../data-types/array.md). **Example** @@ -359,9 +362,7 @@ ngrams(string, ngramsize) **Returned values** -- Array with n-grams. - -Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- Array with n-grams. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). **Example** @@ -387,9 +388,7 @@ Splits a string into tokens using non-alphanumeric ASCII characters as separator **Returned value** -- The resulting array of tokens from input string. - -Type: [Array](../data-types/array.md). +- The resulting array of tokens from input string. [Array](../data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index ba23870a584..f45ceb99617 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -30,9 +30,7 @@ empty(x) **Returned value** -- Returns `1` for an empty string or `0` for a non-empty string. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty string or `0` for a non-empty string. [UInt8](../data-types/int-uint.md). **Example** @@ -68,9 +66,7 @@ notEmpty(x) **Returned value** -- Returns `1` for a non-empty string or `0` for an empty string string. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for a non-empty string or `0` for an empty string string. [UInt8](../data-types/int-uint.md). **Example** @@ -289,9 +285,7 @@ Alias: `LPAD` **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md). +- A left-padded string of the given length. [String](../data-types/string.md). **Example** @@ -325,9 +319,7 @@ leftPadUTF8(string, length[, pad_string]) **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md). 
+- A left-padded string of the given length. [String](../data-types/string.md). **Example** @@ -457,9 +449,7 @@ Alias: `RPAD` **Returned value** -- A left-padded string of the given length. - -Type: [String](../data-types/string.md). +- A left-padded string of the given length. [String](../data-types/string.md). **Example** @@ -493,9 +483,7 @@ rightPadUTF8(string, length[, pad_string]) **Returned value** -- A right-padded string of the given length. - -Type: [String](../data-types/string.md). +- A right-padded string of the given length. [String](../data-types/string.md). **Example** @@ -676,9 +664,7 @@ Alias: `REPEAT` **Returned value** -A string containing string `s` repeated `n` times. If `n` <= 0, the function returns the empty string. - -Type: `String`. +A string containing string `s` repeated `n` times. If `n` <= 0, the function returns the empty string. [String](../data-types/string.md). **Example** @@ -712,9 +698,7 @@ Alias: `SPACE`. **Returned value** -The string containing string ` ` repeated `n` times. If `n` <= 0, the function returns the empty string. - -Type: `String`. +The string containing string ` ` repeated `n` times. If `n` <= 0, the function returns the empty string. [String](../data-types/string.md). **Example** @@ -913,9 +897,7 @@ Alias: **Returned value** -A substring of `s` with `length` many bytes, starting at index `offset`. - -Type: `String`. +A substring of `s` with `length` many bytes, starting at index `offset`. [String](../data-types/string.md). **Example** @@ -1072,9 +1054,7 @@ base58Encode(plaintext) **Returned value** -- A string containing the encoded value of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string containing the encoded value of the argument. [String](../../sql-reference/data-types/string.md). **Example** @@ -1106,9 +1086,7 @@ base58Decode(encoded) **Returned value** -- A string containing the decoded value of the argument. - -Type: [String](../../sql-reference/data-types/string.md). +- A string containing the decoded value of the argument. [String](../data-types/string.md). **Example** @@ -1284,9 +1262,7 @@ trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) **Returned value** -A string without leading and/or trailing specified characters. - -Type: `String`. +A string without leading and/or trailing specified characters. [String](../data-types/string.md). **Example** @@ -1320,9 +1296,7 @@ Alias: `ltrim(input_string)`. **Returned value** -A string without leading common whitespaces. - -Type: `String`. +A string without leading common whitespaces. [String](../data-types/string.md). **Example** @@ -1356,9 +1330,7 @@ Alias: `rtrim(input_string)`. **Returned value** -A string without trailing common whitespaces. - -Type: `String`. +A string without trailing common whitespaces. [String](../data-types/string.md). **Example** @@ -1392,9 +1364,7 @@ Alias: `trim(input_string)`. **Returned value** -A string without leading and trailing common whitespaces. - -Type: `String`. +A string without leading and trailing common whitespaces. [String](../data-types/string.md). **Example** @@ -1444,9 +1414,7 @@ normalizeQuery(x) **Returned value** -- Sequence of characters with placeholders. - -Type: [String](../../sql-reference/data-types/string.md). +- Sequence of characters with placeholders. [String](../../sql-reference/data-types/string.md). **Example** @@ -1478,9 +1446,7 @@ normalizedQueryHash(x) **Returned value** -- Hash value. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges). 
+- Hash value. [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges). **Example** @@ -1512,9 +1478,7 @@ normalizeUTF8NFC(words) **Returned value** -- String transformed to NFC normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFC normalization form. [String](../../sql-reference/data-types/string.md). **Example** @@ -1546,9 +1510,7 @@ normalizeUTF8NFD(words) **Returned value** -- String transformed to NFD normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFD normalization form. [String](../../sql-reference/data-types/string.md). **Example** @@ -1580,9 +1542,7 @@ normalizeUTF8NFKC(words) **Returned value** -- String transformed to NFKC normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFKC normalization form. [String](../../sql-reference/data-types/string.md). **Example** @@ -1614,9 +1574,7 @@ normalizeUTF8NFKD(words) **Returned value** -- String transformed to NFKD normalization form. - -Type: [String](../../sql-reference/data-types/string.md). +- String transformed to NFKD normalization form. [String](../../sql-reference/data-types/string.md). **Example** @@ -1651,9 +1609,7 @@ encodeXMLComponent(x) **Returned value** -- The escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The escaped string. [String](../../sql-reference/data-types/string.md). **Example** @@ -1691,9 +1647,7 @@ decodeXMLComponent(x) **Returned value** -- The un-escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../../sql-reference/data-types/string.md). **Example** @@ -1727,9 +1681,7 @@ decodeHTMLComponent(x) **Returned value** -- The un-escaped string. - -Type: [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../../sql-reference/data-types/string.md). **Example** @@ -1782,9 +1734,7 @@ extractTextFromHTML(x) **Returned value** -- Extracted text. - -Type: [String](../../sql-reference/data-types/string.md). +- Extracted text. [String](../../sql-reference/data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 9738c19bf3c..327eb8994db 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -896,14 +896,16 @@ extractAllGroupsHorizontal(haystack, pattern) **Arguments** -- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. [String](../../sql-reference/data-types/string.md). +- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../../sql-reference/data-types/string.md). **Returned value** -- Type: [Array](../../sql-reference/data-types/array.md). +- Array of arrays of matches. [Array](../../sql-reference/data-types/array.md). +:::note If `haystack` does not match the `pattern` regex, an array of empty arrays is returned. 
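+
+A rough illustration of the no-match case (sample values are arbitrary):
+
+```sql
+-- '([0-9]+)' contains one capturing group and matches nothing in 'abc',
+-- so this should return an array holding one empty array: [[]]
+SELECT extractAllGroupsHorizontal('abc', '([0-9]+)');
+```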
+::: **Example** @@ -931,14 +933,16 @@ extractAllGroupsVertical(haystack, pattern) **Arguments** -- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. [String](../../sql-reference/data-types/string.md). +- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../../sql-reference/data-types/string.md). **Returned value** -- Type: [Array](../../sql-reference/data-types/array.md). +- Array of arrays of matches. [Array](../../sql-reference/data-types/array.md). +:::note If `haystack` does not match the `pattern` regex, an empty array is returned. +::: **Example** @@ -1340,9 +1344,7 @@ countSubstrings(haystack, needle[, start_pos]) **Returned values** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -1389,9 +1391,7 @@ countSubstringsCaseInsensitive(haystack, needle[, start_pos]) **Returned values** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -1443,9 +1443,7 @@ countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) **Returned values** -- The number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -1496,9 +1494,7 @@ countMatches(haystack, pattern) **Returned value** -- The number of matches. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -1543,9 +1539,7 @@ countMatchesCaseInsensitive(haystack, pattern) **Returned value** -- The number of matches. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** @@ -1583,9 +1577,7 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`. **Returned values** -`pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression. - -Type: `String`. +`pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression. [String](../data-types/string.md). **Examples** @@ -1624,10 +1616,8 @@ hasSubsequence(haystack, needle) **Returned values** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md). +- 0, otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -1662,10 +1652,8 @@ hasSubsequenceCaseInsensitive(haystack, needle) **Returned values** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md). +- 0, otherwise. [UInt8](../data-types/int-uint.md). 
**Examples** @@ -1700,10 +1688,8 @@ hasSubsequenceUTF8(haystack, needle) **Returned values** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md). +- 0, otherwise. [UInt8](../data-types/int-uint.md). Query: @@ -1738,10 +1724,8 @@ hasSubsequenceCaseInsensitiveUTF8(haystack, needle) **Returned values** -- 1, if needle is a subsequence of haystack. -- 0, otherwise. - -Type: `UInt8`. +- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md). +- 0, otherwise. [UInt8](../data-types/int-uint.md). **Examples** diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index e80a3fa9860..beb7a0503b9 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -30,9 +30,7 @@ At least four data points are required in `series` to detect outliers. **Returned value** -- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly. - -Type: [Array](../../sql-reference/data-types/array.md). +- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly. [Array](../../sql-reference/data-types/array.md). **Examples** @@ -81,10 +79,8 @@ seriesPeriodDetectFFT(series); **Returned value** -- A real value equal to the period of series data -- Returns NAN when number of data points are less than four. - -Type: [Float64](../../sql-reference/data-types/float.md). +- A real value equal to the period of series data. [Float64](../../sql-reference/data-types/float.md). +- Returns NAN when number of data points are less than four. [nan](../../sql-reference/data-types/float.md/#nan-and-inf). **Examples** @@ -134,9 +130,7 @@ The number of data points in `series` should be at least twice the value of `per **Returned value** - An array of four arrays where the first array include seasonal components, the second array - trend, -the third array - residue component, and the fourth array - baseline(seasonal + trend) component. - -Type: [Array](../../sql-reference/data-types/array.md). +the third array - residue component, and the fourth array - baseline(seasonal + trend) component. [Array](../../sql-reference/data-types/array.md). **Examples** diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index d8f23c92e61..2b5f093c149 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -23,9 +23,7 @@ tumble(time_attr, interval [, timezone]) **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding tumbling window. - -Type: `Tuple(DateTime, DateTime)` +- The inclusive lower and exclusive upper bound of the corresponding tumbling window. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. **Example** @@ -60,9 +58,7 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Returned values** -- The inclusive lower and exclusive upper bound of the corresponding hopping window. 
Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. - -Type: `Tuple(DateTime, DateTime)` +- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. [Tuple](../data-types/tuple.md)([DateTime](../data-types/datetime.md), [DateTime](../data-types/datetime.md))`. **Example** diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 64b1732597f..cfedc01ce8f 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -134,7 +134,9 @@ Tuples should have the same type of the elements. - The Hamming distance. -Type: The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples. +:::note +The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples. +::: ``` sql SELECT @@ -200,9 +202,7 @@ tupleToNameValuePairs(tuple) **Returned value** -- An array with (name, value) pairs. - -Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). +- An array with (name, value) pairs. [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). **Example** @@ -278,9 +278,7 @@ Alias: `vectorSum`. **Returned value** -- Tuple with the sum. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the sum. [Tuple](../../sql-reference/data-types/tuple.md). **Example** @@ -317,9 +315,7 @@ Alias: `vectorDifference`. **Returned value** -- Tuple with the result of subtraction. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of subtraction. [Tuple](../../sql-reference/data-types/tuple.md). **Example** @@ -354,9 +350,7 @@ tupleMultiply(tuple1, tuple2) **Returned value** -- Tuple with the multiplication. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the multiplication. [Tuple](../../sql-reference/data-types/tuple.md). **Example** @@ -391,9 +385,7 @@ tupleDivide(tuple1, tuple2) **Returned value** -- Tuple with the result of division. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of division. [Tuple](../../sql-reference/data-types/tuple.md). **Example** @@ -427,9 +419,7 @@ tupleNegate(tuple) **Returned value** -- Tuple with the result of negation. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of negation. [Tuple](../../sql-reference/data-types/tuple.md). **Example** @@ -464,9 +454,7 @@ tupleMultiplyByNumber(tuple, number) **Returned value** -- Tuple with multiplied values. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with multiplied values. [Tuple](../../sql-reference/data-types/tuple.md). **Example** @@ -501,9 +489,7 @@ tupleDivideByNumber(tuple, number) **Returned value** -- Tuple with divided values. - -Type: [Tuple](../../sql-reference/data-types/tuple.md). 
+- Tuple with divided values. [Tuple](../../sql-reference/data-types/tuple.md). **Example** diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 377283bc006..9468228c737 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -21,9 +21,7 @@ map(key1, value1[, key2, value2, ...]) **Returned value** -- Data structure as `key:value` pairs. - -Type: [Map(key, value)](../../sql-reference/data-types/map.md). +- Data structure as `key:value` pairs. [Map(key, value)](../../sql-reference/data-types/map.md). **Examples** @@ -387,9 +385,7 @@ mapContains(map, key) **Returned value** -- `1` if `map` contains `key`, `0` if not. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `map` contains `key`, `0` if not. [UInt8](../../sql-reference/data-types/int-uint.md). **Example** @@ -431,9 +427,7 @@ mapKeys(map) **Returned value** -- Array containing all keys from the `map`. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing all keys from the `map`. [Array](../../sql-reference/data-types/array.md). **Example** @@ -474,9 +468,7 @@ mapValues(map) **Returned value** -- Array containing all the values from `map`. - -Type: [Array](../../sql-reference/data-types/array.md). +- Array containing all the values from `map`. [Array](../../sql-reference/data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ea08ffa50e7..f1c2e92f201 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -631,9 +631,7 @@ toDateTime64(expr, scale, [timezone]) **Returned value** -- A calendar date and time of day, with sub-second precision. - -Type: [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). +- A calendar date and time of day, with sub-second precision. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). **Example** @@ -1749,9 +1747,7 @@ toLowCardinality(expr) **Returned values** -- Result of `expr`. - -Type: `LowCardinality(expr_result_type)` +- Result of `expr`. [LowCardinality](../data-types/lowcardinality.md) of the type of `expr`. **Example** diff --git a/docs/en/sql-reference/functions/ulid-functions.md b/docs/en/sql-reference/functions/ulid-functions.md index eb69b1779ae..b4e3fc2d164 100644 --- a/docs/en/sql-reference/functions/ulid-functions.md +++ b/docs/en/sql-reference/functions/ulid-functions.md @@ -65,9 +65,7 @@ ULIDStringToDateTime(ulid[, timezone]) **Returned value** -- Timestamp with milliseconds precision. - -Type: [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). +- Timestamp with milliseconds precision. [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). **Usage example** diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index a0b0170721c..52eeb539ef4 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -28,7 +28,7 @@ domain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -48,10 +48,8 @@ clickhouse.com **Returned values** -- Host name. 
If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse can’t parse the input string as a URL. - -Type: `String`. +- Host name. If ClickHouse can parse the input string as a URL. [String](../data-types/string.md). +- Empty string. If ClickHouse can’t parse the input string as a URL. [String](../data-types/string.md). **Example** @@ -79,7 +77,7 @@ topLevelDomain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -91,10 +89,8 @@ https://clickhouse.com/time/ **Returned values** -- Domain name. If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse cannot parse the input string as a URL. - -Type: `String`. +- Domain name. If ClickHouse can parse the input string as a URL. [String](../../sql-reference/data-types/string.md). +- Empty string. If ClickHouse cannot parse the input string as a URL. [String](../../sql-reference/data-types/string.md). **Example** @@ -162,9 +158,7 @@ cutToFirstSignificantSubdomain(URL, TLD) **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain. - -Type: [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain. [String](../../sql-reference/data-types/string.md). **Example** @@ -216,9 +210,7 @@ cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. - -Type: [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. [String](../../sql-reference/data-types/string.md). **Example** @@ -270,9 +262,7 @@ firstSignificantSubdomainCustom(URL, TLD) **Returned value** -- First significant subdomain. - -Type: [String](../../sql-reference/data-types/string.md). +- First significant subdomain. [String](../../sql-reference/data-types/string.md). **Example** @@ -422,9 +412,7 @@ netloc(URL) **Returned value** -- `username:password@host:port`. - -Type: `String`. +- `username:password@host:port`. [String](../data-types/string.md). **Example** @@ -479,9 +467,7 @@ cutURLParameter(URL, name) **Returned value** -- URL with `name` URL parameter removed. - -Type: `String`. +- URL with `name` URL parameter removed. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index d1b833c2439..0c1da88913d 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -289,9 +289,7 @@ The function also works for [Arrays](array-functions.md#function-empty) and [Str **Returned value** -- Returns `1` for an empty UUID or `0` for a non-empty UUID. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for an empty UUID or `0` for a non-empty UUID. [UInt8](../data-types/int-uint.md). **Example** @@ -331,9 +329,7 @@ The function also works for [Arrays](array-functions.md#function-notempty) or [S **Returned value** -- Returns `1` for a non-empty UUID or `0` for an empty UUID. - -Type: [UInt8](../data-types/int-uint.md). +- Returns `1` for a non-empty UUID or `0` for an empty UUID. [UInt8](../data-types/int-uint.md). 
**Example** From 508b0356543fc3a49e069166093147b3089ed29a Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 14:08:48 +0000 Subject: [PATCH 284/392] Move is NaN from other-functions to arithmetic functions --- .../en/sql-reference/functions/arithmetic-functions.md | 10 ++++++++++ docs/en/sql-reference/functions/other-functions.md | 10 ---------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 8b8527acfdf..7b079152907 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -194,6 +194,16 @@ Result: You can get similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. +## isNaN + +Returns 1 if the Float32 and Float64 argument is NaN, otherwise this function 0. + +**Syntax** + +```sql +isNaN(x) +``` + ## modulo Calculates the remainder of the division of two values `a` by `b`. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 79c0148d704..c16e8af1ef0 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -541,16 +541,6 @@ Result: └────────────────────┘ ``` -## isNaN - -Returns 1 if the Float32 and Float64 argument is NaN, otherwise this function 0. - -**Syntax** - -```sql -isNaN(x) -``` - ## hasColumnInTable Given the database name, the table name, and the column name as constant strings, returns 1 if the given column exists, otherwise 0. From 8df4da5efaa014f7866288e1aac799f40f52a8c2 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 23 May 2024 14:21:38 +0000 Subject: [PATCH 285/392] Print query in explain plan with parallel replicas --- src/Interpreters/ClusterProxy/executeQuery.cpp | 4 ++++ src/Processors/QueryPlan/ReadFromRemote.cpp | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 4bbda982f5b..13e6fa87051 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -403,6 +403,10 @@ void executeQueryWithParallelReplicas( ContextPtr context, std::shared_ptr storage_limits) { + auto logger = getLogger("executeQueryWithParallelReplicas"); + LOG_DEBUG(logger, "Executing read from {}, header {}, query ({}), stage {} with parallel replicas", + storage_id.getNameForLogs(), header.dumpStructure(), query_ast->formatForLogging(), processed_stage); + const auto & settings = context->getSettingsRef(); /// check cluster for parallel replicas diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index b4e35af85d6..84c2515e8ca 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -386,6 +386,8 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep( chassert(cluster->getShardCount() == 1); std::vector description; + description.push_back(fmt::format("query: {}", formattedAST(query_ast))); + for (const auto & pool : cluster->getShardsInfo().front().per_replica_pools) description.push_back(fmt::format("Replica: {}", pool->getHost())); From 71ce01404ddb4bf26f88d910452e70bb4a27a842 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 
23 May 2024 16:34:52 +0200 Subject: [PATCH 286/392] Fix validation --- src/Analyzer/ValidationUtils.cpp | 3 +++ src/Planner/PlannerExpressionAnalysis.cpp | 24 ++++------------------- 2 files changed, 7 insertions(+), 20 deletions(-) diff --git a/src/Analyzer/ValidationUtils.cpp b/src/Analyzer/ValidationUtils.cpp index 9e977964755..59157838edf 100644 --- a/src/Analyzer/ValidationUtils.cpp +++ b/src/Analyzer/ValidationUtils.cpp @@ -276,6 +276,9 @@ void validateAggregates(const QueryTreeNodePtr & query_node, AggregatesValidatio if (query_node_typed.hasOrderBy()) validate_group_by_columns_visitor.visit(query_node_typed.getOrderByNode()); + if (query_node_typed.hasInterpolate()) + validate_group_by_columns_visitor.visit(query_node_typed.getInterpolate()); + validate_group_by_columns_visitor.visit(query_node_typed.getProjectionNode()); } diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 399bbfc67cf..1cdff0a26aa 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -441,30 +441,20 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, auto & interpolate_list_node = query_node.getInterpolate()->as(); PlannerActionsVisitor interpolate_actions_visitor(planner_context); - auto interpolate_expression_dag = std::make_shared(); + auto interpolate_actions_dag = std::make_shared(); for (auto & interpolate_node : interpolate_list_node.getNodes()) { auto & interpolate_node_typed = interpolate_node->as(); - interpolate_actions_visitor.visit(interpolate_expression_dag, interpolate_node_typed.getInterpolateExpression()); + interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); + interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); } std::unordered_map before_sort_actions_inputs_name_to_node; for (const auto & node : before_sort_actions->getInputs()) before_sort_actions_inputs_name_to_node.emplace(node->result_name, node); - std::unordered_set aggregation_keys; - - auto projection_expression_dag = std::make_shared(); - for (const auto & node : query_node.getProjection()) - actions_visitor.visit(projection_expression_dag, node); - for (const auto & node : projection_expression_dag->getNodes()) - aggregation_keys.insert(node.result_name); - - if (aggregation_analysis_result_optional) - aggregation_keys.insert(aggregation_analysis_result_optional->aggregation_keys.begin(), aggregation_analysis_result_optional->aggregation_keys.end()); - - for (const auto & node : interpolate_expression_dag->getNodes()) + for (const auto & node : interpolate_actions_dag->getNodes()) { if (before_sort_actions_dag_output_node_names.contains(node.result_name) || node.type != ActionsDAG::ActionType::INPUT) @@ -479,12 +469,6 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, input_node_it = it; } - if (aggregation_analysis_result_optional) - if (!aggregation_keys.contains(node.result_name)) - throw Exception(ErrorCodes::NOT_AN_AGGREGATE, - "Column {} is not under aggregate function and not in GROUP BY keys. 
In query {}", - node.result_name, query_node.formatASTForErrorMessage()); - before_sort_actions_outputs.push_back(input_node_it->second); before_sort_actions_dag_output_node_names.insert(node.result_name); } From 21f831da0d823b9f00b02100bedb847d7af6720e Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 23 May 2024 16:36:11 +0200 Subject: [PATCH 287/392] Remove unneeded changes --- src/Planner/PlannerExpressionAnalysis.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 1cdff0a26aa..6e194b2c03e 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -28,7 +28,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int NOT_AN_AGGREGATE; } namespace @@ -398,8 +397,7 @@ ProjectionAnalysisResult analyzeProjection(const QueryNode & query_node, SortAnalysisResult analyzeSort(const QueryNode & query_node, const ColumnsWithTypeAndName & input_columns, const PlannerContextPtr & planner_context, - ActionsChain & actions_chain, - std::optional aggregation_analysis_result_optional) + ActionsChain & actions_chain) { ActionsDAGPtr before_sort_actions = std::make_shared(input_columns); auto & before_sort_actions_outputs = before_sort_actions->getOutputs(); @@ -570,7 +568,7 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo std::optional sort_analysis_result_optional; if (query_node.hasOrderBy()) { - sort_analysis_result_optional = analyzeSort(query_node, current_output_columns, planner_context, actions_chain, aggregation_analysis_result_optional); + sort_analysis_result_optional = analyzeSort(query_node, current_output_columns, planner_context, actions_chain); current_output_columns = actions_chain.getLastStepAvailableOutputColumns(); } From 47578772e4558ec044b676e13f5be6ae89d6c49f Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 May 2024 16:39:16 +0200 Subject: [PATCH 288/392] Fix hdfs assertion --- .../ObjectStorage/Azure/Configuration.h | 2 +- .../ObjectStorage/HDFS/Configuration.h | 2 +- .../ObjectStorage/ReadBufferIterator.cpp | 6 ++--- .../ObjectStorage/S3/Configuration.cpp | 2 +- src/Storages/ObjectStorage/S3/Configuration.h | 2 +- .../ObjectStorage/StorageObjectStorage.h | 2 +- .../StorageObjectStorageSource.cpp | 23 +++++++++++++++---- .../StorageObjectStorageSource.h | 6 +++++ 8 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index 19b9cf56f93..35b19079ca9 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -36,7 +36,7 @@ public: void setPaths(const Paths & paths) override { blobs_paths = paths; } String getNamespace() const override { return container; } - String getDataSourceDescription() override { return std::filesystem::path(connection_url) / container; } + String getDataSourceDescription() const override { return std::filesystem::path(connection_url) / container; } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index dc06e754c44..01a8b9c5e3b 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -31,7 +31,7 @@ public: 
std::string getPathWithoutGlobs() const override; String getNamespace() const override { return ""; } - String getDataSourceDescription() override { return url; } + String getDataSourceDescription() const override { return url; } StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; void check(ContextPtr context) const override; diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 50d69129883..5e89a0a1b9d 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -37,8 +37,7 @@ ReadBufferIterator::ReadBufferIterator( SchemaCache::Key ReadBufferIterator::getKeyForSchemaCache(const ObjectInfo & object_info, const String & format_name) const { - chassert(!object_info.getPath().starts_with("/")); - auto source = std::filesystem::path(configuration->getDataSourceDescription()) / object_info.getPath(); + auto source = StorageObjectStorageSource::getUniqueStoragePathIdentifier(*configuration, object_info); return DB::getKeyForSchemaCache(source, format_name, format_settings, getContext()); } @@ -51,8 +50,7 @@ SchemaCache::Keys ReadBufferIterator::getKeysForSchemaCache() const std::back_inserter(sources), [&](const auto & elem) { - chassert(!elem->getPath().starts_with("/")); - return std::filesystem::path(configuration->getDataSourceDescription()) / elem->getPath(); + return StorageObjectStorageSource::getUniqueStoragePathIdentifier(*configuration, *elem); }); return DB::getKeysForSchemaCache(sources, *format, format_settings, getContext()); } diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 00d569fea9f..6b6cde0c431 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -50,7 +50,7 @@ static const std::unordered_set optional_configuration_keys = "no_sign_request" }; -String StorageS3Configuration::getDataSourceDescription() +String StorageS3Configuration::getDataSourceDescription() const { return std::filesystem::path(url.uri.getHost() + std::to_string(url.uri.getPort())) / url.bucket; } diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index de6c02d5020..906d10a1a9a 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -31,7 +31,7 @@ public: void setPaths(const Paths & paths) override { keys = paths; } String getNamespace() const override { return url.bucket; } - String getDataSourceDescription() override; + String getDataSourceDescription() const override; StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const override; bool isArchive() const override { return url.archive_pattern.has_value(); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 7b118cb7e6b..de75af5035b 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -161,7 +161,7 @@ public: virtual const Paths & getPaths() const = 0; virtual void setPaths(const Paths & paths) = 0; - virtual String getDataSourceDescription() = 0; + virtual String getDataSourceDescription() const = 0; virtual String getNamespace() const = 0; virtual StorageObjectStorage::QuerySettings getQuerySettings(const ContextPtr &) const = 0; diff --git 
a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 7332574b246..b31d0f8a92e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -82,6 +82,21 @@ void StorageObjectStorageSource::setKeyCondition(const ActionsDAGPtr & filter_ac setKeyConditionImpl(filter_actions_dag, context_, read_from_format_info.format_header); } +std::string StorageObjectStorageSource::getUniqueStoragePathIdentifier( + const Configuration & configuration, + const ObjectInfo & object_info, + bool include_connection_info) +{ + auto path = object_info.getPath(); + if (path.starts_with("/")) + path = path.substr(1); + + if (include_connection_info) + return fs::path(configuration.getDataSourceDescription()) / path; + else + return fs::path(configuration.getNamespace()) / path; +} + std::shared_ptr StorageObjectStorageSource::createFileIterator( ConfigurationPtr configuration, ObjectStoragePtr object_storage, @@ -183,7 +198,7 @@ Chunk StorageObjectStorageSource::generate() VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( chunk, read_from_format_info.requested_virtual_columns, - fs::path(configuration->getNamespace()) / reader.getObjectInfo().getPath(), + getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), object_info.metadata->size_bytes, &filename); return chunk; @@ -212,7 +227,7 @@ Chunk StorageObjectStorageSource::generate() void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows) { const auto cache_key = getKeyForSchemaCache( - fs::path(configuration->getDataSourceDescription()) / object_info.getPath(), + getUniqueStoragePathIdentifier(*configuration, object_info), configuration->format, format_settings, getContext()); @@ -222,7 +237,7 @@ void StorageObjectStorageSource::addNumRowsToCache(const ObjectInfo & object_inf std::optional StorageObjectStorageSource::tryGetNumRowsFromCache(const ObjectInfo & object_info) { const auto cache_key = getKeyForSchemaCache( - fs::path(configuration->getDataSourceDescription()) / object_info.getPath(), + getUniqueStoragePathIdentifier(*configuration, object_info), configuration->format, format_settings, getContext()); @@ -511,7 +526,7 @@ StorageObjectStorage::ObjectInfoPtr StorageObjectStorageSource::GlobIterator::ne for (const auto & object_info : new_batch) { chassert(object_info); - paths.push_back(fs::path(configuration->getNamespace()) / object_info->getPath()); + paths.push_back(getUniqueStoragePathIdentifier(*configuration, *object_info, false)); } VirtualColumnUtils::filterByPathOrFile(new_batch, paths, filter_dag, virtual_columns, getContext()); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h index e9635ff4dce..fd7c7aa7102 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h @@ -17,6 +17,7 @@ class StorageObjectStorageSource : public SourceWithKeyCondition, WithContext { friend class StorageS3QueueSource; public: + using Configuration = StorageObjectStorage::Configuration; using ConfigurationPtr = StorageObjectStorage::ConfigurationPtr; using ObjectInfo = StorageObjectStorage::ObjectInfo; using ObjectInfos = StorageObjectStorage::ObjectInfos; @@ -58,6 +59,11 @@ public: ObjectInfos * read_keys, std::function file_progress_callback = {}); + static std::string 
getUniqueStoragePathIdentifier( + const Configuration & configuration, + const ObjectInfo & object_info, + bool include_connection_info = true); + protected: const String name; ObjectStoragePtr object_storage; From 9911f13c77588e089832c05aebfe0aff5b8241cd Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 16:39:53 +0200 Subject: [PATCH 289/392] Update function return type for consistency --- .../en/sql-reference/functions/geo/geohash.md | 14 +- docs/en/sql-reference/functions/geo/h3.md | 276 +++++++----------- docs/en/sql-reference/functions/geo/s2.md | 42 ++- docs/en/sql-reference/functions/geo/svg.md | 4 +- .../functions/rounding-functions.md | 16 +- .../functions/string-search-functions.md | 6 +- .../sql-reference/functions/uuid-functions.md | 8 +- 7 files changed, 138 insertions(+), 228 deletions(-) diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index ce16af44e90..80c55650b9c 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -74,11 +74,11 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi **Arguments** -- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `precision` — Geohash precision. Range: `[1, 12]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. [Float](../../../sql-reference/data-types/float.md). +- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. [Float](../../../sql-reference/data-types/float.md). +- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. [Float](../../../sql-reference/data-types/float.md). +- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. [Float](../../../sql-reference/data-types/float.md). +- `precision` — Geohash precision. Range: `[1, 12]`. [UInt8](../../../sql-reference/data-types/int-uint.md). :::note All coordinate parameters must be of the same type: either `Float32` or `Float64`. @@ -86,11 +86,9 @@ All coordinate parameters must be of the same type: either `Float32` or `Float64 **Returned values** -- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. +- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). - `[]` - Empty array if minimum latitude and longitude values aren’t less than corresponding maximum values. -Type: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). - :::note Function throws an exception if resulting array is over 10’000’000 items long. ::: diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 29486c58e6a..7faff8288b3 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -26,14 +26,12 @@ h3IsValid(h3index) **Parameter** -- `h3index` — Hexagon index number. 
Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- 1 — The number is a valid H3 index. -- 0 — The number is not a valid H3 index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The number is a valid H3 index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- 0 — The number is not a valid H3 index. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -63,14 +61,12 @@ h3GetResolution(h3index) **Parameter** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Index resolution. Range: `[0, 15]`. -- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -100,11 +96,11 @@ h3EdgeAngle(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in grades. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -134,11 +130,11 @@ h3EdgeLengthM(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in meters. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -168,11 +164,11 @@ h3EdgeLengthKm(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -202,16 +198,14 @@ geoToH3(lon, lat, resolution) **Arguments** -- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). +- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). 
+- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Hexagon index number. -- 0 in case of error. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- 0 in case of error. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -275,12 +269,11 @@ h3ToGeoBoundary(h3Index) **Arguments** -- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of pairs '(lon, lat)'. -Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). **Example** @@ -311,14 +304,12 @@ h3kRing(h3index, k) **Arguments** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md) +- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `k` — Radius. [integer](../../../sql-reference/data-types/int-uint.md) **Returned values** -- Array of H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -354,13 +345,11 @@ h3GetBaseCell(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Hexagon base cell number. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Hexagon base cell number. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -390,13 +379,11 @@ h3HexAreaM2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Area in square meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Area in square meters. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -426,13 +413,11 @@ h3HexAreaKm2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Area in square kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Area in square kilometers. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -462,15 +447,13 @@ h3IndexesAreNeighbors(index1, index2) **Arguments** -- `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index1` — Hexagon index number. 
[UInt64](../../../sql-reference/data-types/int-uint.md). +- `index2` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Indexes are neighbours. -- `0` — Indexes are not neighbours. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Indexes are neighbours. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `0` — Indexes are not neighbours. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -500,14 +483,12 @@ h3ToChildren(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of the child H3-indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of the child H3-indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -537,14 +518,12 @@ h3ToParent(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Parent H3 index. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Parent H3 index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -572,13 +551,11 @@ h3ToString(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- String representation of the H3 index. - -Type: [String](../../../sql-reference/data-types/string.md). +- String representation of the H3 index. [String](../../../sql-reference/data-types/string.md). **Example** @@ -608,11 +585,11 @@ stringToH3(index_str) **Parameter** -- `index_str` — String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md). +- `index_str` — String representation of the H3 index. [String](../../../sql-reference/data-types/string.md). **Returned value** -- Hexagon index number. Returns 0 on error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. Returns 0 on error. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -642,11 +619,11 @@ h3GetResolution(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). 
**Example** @@ -676,14 +653,12 @@ h3IsResClassIII(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Index has a resolution with Class III orientation. -- `0` — Index doesn't have a resolution with Class III orientation. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index has a resolution with Class III orientation. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `0` — Index doesn't have a resolution with Class III orientation. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -713,14 +688,12 @@ h3IsPentagon(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Index represents a pentagonal cell. -- `0` — Index doesn't represent a pentagonal cell. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index represents a pentagonal cell. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `0` — Index doesn't represent a pentagonal cell. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -750,13 +723,11 @@ h3GetFaces(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array containing icosahedron faces intersected by a given H3 index. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array containing icosahedron faces intersected by a given H3 index. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -786,13 +757,11 @@ h3CellAreaM2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Cell area in square meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square meters. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -822,13 +791,11 @@ h3CellAreaRads2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Cell area in square radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square radians. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -858,14 +825,12 @@ h3ToCenterChild(index, resolution) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** -- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. 
- -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -895,13 +860,11 @@ h3ExactEdgeLengthM(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Exact edge length in meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in meters. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -931,13 +894,11 @@ h3ExactEdgeLengthKm(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Exact edge length in kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in kilometers. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -967,13 +928,11 @@ h3ExactEdgeLengthRads(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Exact edge length in radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in radians. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -1003,13 +962,11 @@ h3NumHexagons(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Number of H3 indices. - -Type: [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of H3 indices. [Int64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -1039,14 +996,12 @@ h3PointDistM(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- Haversine or great circle distance in meters. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in meters.[Float64](../../../sql-reference/data-types/float.md). **Example** @@ -1076,14 +1031,12 @@ h3PointDistKm(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md). 
**Returned values** -- Haversine or great circle distance in kilometers. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in kilometers. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -1113,14 +1066,12 @@ h3PointDistRads(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- Haversine or great circle distance in radians. - -Type: [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in radians. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -1150,9 +1101,7 @@ h3GetRes0Indexes() **Returned values** -- Array of all the resolution 0 H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all the resolution 0 H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -1183,13 +1132,11 @@ h3GetPentagonIndexes(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Array of all pentagon H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all pentagon H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -1219,14 +1166,12 @@ h3Line(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -Array of h3 indexes representing the line of indices between the two provided indices: - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing the line of indices between the two provided indices. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -1256,14 +1201,12 @@ h3Distance(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../../sql-reference/data-types/int-uint.md). 
+- `end` — Hexagon index number that represents an ending point. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Number of grid cells. - -Type: [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of grid cells. [Int64](../../../sql-reference/data-types/int-uint.md). Returns a negative number if finding the distance fails. @@ -1297,14 +1240,12 @@ h3HexRing(index, k) **Parameter** -- `index` — Hexagon index number that represents the origin. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Distance. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents the origin. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `k` — Distance. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of H3 indexes. - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -1334,14 +1275,12 @@ h3GetUnidirectionalEdge(originIndex, destinationIndex) **Parameter** -- `originIndex` — Origin Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destinationIndex` — Destination Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `originIndex` — Origin Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `destinationIndex` — Destination Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Unidirectional Edge Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Unidirectional Edge Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -1371,14 +1310,12 @@ h3UnidirectionalEdgeisValid(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- 1 — The H3 index is a valid unidirectional edge. -- 0 — The H3 index is not a valid unidirectional edge. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The H3 index is a valid unidirectional edge. [UInt8](../../../sql-reference/data-types/int-uint.md). +- 0 — The H3 index is not a valid unidirectional edge. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -1408,13 +1345,11 @@ h3GetOriginIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Origin Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Origin Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -1444,13 +1379,11 @@ h3GetDestinationIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). 
**Returned value** -- Destination Hexagon Index number. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Destination Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -1480,7 +1413,7 @@ h3GetIndexesFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** @@ -1519,13 +1452,11 @@ h3GetUnidirectionalEdgesFromHexagon(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -Array of h3 indexes representing each unidirectional edge: - -Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing each unidirectional edge. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -1555,12 +1486,11 @@ h3GetUnidirectionalEdgeBoundary(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Array of pairs '(lon, lat)'. - Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). **Example** diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index f4702eff44b..424b547753d 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -26,9 +26,7 @@ geoToS2(lon, lat) **Returned values** -- S2 point index. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -62,9 +60,9 @@ s2ToGeo(s2index) **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. - -Type: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md). +- A [tuple](../../data-types/tuple.md) consisting of two values: + - `lon`. [Float64](../../../sql-reference/data-types/float.md). + - `lat`. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -98,9 +96,7 @@ s2GetNeighbors(s2index) **Returned values** -- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. - -Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. [Array](../../data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). **Example** @@ -134,10 +130,8 @@ s2CellsIntersect(s2index1, s2index2) **Returned values** -- 1 — If the cells intersect. 
-- 0 — If the cells don't intersect. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — If the cells intersect. [UInt8](../../../sql-reference/data-types/int-uint.md). +- 0 — If the cells don't intersect. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -173,10 +167,8 @@ s2CapContains(center, degrees, point) **Returned values** -- 1 — If the cap contains the S2 point index. -- 0 — If the cap doesn't contain the S2 point index. - -Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — If the cap contains the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- 0 — If the cap doesn't contain the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -211,8 +203,8 @@ s2CapUnion(center1, radius1, center2, radius2) **Returned values** -- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius` — Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md). +- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `radius` — Radius of the smallest cap containing the two input caps. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -248,8 +240,8 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point) **Returned values** -- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md). +- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. [UInt64](../../../sql-reference/data-types/float.md). **Example** @@ -321,8 +313,8 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi) **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -357,8 +349,8 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../../sql-reference/data-types/int-uint.md). 
+- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/geo/svg.md b/docs/en/sql-reference/functions/geo/svg.md index c565d1f9de7..320d4542fee 100644 --- a/docs/en/sql-reference/functions/geo/svg.md +++ b/docs/en/sql-reference/functions/geo/svg.md @@ -23,13 +23,11 @@ Aliases: `SVG`, `svg` **Returned value** -- The SVG representation of the geometry: +- The SVG representation of the geometry. [String](../../data-types/string). - SVG circle - SVG polygon - SVG path -Type: [String](../../data-types/string) - **Examples** **Circle** diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 6cbcc4e4ef3..20f73de4410 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -328,15 +328,13 @@ roundAge(num) **Returned value** -- Returns `0`, for $age \lt 1$. -- Returns `17`, for $1 \leq age \leq 17$. -- Returns `18`, for $18 \leq age \leq 24$. -- Returns `25`, for $25 \leq age \leq 34$. -- Returns `35`, for $35 \leq age \leq 44$. -- Returns `45`, for $45 \leq age \leq 54$. -- Returns `55`, for $age \geq 55$. - -Type: [UInt8](../data-types/int-uint.md) in all cases. +- Returns `0`, for $age \lt 1$. [UInt8](../data-types/int-uint.md). +- Returns `17`, for $1 \leq age \leq 17$. [UInt8](../data-types/int-uint.md). +- Returns `18`, for $18 \leq age \leq 24$. [UInt8](../data-types/int-uint.md). +- Returns `25`, for $25 \leq age \leq 34$. [UInt8](../data-types/int-uint.md). +- Returns `35`, for $35 \leq age \leq 44$. [UInt8](../data-types/int-uint.md). +- Returns `45`, for $45 \leq age \leq 54$. [UInt8](../data-types/int-uint.md). +- Returns `55`, for $age \geq 55$. [UInt8](../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 327eb8994db..f02c8f15aa9 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -42,8 +42,8 @@ Alias: **Returned values** -- Starting position in bytes and counting from 1, if the substring was found. -- 0, if the substring was not found. +- Starting position in bytes and counting from 1, if the substring was found. [UInt64](../../sql-reference/data-types/int-uint.md). +- 0, if the substring was not found. [UInt64](../../sql-reference/data-types/int-uint.md). If substring `needle` is empty, these rules apply: - if no `start_pos` was specified: return `1` @@ -53,8 +53,6 @@ If substring `needle` is empty, these rules apply: The same rules also apply to functions `locate`, `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`. -Type: `Integer`. - **Examples** Query: diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 0c1da88913d..a16663afc5b 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -640,9 +640,7 @@ UUIDv7ToDateTime(uuid[, timezone]) **Returned value** -- Timestamp with milliseconds precision. If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. - -Type: [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). +- Timestamp with milliseconds precision. 
If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). **Usage examples** @@ -682,9 +680,7 @@ serverUUID() **Returned value** -- The UUID of the server. - -Type: [UUID](../data-types/uuid.md). +- The UUID of the server. [UUID](../data-types/uuid.md). ## See also From 45e4e30cfd13f35bda29629d42f881c69bbf5250 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 23 May 2024 16:51:17 +0200 Subject: [PATCH 290/392] Update retuurn type of logical functions --- .../functions/logical-functions.md | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 138b804a575..1977c5c2a7e 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -30,11 +30,9 @@ Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-ope **Returned value** -- `0`, if at least one argument evaluates to `false`, -- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`, -- `1`, otherwise. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `0`, if at least one argument evaluates to `false`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`. [NULL](../../sql-reference/syntax.md/#null). +- `1`, otherwise. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** @@ -136,11 +134,9 @@ Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-ne **Returned value** -- `1`, if `val` evaluates to `false`, -- `0`, if `val` evaluates to `true`, -- `NULL`, if `val` is `NULL`. - -Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `1`, if `val` evaluates to `false`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `0`, if `val` evaluates to `true`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `NULL`, if `val` is `NULL`. [NULL](../../sql-reference/syntax.md/#null). **Example** @@ -172,11 +168,9 @@ xor(val1, val2...) **Returned value** -- `1`, for two values: if one of the values evaluates to `false` and other does not, -- `0`, for two values: if both values evaluate to `false` or to both `true`, -- `NULL`, if at least one of the inputs is `NULL` - -Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `1`, for two values: if one of the values evaluates to `false` and other does not. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). 
+- `0`, for two values: if both values evaluate to `false` or to both `true`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `NULL`, if at least one of the inputs is `NULL`. [NULL](../../sql-reference/syntax.md/#null). **Example** From 60e94af1ecd1e2b3e5b3f3194901d001653b7991 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 23 May 2024 16:55:02 +0200 Subject: [PATCH 291/392] Return one line change --- src/Planner/PlannerExpressionAnalysis.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 6e194b2c03e..7984d97a1ea 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -444,7 +444,6 @@ SortAnalysisResult analyzeSort(const QueryNode & query_node, for (auto & interpolate_node : interpolate_list_node.getNodes()) { auto & interpolate_node_typed = interpolate_node->as(); - interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getExpression()); interpolate_actions_visitor.visit(interpolate_actions_dag, interpolate_node_typed.getInterpolateExpression()); } From a4903e6b5583b172496be8fa0dbf6cead2b51d86 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 May 2024 16:55:48 +0200 Subject: [PATCH 292/392] Add supportsDynamicSubcolumns() --- src/Storages/ObjectStorage/StorageObjectStorage.h | 2 ++ src/Storages/ObjectStorage/StorageObjectStorageCluster.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index de75af5035b..f45d8c1f01a 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -84,6 +84,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } bool supportsSubsetOfColumns(const ContextPtr & context) const; diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h index 1c244b1ca36..69fec2b3c77 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.h @@ -26,6 +26,8 @@ public: bool supportsSubcolumns() const override { return true; } + bool supportsDynamicSubcolumns() const override { return true; } + bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } RemoteQueryExecutor::Extension getTaskIteratorExtension( From 9481f2f32535630694b9c328384b69116f3b535b Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 17:07:55 +0200 Subject: [PATCH 293/392] Update array-functions.md Add missing ::: for note --- docs/en/sql-reference/functions/array-functions.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 512874d20b7..458adb276fd 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -2373,6 +2373,7 @@ arrayMin([func,] arr) :::note If `func` is specified, then the return type matches the return value type of `func`, 
otherwise it matches the type of the array elements. +::: **Examples** From 9cfd2322d717fc6d2208683b224ee6969932de79 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 17:14:56 +0200 Subject: [PATCH 294/392] Small edits to bit-functions.md --- docs/en/sql-reference/functions/bit-functions.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 709f438d67f..2538ad32022 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -186,9 +186,9 @@ SELECT bitTest(number, index) - `number` – Integer number. - `index` – Position of bit. -**Returned values** +**Returned value** -Returns a value of bit at specified position. [UInt8](../data-types/int-uint.md). +- Value of the bit at the specified position. [UInt8](../data-types/int-uint.md). **Example** @@ -249,9 +249,9 @@ SELECT bitTestAll(number, index1, index2, index3, index4, ...) - `number` – Integer number. - `index1`, `index2`, `index3`, `index4` – Positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`). -**Returned values** +**Returned value** -Returns result of logical conjuction. [UInt8](../data-types/int-uint.md). +- Result of the logical conjuction. [UInt8](../data-types/int-uint.md). **Example** @@ -312,9 +312,9 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...) - `number` – Integer number. - `index1`, `index2`, `index3`, `index4` – Positions of bit. -**Returned values** +**Returned value** -Returns result of logical disjunction. [UInt8](../data-types/int-uint.md). +- Result of the logical disjunction. [UInt8](../data-types/int-uint.md). **Example** From a01b6e8e8278b531a72463eb6f1920fe8d682c0e Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 17:19:03 +0200 Subject: [PATCH 295/392] Numbers in return type should be in `` --- docs/en/sql-reference/functions/geo/s2.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index 424b547753d..2158ef2d57d 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -94,7 +94,7 @@ s2GetNeighbors(s2index) - `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** - An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. [Array](../../data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -128,10 +128,10 @@ s2CellsIntersect(s2index1, s2index2) - `siIndex1`, `s2index2` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the cells intersect. [UInt8](../../../sql-reference/data-types/int-uint.md). -- 0 — If the cells don't intersect. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cells intersect. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `0` — If the cells don't intersect. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -165,10 +165,10 @@ s2CapContains(center, degrees, point) - `degrees` — Radius of the cap in degrees. 
[Float64](../../../sql-reference/data-types/float.md). - `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the cap contains the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). -- 0 — If the cap doesn't contain the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cap contains the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `0` — If the cap doesn't contain the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -275,10 +275,10 @@ s2RectContains(s2PointLow, s2PointHi, s2Point) - `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). - `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). -**Returned values** +**Returned value** -- 1 — If the rectangle contains the given S2 point. -- 0 — If the rectangle doesn't contain the given S2 point. +- `1` — If the rectangle contains the given S2 point. +- `0` — If the rectangle doesn't contain the given S2 point. **Example** From 732b6d1ecc5df7360e0290e950904b7512711777 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 17:22:02 +0200 Subject: [PATCH 296/392] Add hyphens to return values --- .../functions/splitting-merging-functions.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 77563713605..8aa171949a3 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -25,7 +25,7 @@ splitByChar(separator, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). :::note Empty substrings may be selected when: @@ -78,7 +78,7 @@ splitByString(separator, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). :::note Empty substrings may be selected when: @@ -135,7 +135,7 @@ splitByRegexp(regexp, s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). :::note Empty substrings may be selected when: @@ -192,7 +192,7 @@ splitByWhitespace(s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). 
:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. @@ -231,7 +231,7 @@ splitByNonAlpha(s[, max_substrings])) **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). :::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. @@ -293,7 +293,7 @@ Alias: `splitByAlpha` **Returned value(s)** -Returns an array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). :::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. From bab94ac56aa0ef568d34dd1e230e29190e8eaec9 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Thu, 23 May 2024 17:24:07 +0200 Subject: [PATCH 297/392] Correct "note:::" to ":::note" --- docs/en/sql-reference/functions/hash-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 89b95888f85..e3968a691a8 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -844,7 +844,7 @@ SELECT xxHash64('') - Hash value. [UInt32/64](../data-types/int-uint.md). -note::: +:::note The return type will be `UInt32` for `xxHash32` and `UInt64` for `xxHash64`. ::: From c1950236ced0b110e679c4042d1fab2c7df26f2f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 15:24:18 +0000 Subject: [PATCH 298/392] Cosmetics, pt. IV --- src/Functions/{serial.cpp => generateSerialID.cpp} | 2 -- 1 file changed, 2 deletions(-) rename src/Functions/{serial.cpp => generateSerialID.cpp} (98%) diff --git a/src/Functions/serial.cpp b/src/Functions/generateSerialID.cpp similarity index 98% rename from src/Functions/serial.cpp rename to src/Functions/generateSerialID.cpp index d65df83c9f9..db26d0d684b 100644 --- a/src/Functions/serial.cpp +++ b/src/Functions/generateSerialID.cpp @@ -12,8 +12,6 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int KEEPER_EXCEPTION; } From e6f135089f300a6e5cc0d1276e748750f2b59454 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 15:25:38 +0000 Subject: [PATCH 299/392] Cosmetics, pt. 
V --- src/Functions/generateSnowflakeID.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 1b26bf44adb..bbae41e4f49 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -11,11 +11,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - namespace { @@ -81,7 +76,7 @@ SnowflakeComponents toComponents(uint64_t snowflake) { uint64_t toSnowflakeID(SnowflakeComponents components) { return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) | - components.machind_id << (machine_seq_num_bits_count) | + components.machind_id << (machine_seq_num_bits_count) | components.machine_seq_num); } @@ -120,7 +115,7 @@ RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, end.timestamp = begin.timestamp + 1 + (input_rows_count - seq_nums_in_current_timestamp_left) / (max_machine_seq_num + 1); else end.timestamp = begin.timestamp; - + end.machind_id = begin.machind_id; end.machine_seq_num = (begin.machine_seq_num + input_rows_count) & machine_seq_num_mask; From 4611a44c1f76873482fff498f7e7f8414f24e375 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 15:53:14 +0000 Subject: [PATCH 300/392] Cosmetics, pt. VI --- src/Functions/generateSnowflakeID.cpp | 100 +++++++++++++------------- src/Functions/generateUUIDv7.cpp | 25 ++++--- 2 files changed, 60 insertions(+), 65 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index bbae41e4f49..4e61bd9fb1c 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -27,7 +27,7 @@ namespace - The first 41 (+ 1 top zero bit) bits is the timestamp (millisecond since Unix epoch 1 Jan 1970) - The middle 10 bits are the machine ID -- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by differen processes +- The last 12 bits are a counter to disambiguate multiple snowflakeIDs generated within the same millisecond by different processes */ /// bit counts @@ -36,14 +36,13 @@ constexpr auto machine_id_bits_count = 10; constexpr auto machine_seq_num_bits_count = 12; /// bits masks for Snowflake ID components -// constexpr uint64_t timestamp_mask = ((1ULL << timestamp_bits_count) - 1) << (machine_id_bits_count + machine_seq_num_bits_count); // unused -constexpr uint64_t machine_id_mask = ((1ULL << machine_id_bits_count) - 1) << machine_seq_num_bits_count; -constexpr uint64_t machine_seq_num_mask = (1ULL << machine_seq_num_bits_count) - 1; +constexpr uint64_t machine_id_mask = ((1ull << machine_id_bits_count) - 1) << machine_seq_num_bits_count; +constexpr uint64_t machine_seq_num_mask = (1ull << machine_seq_num_bits_count) - 1; /// max values constexpr uint64_t max_machine_seq_num = machine_seq_num_mask; -uint64_t getMachineID() +uint64_t getMachineId() { UUID server_uuid = ServerUUID::get(); /// hash into 64 bits @@ -57,48 +56,44 @@ uint64_t getTimestamp() { auto now = std::chrono::system_clock::now(); auto ticks_since_epoch = std::chrono::duration_cast(now.time_since_epoch()).count(); - return static_cast(ticks_since_epoch) & ((1ULL << timestamp_bits_count) - 1); + return static_cast(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1); } -struct SnowflakeComponents { +struct SnowflakeId +{ uint64_t timestamp; uint64_t machind_id; uint64_t machine_seq_num; }; 
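+/// Packed layout implied by the bit counts above:
+///     snowflake = (timestamp << 22) | (machine id << 12) | sequence number
+/// toSnowflakeId() and fromSnowflakeId() below convert between this packed UInt64
+/// representation and SnowflakeId, and are exact inverses of each other.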
-SnowflakeComponents toComponents(uint64_t snowflake) { - return { - .timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)), - .machind_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count), - .machine_seq_num = (snowflake & machine_seq_num_mask) - }; +SnowflakeId toSnowflakeId(uint64_t snowflake) +{ + return {.timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)), + .machind_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count), + .machine_seq_num = (snowflake & machine_seq_num_mask)}; } -uint64_t toSnowflakeID(SnowflakeComponents components) { +uint64_t fromSnowflakeId(SnowflakeId components) +{ return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) | components.machind_id << (machine_seq_num_bits_count) | components.machine_seq_num); } -struct RangeOfSnowflakeIDs { - /// [begin, end) - SnowflakeComponents begin, end; +struct SnowflakeIdRange +{ + SnowflakeId begin; /// inclusive + SnowflakeId end; /// exclusive }; -/* Get range of `input_rows_count` Snowflake IDs from `max(available, now)` - -1. Calculate Snowflake ID by current timestamp (`now`) -2. `begin = max(available, now)` -3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow -*/ -RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, size_t input_rows_count) +/// To get the range of `input_rows_count` Snowflake IDs from `max(available, now)`: +/// 1. calculate Snowflake ID by current timestamp (`now`) +/// 2. `begin = max(available, now)` +/// 3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow +SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count) { /// 1. `now` - SnowflakeComponents begin = { - .timestamp = getTimestamp(), - .machind_id = getMachineID(), - .machine_seq_num = 0 - }; + SnowflakeId begin = {.timestamp = getTimestamp(), .machind_id = getMachineId(), .machine_seq_num = 0}; /// 2. `begin` if (begin.timestamp <= available.timestamp) @@ -108,7 +103,7 @@ RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, } /// 3. `end = begin + input_rows_count` - SnowflakeComponents end; + SnowflakeId end; const uint64_t seq_nums_in_current_timestamp_left = (max_machine_seq_num - begin.machine_seq_num + 1); if (input_rows_count >= seq_nums_in_current_timestamp_left) /// if sequence numbers in current timestamp is not enough for rows => update timestamp @@ -125,22 +120,22 @@ RangeOfSnowflakeIDs getRangeOfAvailableIDs(const SnowflakeComponents& available, struct GlobalCounterPolicy { static constexpr auto name = "generateSnowflakeID"; - static constexpr auto doc_description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + static constexpr auto description = R"(Generates a Snowflake ID. 
The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; /// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously. struct Data { static inline std::atomic lowest_available_snowflake_id = 0; - SnowflakeComponents reserveRange(size_t input_rows_count) + SnowflakeId reserveRange(size_t input_rows_count) { uint64_t available_snowflake_id = lowest_available_snowflake_id.load(); - RangeOfSnowflakeIDs range; + SnowflakeIdRange range; do { - range = getRangeOfAvailableIDs(toComponents(available_snowflake_id), input_rows_count); + range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count); } - while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, toSnowflakeID(range.end))); + while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end))); /// if `compare_exhange` failed => another thread updated `lowest_available_snowflake_id` and we should try again /// completed => range of IDs [begin, end) is reserved, can return the beginning of the range @@ -152,17 +147,17 @@ struct GlobalCounterPolicy struct ThreadLocalCounterPolicy { static constexpr auto name = "generateSnowflakeIDThreadMonotonic"; - static constexpr auto doc_description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)"; + static constexpr auto description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)"; /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads. 
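+    /// Note: lowest_available_snowflake_id below is thread_local, so each thread reserves
+    /// ID ranges independently, without the atomic compare-exchange used by GlobalCounterPolicy.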
struct Data { static inline thread_local uint64_t lowest_available_snowflake_id = 0; - SnowflakeComponents reserveRange(size_t input_rows_count) + SnowflakeId reserveRange(size_t input_rows_count) { - RangeOfSnowflakeIDs range = getRangeOfAvailableIDs(toComponents(lowest_available_snowflake_id), input_rows_count); - lowest_available_snowflake_id = toSnowflakeID(range.end); + SnowflakeIdRange range = getRangeOfAvailableIds(toSnowflakeId(lowest_available_snowflake_id), input_rows_count); + lowest_available_snowflake_id = fromSnowflakeId(range.end); return range.begin; } }; @@ -188,7 +183,7 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "Arbitrary expression"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -200,17 +195,18 @@ public: auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - vec_to.resize(input_rows_count); - if (input_rows_count != 0) { + vec_to.resize(input_rows_count); + typename FillPolicy::Data data; + /// get the begin of available snowflake ids range - SnowflakeComponents snowflake_id = data.reserveRange(input_rows_count); + SnowflakeId snowflake_id = data.reserveRange(input_rows_count); for (UInt64 & to_row : vec_to) { - to_row = toSnowflakeID(snowflake_id); + to_row = fromSnowflakeId(snowflake_id); if (snowflake_id.machine_seq_num++ == max_machine_seq_num) { snowflake_id.machine_seq_num = 0; @@ -225,20 +221,20 @@ public: }; template -void registerSnowflakeIDGenerator(auto& factory) +void registerSnowflakeIDGenerator(auto & factory) { static constexpr auto doc_syntax_format = "{}([expression])"; static constexpr auto example_format = "SELECT {}()"; static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)"; - FunctionDocumentation::Description doc_description = FillPolicy::doc_description; - FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name); - FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; - FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UInt64"; - FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; - FunctionDocumentation::Categories doc_categories = {"Snowflake ID"}; + FunctionDocumentation::Description description = FillPolicy::description; + FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name); + FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. 
Optional."}}; + FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; + FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; - factory.template registerFunction>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); + factory.template registerFunction>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive); } REGISTER_FUNCTION(GenerateSnowflakeID) diff --git a/src/Functions/generateUUIDv7.cpp b/src/Functions/generateUUIDv7.cpp index 411a3a076ac..f2a82431c0a 100644 --- a/src/Functions/generateUUIDv7.cpp +++ b/src/Functions/generateUUIDv7.cpp @@ -76,7 +76,7 @@ void setVariant(UUID & uuid) struct FillAllRandomPolicy { static constexpr auto name = "generateUUIDv7NonMonotonic"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)"; + static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)"; struct Data { void generate(UUID & uuid, uint64_t ts) @@ -136,7 +136,7 @@ struct CounterFields struct GlobalCounterPolicy { static constexpr auto name = "generateUUIDv7"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. 
Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; /// Guarantee counter monotonicity within one timestamp across all threads generating UUIDv7 simultaneously. struct Data @@ -159,7 +159,7 @@ struct GlobalCounterPolicy struct ThreadLocalCounterPolicy { static constexpr auto name = "generateUUIDv7ThreadMonotonic"; - static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)"; + static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)"; /// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads. 
struct Data @@ -186,7 +186,6 @@ class FunctionGenerateUUIDv7Base : public IFunction, public FillPolicy { public: String getName() const final { return FillPolicy::name; } - size_t getNumberOfArguments() const final { return 0; } bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const final { return false; } @@ -198,7 +197,7 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary Expression"} + {"expr", nullptr, nullptr, "Arbitrary expression"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); @@ -264,20 +263,20 @@ private: }; template -void registerUUIDv7Generator(auto& factory) +void registerUUIDv7Generator(auto & factory) { static constexpr auto doc_syntax_format = "{}([expression])"; static constexpr auto example_format = "SELECT {}()"; static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)"; - FunctionDocumentation::Description doc_description = FillPolicy::doc_description; - FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name); - FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; - FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UUID version 7."; - FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; - FunctionDocumentation::Categories doc_categories = {"UUID"}; + FunctionDocumentation::Description description = FillPolicy::description; + FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name); + FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. 
Optional."}}; + FunctionDocumentation::ReturnedValue returned_value = "A value of type UUID version 7."; + FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}}; + FunctionDocumentation::Categories categories = {"UUID"}; - factory.template registerFunction>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive); + factory.template registerFunction>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive); } REGISTER_FUNCTION(GenerateUUIDv7) From 91c1456141f2783234d1a7fd6a749e9e0493c46e Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Wed, 22 May 2024 22:11:46 +0300 Subject: [PATCH 301/392] CNF with mutually exclusive atoms reduction fix --- src/Analyzer/Passes/ConvertQueryToCNFPass.cpp | 20 +++++- src/Interpreters/TreeCNFConverter.h | 21 +++++- .../WhereConstraintsOptimizer.cpp | 19 ++++- .../0_stateless/03161_cnf_reduction.reference | 23 ++++++ .../0_stateless/03161_cnf_reduction.sql | 72 +++++++++++++++++++ 5 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03161_cnf_reduction.reference create mode 100644 tests/queries/0_stateless/03161_cnf_reduction.sql diff --git a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp index 96bc62212fd..5951e8fc5ea 100644 --- a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp +++ b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp @@ -99,6 +99,23 @@ bool checkIfGroupAlwaysTrueGraph(const Analyzer::CNF::OrGroup & group, const Com return false; } +bool checkIfGroupAlwaysTrueAtoms(const Analyzer::CNF::OrGroup & group) +{ + /// Filters out groups containing mutually exclusive atoms, + /// since these groups are always True + + for (const auto & atom : group) + { + auto negated(atom); + negated.negative = !atom.negative; + if (group.contains(negated)) + { + return true; + } + } + return false; +} + bool checkIfAtomAlwaysFalseFullMatch(const Analyzer::CNF::AtomicFormula & atom, const ConstraintsDescription::QueryTreeData & query_tree_constraints) { const auto constraint_atom_ids = query_tree_constraints.getAtomIds(atom.node_with_hash); @@ -644,7 +661,8 @@ void optimizeWithConstraints(Analyzer::CNF & cnf, const QueryTreeNodes & table_e cnf.filterAlwaysTrueGroups([&](const auto & group) { /// remove always true groups from CNF - return !checkIfGroupAlwaysTrueFullMatch(group, query_tree_constraints) && !checkIfGroupAlwaysTrueGraph(group, compare_graph); + return !checkIfGroupAlwaysTrueFullMatch(group, query_tree_constraints) + && !checkIfGroupAlwaysTrueGraph(group, compare_graph) && !checkIfGroupAlwaysTrueAtoms(group); }) .filterAlwaysFalseAtoms([&](const Analyzer::CNF::AtomicFormula & atom) { diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h index 8258412f1a6..ae1551cd9c2 100644 --- a/src/Interpreters/TreeCNFConverter.h +++ b/src/Interpreters/TreeCNFConverter.h @@ -164,6 +164,12 @@ public: void pushNotIn(CNFQuery::AtomicFormula & atom); +/// Reduces CNF groups by removing mutually exclusive atoms +/// found across groups, in case other atoms are identical. +/// Might require multiple passes to complete reduction. 
+/// +/// Example: +/// (x OR y) AND (x OR !y) -> x template TAndGroup reduceOnceCNFStatements(const TAndGroup & groups) { @@ -175,10 +181,19 @@ TAndGroup reduceOnceCNFStatements(const TAndGroup & groups) bool inserted = false; for (const auto & atom : group) { - copy.erase(atom); using AtomType = std::decay_t; AtomType negative_atom(atom); negative_atom.negative = !atom.negative; + + // Sikpping erase-insert for mutually exclusive atoms within + // signle group, since it won't insert negative atom, which + // will break the logic of this rule + if (copy.contains(negative_atom)) + { + continue; + } + + copy.erase(atom); copy.insert(negative_atom); if (groups.contains(copy)) @@ -209,6 +224,10 @@ bool isCNFGroupSubset(const TOrGroup & left, const TOrGroup & right) return true; } +/// Removes CNF groups if subset group is found in CNF. +/// +/// Example: +/// (x OR y) AND (x) -> x template TAndGroup filterCNFSubsets(const TAndGroup & groups) { diff --git a/src/Interpreters/WhereConstraintsOptimizer.cpp b/src/Interpreters/WhereConstraintsOptimizer.cpp index 979a4f4dbf5..456cf76b987 100644 --- a/src/Interpreters/WhereConstraintsOptimizer.cpp +++ b/src/Interpreters/WhereConstraintsOptimizer.cpp @@ -91,6 +91,22 @@ bool checkIfGroupAlwaysTrueGraph(const CNFQuery::OrGroup & group, const Comparis return false; } +bool checkIfGroupAlwaysTrueAtoms(const CNFQuery::OrGroup & group) +{ + /// Filters out groups containing mutually exclusive atoms, + /// since these groups are always True + + for (const auto & atom : group) + { + auto negated(atom); + negated.negative = !atom.negative; + if (group.contains(negated)) + { + return true; + } + } + return false; +} bool checkIfAtomAlwaysFalseFullMatch(const CNFQuery::AtomicFormula & atom, const ConstraintsDescription & constraints_description) { @@ -158,7 +174,8 @@ void WhereConstraintsOptimizer::perform() .filterAlwaysTrueGroups([&compare_graph, this](const auto & group) { /// remove always true groups from CNF - return !checkIfGroupAlwaysTrueFullMatch(group, metadata_snapshot->getConstraints()) && !checkIfGroupAlwaysTrueGraph(group, compare_graph); + return !checkIfGroupAlwaysTrueFullMatch(group, metadata_snapshot->getConstraints()) + && !checkIfGroupAlwaysTrueGraph(group, compare_graph) && !checkIfGroupAlwaysTrueAtoms(group); }) .filterAlwaysFalseAtoms([&compare_graph, this](const auto & atom) { diff --git a/tests/queries/0_stateless/03161_cnf_reduction.reference b/tests/queries/0_stateless/03161_cnf_reduction.reference new file mode 100644 index 00000000000..5e39c0f3223 --- /dev/null +++ b/tests/queries/0_stateless/03161_cnf_reduction.reference @@ -0,0 +1,23 @@ +-- Expected plan with analyzer: +SELECT id +FROM `03161_table` +WHERE f +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1 + +-- Expected result with analyzer: +1 + +-- Expected plan w/o analyzer: +SELECT id +FROM `03161_table` +WHERE f +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0 + +-- Expected result w/o analyzer: +1 + +-- Reproducer from the issue with analyzer +2 + +-- Reproducer from the issue w/o analyzer +2 diff --git a/tests/queries/0_stateless/03161_cnf_reduction.sql b/tests/queries/0_stateless/03161_cnf_reduction.sql new file mode 100644 index 00000000000..b34e9171d45 --- /dev/null +++ b/tests/queries/0_stateless/03161_cnf_reduction.sql @@ -0,0 +1,72 @@ +DROP TABLE IF EXISTS 03161_table; + +CREATE TABLE 03161_table (id UInt32, f UInt8) ENGINE = Memory; + +INSERT INTO 03161_table VALUES 
(0, 0), (1, 1), (2, 0); + +SELECT '-- Expected plan with analyzer:'; + +EXPLAIN SYNTAX +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1; + +SELECT ''; + +SELECT '-- Expected result with analyzer:'; + +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1; + +SELECT ''; + +SELECT '-- Expected plan w/o analyzer:'; + +EXPLAIN SYNTAX +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0; + +SELECT ''; + +SELECT '-- Expected result w/o analyzer:'; + +SELECT id +FROM 03161_table +WHERE f AND (NOT(f) OR f) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0; + +DROP TABLE IF EXISTS 03161_table; + +-- Checking reproducer from GitHub issue +-- https://github.com/ClickHouse/ClickHouse/issues/57400 + +DROP TABLE IF EXISTS 03161_reproducer; + +CREATE TABLE 03161_reproducer (c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8) ENGINE = Memory; + +INSERT INTO 03161_reproducer VALUES (0, 0, 0, 0, 0, 0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0, 0, 0, 0, 1), (0, 0, 0, 0, 0, 0, 0, 0, 1, 0), (0, 0, 0, 0, 0, 0, 0, 0, 1, 1), (0, 0, 0, 0, 0, 0, 0, 1, 0, 0), (0, 0, 0, 0, 0, 0, 0, 1, 0, 1), (0, 0, 0, 0, 0, 0, 0, 1, 1, 0), (0, 0, 0, 0, 0, 0, 0, 1, 1, 1); + +SELECT ''; + +SELECT '-- Reproducer from the issue with analyzer'; + +SELECT count() +FROM 03161_reproducer +WHERE ((NOT c2) AND c2 AND (NOT c1)) OR ((NOT c2) AND c3 AND (NOT c5)) OR ((NOT c7) AND (NOT c8)) OR (c9 AND c6 AND c8 AND (NOT c8) AND (NOT c7)) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 1; + +SELECT ''; + +SELECT '-- Reproducer from the issue w/o analyzer'; + +SELECT count() +FROM 03161_reproducer +WHERE ((NOT c2) AND c2 AND (NOT c1)) OR ((NOT c2) AND c3 AND (NOT c5)) OR ((NOT c7) AND (NOT c8)) OR (c9 AND c6 AND c8 AND (NOT c8) AND (NOT c7)) +SETTINGS convert_query_to_cnf = 1, optimize_using_constraints = 1, allow_experimental_analyzer = 0; + +DROP TABLE IF EXISTS 03161_reproducer; From c7aa283b7a418f6372e67b386342815629e26f39 Mon Sep 17 00:00:00 2001 From: Eduard Karacharov <13005055+korowa@users.noreply.github.com> Date: Thu, 23 May 2024 14:20:15 +0300 Subject: [PATCH 302/392] Update src/Interpreters/TreeCNFConverter.h Co-authored-by: Antonio Andelic --- src/Interpreters/TreeCNFConverter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h index ae1551cd9c2..ec4b029eee9 100644 --- a/src/Interpreters/TreeCNFConverter.h +++ b/src/Interpreters/TreeCNFConverter.h @@ -186,7 +186,7 @@ TAndGroup reduceOnceCNFStatements(const TAndGroup & groups) negative_atom.negative = !atom.negative; // Sikpping erase-insert for mutually exclusive atoms within - // signle group, since it won't insert negative atom, which + // single group, since it won't insert negative atom, which // will break the logic of this rule if (copy.contains(negative_atom)) { From 2315991504b1e95d7bb2594e54e3c6f749897d79 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Thu, 23 May 2024 18:41:14 +0200 Subject: [PATCH 303/392] Build fix --- src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff 
--git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index df8fb6f6656..fb0f0ba9154 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -67,12 +67,11 @@ void MergeTreeDataPartWriterCompact::initDynamicStreamsIfNeeded(const Block & bl return; is_dynamic_streams_initialized = true; - auto storage_snapshot = std::make_shared(data_part->storage, metadata_snapshot); for (const auto & column : columns_list) { if (column.type->hasDynamicSubcolumns()) { - auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec); + auto compression = getCodecDescOrDefault(column.name, default_codec); addStreams(column, block.getByName(column.name).column, compression); } } From 8d697123dac574e727101d241e4d16eae2bce8da Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 23 May 2024 16:36:24 +0200 Subject: [PATCH 304/392] CI: Cancel sync wf on new push --- .github/workflows/pull_request.yml | 3 +++ tests/ci/ci.py | 37 +++++++++++++++++++-------- tests/ci/ci_metadata.py | 41 +++++++++++++++++++++++++++--- tests/ci/env_helper.py | 1 + 4 files changed, 68 insertions(+), 14 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index f20e987db97..48b4a558580 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -33,6 +33,9 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get version filter: tree:0 + - name: Cancel Sync PR workflow + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 99555b06bbf..68db08fbe96 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1908,13 +1908,26 @@ def _get_ext_check_name(check_name: str) -> str: return check_name_with_group -def _cancel_pr_wf(s3: S3Helper, pr_number: int) -> None: - run_id = CiMetadata(s3, pr_number).fetch_meta().run_id - if not run_id: - print(f"ERROR: FIX IT: Run id has not been found PR [{pr_number}]!") +def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> None: + wf_data = CiMetadata(s3, pr_number).fetch_meta() + if not cancel_sync: + if not wf_data.run_id: + print(f"ERROR: FIX IT: Run id has not been found PR [{pr_number}]!") + else: + print( + f"Canceling PR workflow run_id: [{wf_data.run_id}], pr: [{pr_number}]" + ) + GitHub.cancel_wf(GITHUB_REPOSITORY, get_best_robot_token(), wf_data.run_id) else: - print(f"Canceling PR workflow run_id: [{run_id}], pr: [{pr_number}]") - GitHub.cancel_wf(GITHUB_REPOSITORY, get_best_robot_token(), run_id) + if not wf_data.sync_pr_run_id: + print("WARNING: Sync PR run id has not been found") + else: + print(f"Canceling sync PR workflow run_id: [{wf_data.sync_pr_run_id}]") + GitHub.cancel_wf( + "ClickHouse/clickhouse-private", + get_best_robot_token(), + wf_data.sync_pr_run_id, + ) def main() -> int: @@ -1947,7 +1960,7 @@ def main() -> int: if args.configure: if CI and pr_info.is_pr: # store meta on s3 (now we need it only for PRs) - meta = CiMetadata(s3, pr_info.number) + meta = CiMetadata(s3, pr_info.number, pr_info.head_ref) meta.run_id = int(GITHUB_RUN_ID) meta.push_meta() @@ -2245,10 +2258,12 @@ def main() -> int: ### CANCEL PREVIOUS WORKFLOW RUN elif args.cancel_previous_run: - assert ( - pr_info.is_merge_queue - ), "Currently it's supposed to be used in MQ wf to cancel running PR 
wf if any" - _cancel_pr_wf(s3, pr_info.merged_pr) + if pr_info.is_merge_queue: + _cancel_pr_wf(s3, pr_info.merged_pr) + elif pr_info.is_pr: + _cancel_pr_wf(s3, pr_info.number, cancel_sync=True) + else: + assert False, "BUG! Not supported scenario" ### print results _print_results(result, args.outfile, args.pretty) diff --git a/tests/ci/ci_metadata.py b/tests/ci/ci_metadata.py index 82d44cf1adc..a767d102811 100644 --- a/tests/ci/ci_metadata.py +++ b/tests/ci/ci_metadata.py @@ -4,9 +4,13 @@ from typing import Optional from env_helper import ( S3_BUILDS_BUCKET, TEMP_PATH, + GITHUB_UPSTREAM_REPOSITORY, + GITHUB_REPOSITORY, + S3_BUILDS_BUCKET_PUBLIC, ) from s3_helper import S3Helper from ci_utils import GHActions +from synchronizer_utils import SYNC_BRANCH_PREFIX # pylint: disable=too-many-lines @@ -22,13 +26,14 @@ class CiMetadata: _LOCAL_PATH = Path(TEMP_PATH) / "ci_meta" _FILE_SUFFIX = ".cimd" _FILENAME_RUN_ID = "run_id" + _FILE_SUFFIX + _FILENAME_SYNC_PR_RUN_ID = "sync_pr_run_id" + _FILE_SUFFIX def __init__( self, s3: S3Helper, pr_number: Optional[int] = None, - sha: Optional[str] = None, git_ref: Optional[str] = None, + sha: Optional[str] = None, ): assert pr_number or (sha and git_ref) @@ -37,12 +42,25 @@ class CiMetadata: self.git_ref = git_ref self.s3 = s3 self.run_id = 0 + self.upstream_pr_number = 0 + self.sync_pr_run_id = 0 if self.pr_number: self.s3_path = f"{self._S3_PREFIX}/PRs/{self.pr_number}/" else: self.s3_path = f"{self._S3_PREFIX}/{self.git_ref}/{self.sha}/" + # Process upstream StatusNames.SYNC: + # metadata path for upstream pr + self.s3_path_upstream = "" + if ( + self.git_ref + and self.git_ref.startswith(f"{SYNC_BRANCH_PREFIX}/pr/") + and GITHUB_REPOSITORY != GITHUB_UPSTREAM_REPOSITORY + ): + self.upstream_pr_number = int(self.git_ref.split("/pr/", maxsplit=1)[1]) + self.s3_path_upstream = f"{self._S3_PREFIX}/PRs/{self.upstream_pr_number}/" + self._updated = False if not self._LOCAL_PATH.exists(): @@ -73,6 +91,8 @@ class CiMetadata: assert len(lines) == 1 if file_name.name == self._FILENAME_RUN_ID: self.run_id = int(lines[0]) + elif file_name.name == self._FILENAME_SYNC_PR_RUN_ID: + self.sync_pr_run_id = int(lines[0]) self._updated = True return self @@ -84,8 +104,15 @@ class CiMetadata: Uploads meta on s3 """ assert self.run_id + assert self.git_ref, "Push meta only with full info" + + if not self.upstream_pr_number: + log_title = f"Storing workflow metadata: PR [{self.pr_number}]" + else: + log_title = f"Storing workflow metadata: PR [{self.pr_number}], upstream PR [{self.upstream_pr_number}]" + GHActions.print_in_group( - f"Storing workflow metadata: PR [{self.pr_number}]", + log_title, [f"run_id: {self.run_id}"], ) @@ -96,9 +123,17 @@ class CiMetadata: _ = self.s3.upload_file( bucket=S3_BUILDS_BUCKET, file_path=local_file, - s3_path=self.s3_path + local_file.name, + s3_path=self.s3_path + self._FILENAME_RUN_ID, ) + if self.upstream_pr_number: + # store run id in upstream pr meta as well + _ = self.s3.upload_file( + bucket=S3_BUILDS_BUCKET_PUBLIC, + file_path=local_file, + s3_path=self.s3_path_upstream + self._FILENAME_SYNC_PR_RUN_ID, + ) + if __name__ == "__main__": # TEST: diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 9b9652d5bd3..64614ffa611 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -31,6 +31,7 @@ IMAGES_PATH = os.getenv("IMAGES_PATH", TEMP_PATH) REPO_COPY = os.getenv("REPO_COPY", GITHUB_WORKSPACE) RUNNER_TEMP = os.getenv("RUNNER_TEMP", p.abspath(p.join(module_dir, "./tmp"))) S3_BUILDS_BUCKET = 
os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds") +S3_BUILDS_BUCKET_PUBLIC = "clickhouse-builds" S3_TEST_REPORTS_BUCKET = os.getenv("S3_TEST_REPORTS_BUCKET", "clickhouse-test-reports") S3_URL = os.getenv("S3_URL", "https://s3.amazonaws.com") S3_DOWNLOAD = os.getenv("S3_DOWNLOAD", S3_URL) From 741e0aedab78a009840f6346e582c905bb80be17 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 16:53:11 +0000 Subject: [PATCH 305/392] Remove commented code. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 57 +---------------------- 1 file changed, 2 insertions(+), 55 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 3ccecac951d..2d34f1024d5 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -471,6 +471,7 @@ struct TableExpressionData return buffer.str(); } }; + class ExpressionsStack { public: @@ -2857,22 +2858,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromExpressionArguments(cons bool QueryAnalyzer::tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope) { - //const auto & identifier_bind_part = identifier_lookup.identifier.front(); return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr; - - // auto get_alias_name_to_node_map = [&]() -> const std::unordered_map & - // { - // if (identifier_lookup.isExpressionLookup()) - // return *scope.alias_name_to_expression_node; - // else if (identifier_lookup.isFunctionLookup()) - // return scope.alias_name_to_lambda_node; - - // return scope.alias_name_to_table_expression_node; - // }; - - // const auto & alias_name_to_node_map = get_alias_name_to_node_map(); - - // return alias_name_to_node_map.contains(identifier_bind_part); } /** Resolve identifier from scope aliases. @@ -2922,23 +2908,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier { const auto & identifier_bind_part = identifier_lookup.identifier.front(); - // auto get_alias_name_to_node_map = [&]() -> std::unordered_map & - // { - // if (identifier_lookup.isExpressionLookup()) - // return *scope.alias_name_to_expression_node; - // else if (identifier_lookup.isFunctionLookup()) - // return scope.alias_name_to_lambda_node; - - // return scope.alias_name_to_table_expression_node; - // }; - - // auto & alias_name_to_node_map = get_alias_name_to_node_map(); - // auto it = alias_name_to_node_map.find(identifier_bind_part); - - // if (it == alias_name_to_node_map.end()) - // return {}; - - auto it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); + auto * it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); if (it == nullptr) return {}; @@ -2988,20 +2958,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier } alias_node = lookup_result.resolved_identifier; - - /** During collection of aliases if node is identifier and has alias, we cannot say if it is - * column or function node. Check QueryExpressionsAliasVisitor documentation for clarification. - * - * If we resolved identifier node as expression, we must remove identifier node alias from - * function alias map. - * If we resolved identifier node as function, we must remove identifier node alias from - * expression alias map. 
- */ - // if (identifier_lookup.isExpressionLookup()) - // scope.alises.alias_name_to_lambda_node.erase(identifier_bind_part); - // else if (identifier_lookup.isFunctionLookup()) - // scope.aliases.alias_name_to_expression_node->erase(identifier_bind_part); - scope.popExpressionNode(); } else if (node_type == QueryTreeNodeType::FUNCTION) @@ -4199,7 +4155,6 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook */ auto * alias_it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FULL_NAME); - //auto alias_it = scope.alias_name_to_expression_node->find(identifier_lookup.identifier.getFullName()); if (alias_it && (*alias_it)->getNodeType() == QueryTreeNodeType::COLUMN) { const auto & column_node = (*alias_it)->as(); @@ -6395,17 +6350,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id result_projection_names.push_back(projection_name_it->second); } - // if (resolved_identifier_node && !node_alias.empty()) - // scope.alias_name_to_lambda_node.erase(node_alias); - if (!resolved_identifier_node && allow_lambda_expression) - { resolved_identifier_node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::FUNCTION}, scope).resolved_identifier; - // if (resolved_identifier_node && !node_alias.empty()) - // scope.alias_name_to_expression_node->erase(node_alias); - } - if (!resolved_identifier_node && allow_table_expression) { resolved_identifier_node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier; From dab090e629afd3730457599d84e147bb512a1e81 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 17:14:06 +0000 Subject: [PATCH 306/392] Cosmetics, pt. VII (includes a move of all snowflake-related functions in one document) --- .../functions/type-conversion-functions.md | 140 ---------------- .../sql-reference/functions/uuid-functions.md | 155 +++++++++++++++++- 2 files changed, 149 insertions(+), 146 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index ea08ffa50e7..bab92ff1e67 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1979,143 +1979,3 @@ Result: │ 2,"good" │ └───────────────────────────────────────────┘ ``` - -## snowflakeToDateTime - -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](/docs/en/sql-reference/data-types/datetime.md) format. - -**Syntax** - -``` sql -snowflakeToDateTime(value[, time_zone]) -``` - -**Arguments** - -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). - -**Returned value** - -- The timestamp component of `value` as a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value. 
- -**Example** - -Query: - -``` sql -SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC'); -``` - -Result: - -```response - -┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐ -│ 2021-08-15 10:57:56 │ -└──────────────────────────────────────────────────────────────────┘ -``` - -## snowflakeToDateTime64 - -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format. - -**Syntax** - -``` sql -snowflakeToDateTime64(value[, time_zone]) -``` - -**Arguments** - -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). - -**Returned value** - -- The timestamp component of `value` as a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) with scale = 3, i.e. millisecond precision. - -**Example** - -Query: - -``` sql -SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC'); -``` - -Result: - -```response - -┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐ -│ 2021-08-15 10:58:19.841 │ -└────────────────────────────────────────────────────────────────────┘ -``` - -## dateTimeToSnowflake - -Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. - -**Syntax** - -``` sql -dateTimeToSnowflake(value) -``` - -**Arguments** - -- `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). - -**Returned value** - -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. - -**Example** - -Query: - -``` sql -WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt); -``` - -Result: - -```response -┌─dateTimeToSnowflake(dt)─┐ -│ 1426860702823350272 │ -└─────────────────────────┘ -``` - -## dateTime64ToSnowflake - -Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. - -**Syntax** - -``` sql -dateTime64ToSnowflake(value) -``` - -**Arguments** - -- `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). - -**Returned value** - -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. - -**Example** - -Query: - -``` sql -WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64); -``` - -Result: - -```response -┌─dateTime64ToSnowflake(dt64)─┐ -│ 1426860704886947840 │ -└─────────────────────────────┘ -``` diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 80d7215b9ef..7c264450ef0 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -674,7 +674,7 @@ Result: └──────────────────────────────────────────────────────────────────────────────────────┘ ``` -## serverUUID() +## serverUUID Returns the random UUID generated during the first start of the ClickHouse server. 
The UUID is stored in file `uuid` in the ClickHouse server directory (e.g. `/var/lib/clickhouse/`) and retained between server restarts. @@ -692,9 +692,9 @@ Type: [UUID](../data-types/uuid.md). ## generateSnowflakeID -Generates a [Snowflake ID](https://github.com/twitter-archive/snowflake/tree/b3f6a3c6ca8e1b6847baa6ff42bf72201e2c2231). +Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID). -Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. +The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. @@ -756,11 +756,14 @@ SELECT generateSnowflakeID(1), generateSnowflakeID(2); ## generateSnowflakeIDThreadMonotonic -Generates a [Snowflake ID](https://github.com/twitter-archive/snowflake/tree/b3f6a3c6ca8e1b6847baa6ff42bf72201e2c2231). +Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID). -Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. +The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. +For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. +In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. -This function behaves like `generateSnowflakeID` but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs. +This function behaves like `generateSnowflakeID` but gives no guarantee on counter monotony across different simultaneous requests. +Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs. ``` 0 1 2 3 @@ -816,6 +819,146 @@ SELECT generateSnowflakeIDThreadMonotonic(1), generateSnowflakeIDThreadMonotonic └───────────────────────────────────────┴───────────────────────────────────────┘ ``` +## snowflakeToDateTime + +Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](/docs/en/sql-reference/data-types/datetime.md) format. + +**Syntax** + +``` sql +snowflakeToDateTime(value[, time_zone]) +``` + +**Arguments** + +- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- The timestamp component of `value` as a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value. 
+ +**Example** + +Query: + +``` sql +SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC'); +``` + +Result: + +```response + +┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐ +│ 2021-08-15 10:57:56 │ +└──────────────────────────────────────────────────────────────────┘ +``` + +## snowflakeToDateTime64 + +Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format. + +**Syntax** + +``` sql +snowflakeToDateTime64(value[, time_zone]) +``` + +**Arguments** + +- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- The timestamp component of `value` as a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) with scale = 3, i.e. millisecond precision. + +**Example** + +Query: + +``` sql +SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC'); +``` + +Result: + +```response + +┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐ +│ 2021-08-15 10:58:19.841 │ +└────────────────────────────────────────────────────────────────────┘ +``` + +## dateTimeToSnowflake + +Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. + +**Syntax** + +``` sql +dateTimeToSnowflake(value) +``` + +**Arguments** + +- `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). + +**Returned value** + +- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. + +**Example** + +Query: + +``` sql +WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt); +``` + +Result: + +```response +┌─dateTimeToSnowflake(dt)─┐ +│ 1426860702823350272 │ +└─────────────────────────┘ +``` + +## dateTime64ToSnowflake + +Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. + +**Syntax** + +``` sql +dateTime64ToSnowflake(value) +``` + +**Arguments** + +- `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). + +**Returned value** + +- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. 
+ +**Example** + +Query: + +``` sql +WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64); +``` + +Result: + +```response +┌─dateTime64ToSnowflake(dt64)─┐ +│ 1426860704886947840 │ +└─────────────────────────────┘ +``` + ## See also - [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) From 5d82a94615ef8a9fb7c39787d0e2b191641cbcb8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 17:22:59 +0000 Subject: [PATCH 307/392] Revert generateSerialID --- src/Functions/generateSerialID.cpp | 167 ------------------ .../03129_serial_test_zookeeper.reference | 13 -- .../03129_serial_test_zookeeper.sql | 12 -- 3 files changed, 192 deletions(-) delete mode 100644 src/Functions/generateSerialID.cpp delete mode 100644 tests/queries/0_stateless/03129_serial_test_zookeeper.reference delete mode 100644 tests/queries/0_stateless/03129_serial_test_zookeeper.sql diff --git a/src/Functions/generateSerialID.cpp b/src/Functions/generateSerialID.cpp deleted file mode 100644 index db26d0d684b..00000000000 --- a/src/Functions/generateSerialID.cpp +++ /dev/null @@ -1,167 +0,0 @@ -#include "Common/Exception.h" -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int KEEPER_EXCEPTION; -} - -constexpr auto function_node_name = "/serial_ids/"; -constexpr size_t MAX_SERIES_NUMBER = 1000; // ? - -class FunctionSerial : public IFunction -{ -private: - mutable zkutil::ZooKeeperPtr zk; - ContextPtr context; - -public: - static constexpr auto name = "generateSerialID"; - - explicit FunctionSerial(ContextPtr context_) : context(context_) - { - if (context->hasZooKeeper()) { - zk = context->getZooKeeper(); - } - } - - static FunctionPtr create(ContextPtr context) - { - return std::make_shared(std::move(context)); - } - - String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 1; } - bool isStateful() const override { return true; } - bool isDeterministic() const override { return false; } - bool isDeterministicInScopeOfQuery() const override { return false; } - bool isSuitableForConstantFolding() const override { return false; } - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForNothing() const override { return false; } - bool canBeExecutedOnDefaultArguments() const override { return false; } - bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const override { return true; } - bool hasInformationAboutMonotonicity() const override { return true; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - FunctionArgumentDescriptors mandatory_args{ - {"series identifier", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"} - }; - validateFunctionArgumentTypes(*this, arguments, mandatory_args); - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - if (zk == nullptr) - throw Exception(ErrorCodes::KEEPER_EXCEPTION, - "ZooKeeper is not configured for function {}", - getName()); - if (zk->expired()) - zk = context->getZooKeeper(); - - // slow? 
- if (zk->exists(function_node_name) && zk->getChildren(function_node_name).size() == MAX_SERIES_NUMBER) { - throw Exception(ErrorCodes::KEEPER_EXCEPTION, - "At most {} serial nodes can be created", - MAX_SERIES_NUMBER); - } - - auto col_res = ColumnVector::create(); - typename ColumnVector::Container & vec_to = col_res->getData(); - - vec_to.resize(input_rows_count); - - const auto & serial_path = function_node_name + arguments[0].column->getDataAt(0).toString(); - - /// CAS in ZooKeeper - /// `get` value and version, `trySet` new with version check - /// I didn't get how to do it with `multi` - - Int64 counter; - std::string counter_path = serial_path + "/counter"; - - // if serial name used first time - zk->createAncestors(counter_path); - zk->createIfNotExists(counter_path, "1"); - - Coordination::Stat stat; - while (true) - { - const String counter_string = zk->get(counter_path, &stat); - counter = std::stoll(counter_string); - String updated_counter = std::to_string(counter + input_rows_count); - const Coordination::Error err = zk->trySet(counter_path, updated_counter); - if (err == Coordination::Error::ZOK) - { - // CAS is done - break; - } - if (err != Coordination::Error::ZBADVERSION) - { - throw Exception(ErrorCodes::KEEPER_EXCEPTION, - "ZooKeeper trySet operation failed with unexpected error = {} in function {}", - err, getName()); - } - } - - // Make a result - for (auto & val : vec_to) - { - val = counter; - ++counter; - } - - return col_res; - } - -}; - -REGISTER_FUNCTION(Serial) -{ - factory.registerFunction(FunctionDocumentation - { - .description=R"( -Generates and returns sequential numbers starting from the previous counter value. -This function takes a constant string argument - a series identifier. -The server should be configured with a ZooKeeper. 
-)", - .syntax = "generateSerialID(identifier)", - .arguments{ - {"series identifier", "Series identifier (String or FixedString)"} - }, - .returned_value = "Sequential numbers of type Int64 starting from the previous counter value", - .examples{ - {"first call", "SELECT generateSerialID('id1')", R"( -┌─generateSerialID('id1')──┐ -│ 1 │ -└──────────────────────────┘)"}, - {"second call", "SELECT generateSerialID('id1')", R"( -┌─generateSerialID('id1')──┐ -│ 2 │ -└──────────────────────────┘)"}, - {"column call", "SELECT *, generateSerialID('id1') FROM test_table", R"( -┌─CounterID─┬─UserID─┬─ver─┬─generateSerialID('id1')──┐ -│ 1 │ 3 │ 3 │ 3 │ -│ 1 │ 1 │ 1 │ 4 │ -│ 1 │ 2 │ 2 │ 5 │ -│ 1 │ 5 │ 5 │ 6 │ -│ 1 │ 4 │ 4 │ 7 │ -└───────────┴────────┴─────┴──────────────────────────┘ - )"}}, - .categories{"Unique identifiers"} - }); -} - -} diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.reference b/tests/queries/0_stateless/03129_serial_test_zookeeper.reference deleted file mode 100644 index 479030db4be..00000000000 --- a/tests/queries/0_stateless/03129_serial_test_zookeeper.reference +++ /dev/null @@ -1,13 +0,0 @@ -1 -2 -1 -3 -4 -5 -6 -7 -1 1 -2 2 -3 3 -4 4 -5 5 diff --git a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql b/tests/queries/0_stateless/03129_serial_test_zookeeper.sql deleted file mode 100644 index 2bd60656259..00000000000 --- a/tests/queries/0_stateless/03129_serial_test_zookeeper.sql +++ /dev/null @@ -1,12 +0,0 @@ --- Tags: zookeeper - -SELECT generateSerialID('x'); -SELECT generateSerialID('x'); -SELECT generateSerialID('y'); -SELECT generateSerialID('x') FROM numbers(5); - -SELECT generateSerialID(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT generateSerialID('x', 'y'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT generateSerialID(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } - -SELECT generateSerialID('z'), generateSerialID('z') FROM numbers(5); From 12f60a4969acda49422aef5d5d6fc431a71109f7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 18:00:53 +0000 Subject: [PATCH 308/392] Cosmetics, pt. 
VIII --- src/Functions/generateSnowflakeID.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 4e61bd9fb1c..617693f017c 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -42,6 +42,13 @@ constexpr uint64_t machine_seq_num_mask = (1ull << machine_seq_num_bits_count) - /// max values constexpr uint64_t max_machine_seq_num = machine_seq_num_mask; +uint64_t getTimestamp() +{ + auto now = std::chrono::system_clock::now(); + auto ticks_since_epoch = std::chrono::duration_cast(now.time_since_epoch()).count(); + return static_cast(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1); +} + uint64_t getMachineId() { UUID server_uuid = ServerUUID::get(); @@ -52,31 +59,24 @@ uint64_t getMachineId() return (((hi * 11) ^ (lo * 17)) & machine_id_mask) >> machine_seq_num_bits_count; } -uint64_t getTimestamp() -{ - auto now = std::chrono::system_clock::now(); - auto ticks_since_epoch = std::chrono::duration_cast(now.time_since_epoch()).count(); - return static_cast(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1); -} - struct SnowflakeId { uint64_t timestamp; - uint64_t machind_id; + uint64_t machine_id; uint64_t machine_seq_num; }; SnowflakeId toSnowflakeId(uint64_t snowflake) { return {.timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)), - .machind_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count), + .machine_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count), .machine_seq_num = (snowflake & machine_seq_num_mask)}; } uint64_t fromSnowflakeId(SnowflakeId components) { return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) | - components.machind_id << (machine_seq_num_bits_count) | + components.machine_id << (machine_seq_num_bits_count) | components.machine_seq_num); } @@ -93,7 +93,7 @@ struct SnowflakeIdRange SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count) { /// 1. `now` - SnowflakeId begin = {.timestamp = getTimestamp(), .machind_id = getMachineId(), .machine_seq_num = 0}; + SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = getMachineId(), .machine_seq_num = 0}; /// 2. `begin` if (begin.timestamp <= available.timestamp) @@ -111,7 +111,7 @@ SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t in else end.timestamp = begin.timestamp; - end.machind_id = begin.machind_id; + end.machine_id = begin.machine_id; end.machine_seq_num = (begin.machine_seq_num + input_rows_count) & machine_seq_num_mask; return {begin, end}; From ae8ceaa35e0cb6804774881e05bccf07ab23aa19 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 18:38:30 +0000 Subject: [PATCH 309/392] Cosmetics, pt. 
IX and cached machineId computation --- src/Functions/generateSnowflakeID.cpp | 25 +++++++++++++------ .../03130_generateSnowflakeId.reference | 4 +-- .../0_stateless/03130_generateSnowflakeId.sql | 14 ++++++----- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 617693f017c..c3f7701a05a 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -49,7 +49,7 @@ uint64_t getTimestamp() return static_cast(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1); } -uint64_t getMachineId() +uint64_t getMachineIdImpl() { UUID server_uuid = ServerUUID::get(); /// hash into 64 bits @@ -59,6 +59,12 @@ uint64_t getMachineId() return (((hi * 11) ^ (lo * 17)) & machine_id_mask) >> machine_seq_num_bits_count; } +uint64_t getMachineId() +{ + static uint64_t machine_id = getMachineIdImpl(); + return machine_id; +} + struct SnowflakeId { uint64_t timestamp; @@ -106,7 +112,7 @@ SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t in SnowflakeId end; const uint64_t seq_nums_in_current_timestamp_left = (max_machine_seq_num - begin.machine_seq_num + 1); if (input_rows_count >= seq_nums_in_current_timestamp_left) - /// if sequence numbers in current timestamp is not enough for rows => update timestamp + /// if sequence numbers in current timestamp is not enough for rows --> depending on how many elements input_rows_count overflows, forward timestamp by at least 1 tick end.timestamp = begin.timestamp + 1 + (input_rows_count - seq_nums_in_current_timestamp_left) / (max_machine_seq_num + 1); else end.timestamp = begin.timestamp; @@ -136,8 +142,8 @@ struct GlobalCounterPolicy range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count); } while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end))); - /// if `compare_exhange` failed => another thread updated `lowest_available_snowflake_id` and we should try again - /// completed => range of IDs [begin, end) is reserved, can return the beginning of the range + /// if CAS failed --> another thread updated `lowest_available_snowflake_id` and we re-try + /// else --> our thread reserved ID range [begin, end) and return the beginning of the range return range.begin; } @@ -200,18 +206,21 @@ public: vec_to.resize(input_rows_count); typename FillPolicy::Data data; - - /// get the begin of available snowflake ids range - SnowflakeId snowflake_id = data.reserveRange(input_rows_count); + SnowflakeId snowflake_id = data.reserveRange(input_rows_count); /// returns begin of available snowflake ids range for (UInt64 & to_row : vec_to) { to_row = fromSnowflakeId(snowflake_id); - if (snowflake_id.machine_seq_num++ == max_machine_seq_num) + if (snowflake_id.machine_seq_num == max_machine_seq_num) { + /// handle overflow snowflake_id.machine_seq_num = 0; ++snowflake_id.timestamp; } + else + { + ++snowflake_id.machine_seq_num; + } } } diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference index 8cdced96770..6ec0cafab16 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.reference +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -1,11 +1,11 @@ --- generateSnowflakeID -- +-- generateSnowflakeID 1 1 0 0 1 100 --- generateSnowflakeIDThreadMonotonic -- +-- generateSnowflakeIDThreadMonotonic 1 1 100 diff --git 
a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql index 3e994149d2b..903be5b786c 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.sql +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -1,10 +1,11 @@ -SELECT '-- generateSnowflakeID --'; +SELECT '-- generateSnowflakeID'; + SELECT bitShiftLeft(toUInt64(generateSnowflakeID()), 52) = 0; -- check machine sequence number is zero SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero -SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -SELECT generateSnowflakeID() = generateSnowflakeID(1); -SELECT generateSnowflakeID(1) = generateSnowflakeID(1); +SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs +SELECT generateSnowflakeID() = generateSnowflakeID(1); -- same as ^^ +SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- enabled common subexpression elimination SELECT generateSnowflakeID(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } @@ -15,7 +16,8 @@ FROM FROM numbers(100) ); -SELECT '-- generateSnowflakeIDThreadMonotonic --'; +SELECT '-- generateSnowflakeIDThreadMonotonic'; + SELECT bitShiftLeft(toUInt64(generateSnowflakeIDThreadMonotonic()), 52) = 0; -- check machine sequence number is zero SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeIDThreadMonotonic()), 63), 1) = 0; -- check first bit is zero @@ -26,4 +28,4 @@ FROM ( SELECT DISTINCT generateSnowflakeIDThreadMonotonic() FROM numbers(100) -); \ No newline at end of file +); From 0383fa5164cb07fdec7c5fc036137122545acd6a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 23 May 2024 18:30:49 +0000 Subject: [PATCH 310/392] do not convert sparse columns to full on vertical merge --- src/Columns/ColumnSparse.cpp | 1 - .../Algorithms/AggregatingSortedAlgorithm.cpp | 5 ++- .../FinishAggregatingInOrderAlgorithm.cpp | 2 ++ .../Merges/Algorithms/IMergingAlgorithm.h | 13 +++++++- .../IMergingAlgorithmWithSharedChunks.cpp | 15 ++------- .../Algorithms/MergingSortedAlgorithm.cpp | 5 ++- .../Algorithms/SummingSortedAlgorithm.cpp | 5 ++- .../Transforms/ColumnGathererTransform.cpp | 31 ++++++++++++++----- .../Transforms/ColumnGathererTransform.h | 8 +++-- src/Storages/MergeTree/MergeTask.cpp | 6 ++-- 10 files changed, 54 insertions(+), 37 deletions(-) diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 49947be312d..2e75a2fd4ab 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index 857f5040b79..a77bb0dabfc 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -76,9 +76,6 @@ static void preprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::Col auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - for (const auto & desc : def.columns_to_simple_aggregate) if (desc.nested_type) columns[desc.column_number] = recursiveRemoveLowCardinality(columns[desc.column_number]); @@ -266,6 +263,7 @@ AggregatingSortedAlgorithm::AggregatingSortedAlgorithm( void AggregatingSortedAlgorithm::initialize(Inputs inputs) { + 
removeConstAndSparse(inputs); merged_data.initialize(header, inputs); for (auto & input : inputs) @@ -277,6 +275,7 @@ void AggregatingSortedAlgorithm::initialize(Inputs inputs) void AggregatingSortedAlgorithm::consume(Input & input, size_t source_num) { + removeConstAndSparse(input); preprocessChunk(input.chunk, columns_definition); updateCursor(input, source_num); } diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index a5befca7233..466adf93538 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -40,6 +40,7 @@ FinishAggregatingInOrderAlgorithm::FinishAggregatingInOrderAlgorithm( void FinishAggregatingInOrderAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); current_inputs = std::move(inputs); states.resize(num_inputs); for (size_t i = 0; i < num_inputs; ++i) @@ -48,6 +49,7 @@ void FinishAggregatingInOrderAlgorithm::initialize(Inputs inputs) void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num) { + removeConstAndSparse(input); if (!input.chunk.hasRows()) return; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h index 6e352c3f104..9a1c7c24270 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h @@ -39,7 +39,6 @@ public: void set(Chunk chunk_) { - convertToFullIfSparse(chunk_); chunk = std::move(chunk_); skip_last_row = false; } @@ -47,6 +46,18 @@ public: using Inputs = std::vector; + static void removeConstAndSparse(Input & input) + { + convertToFullIfConst(input.chunk); + convertToFullIfSparse(input.chunk); + } + + static void removeConstAndSparse(Inputs & inputs) + { + for (auto & input : inputs) + removeConstAndSparse(input); + } + virtual const char * getName() const = 0; virtual void initialize(Inputs inputs) = 0; virtual void consume(Input & input, size_t source_num) = 0; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp index fe5186736b5..47b7ddf38dc 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp @@ -17,18 +17,9 @@ IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks( { } -static void prepareChunk(Chunk & chunk) -{ - auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - - chunk.setColumns(std::move(columns), num_rows); -} - void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs) { + removeConstAndSparse(inputs); merged_data->initialize(header, inputs); for (size_t source_num = 0; source_num < inputs.size(); ++source_num) @@ -36,8 +27,6 @@ void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs) if (!inputs[source_num].chunk) continue; - prepareChunk(inputs[source_num].chunk); - auto & source = sources[source_num]; source.skip_last_row = inputs[source_num].skip_last_row; @@ -55,7 +44,7 @@ void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs) void IMergingAlgorithmWithSharedChunks::consume(Input & input, size_t source_num) { - prepareChunk(input.chunk); + removeConstAndSparse(input); auto & source = 
sources[source_num]; source.skip_last_row = input.skip_last_row; diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp index d17a4d859ee..3a9cf7ee141 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp @@ -49,17 +49,16 @@ void MergingSortedAlgorithm::addInput() void MergingSortedAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); merged_data.initialize(header, inputs); current_inputs = std::move(inputs); for (size_t source_num = 0; source_num < current_inputs.size(); ++source_num) { auto & chunk = current_inputs[source_num].chunk; - if (!chunk) continue; - convertToFullIfConst(chunk); cursors[source_num] = SortCursorImpl(header, chunk.getColumns(), description, source_num); } @@ -83,7 +82,7 @@ void MergingSortedAlgorithm::initialize(Inputs inputs) void MergingSortedAlgorithm::consume(Input & input, size_t source_num) { - convertToFullIfConst(input.chunk); + removeConstAndSparse(input); current_inputs[source_num].swap(input); cursors[source_num].reset(current_inputs[source_num].chunk.getColumns(), header); diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 7329821cf97..e2c6371c44f 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -387,9 +387,6 @@ static void preprocessChunk(Chunk & chunk, const SummingSortedAlgorithm::Columns auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = column->convertToFullColumnIfConst(); - for (const auto & desc : def.columns_to_aggregate) { if (desc.nested_type) @@ -704,6 +701,7 @@ SummingSortedAlgorithm::SummingSortedAlgorithm( void SummingSortedAlgorithm::initialize(Inputs inputs) { + removeConstAndSparse(inputs); merged_data.initialize(header, inputs); for (auto & input : inputs) @@ -715,6 +713,7 @@ void SummingSortedAlgorithm::initialize(Inputs inputs) void SummingSortedAlgorithm::consume(Input & input, size_t source_num) { + removeConstAndSparse(input); preprocessChunk(input.chunk, columns_definition); updateCursor(input, source_num); } diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index b6bcec26c0c..15f8355bdc7 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -20,11 +21,13 @@ ColumnGathererStream::ColumnGathererStream( size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_) + size_t block_preferred_size_bytes_, + bool is_result_sparse_) : sources(num_inputs) , row_sources_buf(row_sources_buf_) , block_preferred_size_rows(block_preferred_size_rows_) , block_preferred_size_bytes(block_preferred_size_bytes_) + , is_result_sparse(is_result_sparse_) { if (num_inputs == 0) throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There are no streams to gather"); @@ -36,17 +39,23 @@ void ColumnGathererStream::initialize(Inputs inputs) source_columns.reserve(inputs.size()); for (size_t i = 0; i < inputs.size(); ++i) { - if (inputs[i].chunk) - { - sources[i].update(inputs[i].chunk.detachColumns().at(0)); - source_columns.push_back(sources[i].column); - } + if 
(!inputs[i].chunk) + continue; + + if (!is_result_sparse) + convertToFullIfSparse(inputs[i].chunk); + + sources[i].update(inputs[i].chunk.detachColumns().at(0)); + source_columns.push_back(sources[i].column); } if (source_columns.empty()) return; result_column = source_columns[0]->cloneEmpty(); + if (is_result_sparse && !result_column->isSparse()) + result_column = ColumnSparse::create(std::move(result_column)); + if (result_column->hasDynamicStructure()) result_column->takeDynamicStructureFromSourceColumns(source_columns); } @@ -146,7 +155,12 @@ void ColumnGathererStream::consume(Input & input, size_t source_num) { auto & source = sources[source_num]; if (input.chunk) + { + if (!is_result_sparse) + convertToFullIfSparse(input.chunk); + source.update(input.chunk.getColumns().at(0)); + } if (0 == source.size) { @@ -159,10 +173,11 @@ ColumnGathererTransform::ColumnGathererTransform( size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_) + size_t block_preferred_size_bytes_, + bool is_result_sparse_) : IMergingTransform( num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, - num_inputs, row_sources_buf_, block_preferred_size_rows_, block_preferred_size_bytes_) + num_inputs, row_sources_buf_, block_preferred_size_rows_, block_preferred_size_bytes_, is_result_sparse_) , log(getLogger("ColumnGathererStream")) { if (header.columns() != 1) diff --git a/src/Processors/Transforms/ColumnGathererTransform.h b/src/Processors/Transforms/ColumnGathererTransform.h index 4e56cffa46a..ec5691316ce 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.h +++ b/src/Processors/Transforms/ColumnGathererTransform.h @@ -60,7 +60,8 @@ public: size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_); + size_t block_preferred_size_bytes_, + bool is_result_sparse_); const char * getName() const override { return "ColumnGathererStream"; } void initialize(Inputs inputs) override; @@ -97,6 +98,7 @@ private: const size_t block_preferred_size_rows; const size_t block_preferred_size_bytes; + const bool is_result_sparse; Source * source_to_fully_copy = nullptr; @@ -113,7 +115,8 @@ public: size_t num_inputs, ReadBuffer & row_sources_buf_, size_t block_preferred_size_rows_, - size_t block_preferred_size_bytes_); + size_t block_preferred_size_bytes_, + bool is_result_sparse_); String getName() const override { return "ColumnGathererTransform"; } @@ -145,7 +148,6 @@ void ColumnGathererStream::gather(Column & column_res) next_required_source = -1; - /// We use do ... while here to ensure there will be at least one iteration of this loop. /// Because the column_res.byteSize() could be bigger than block_preferred_size_bytes already at this point. 
do diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index a9109832521..888042454a9 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -596,8 +596,9 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const pipes.emplace_back(std::move(pipe)); } - auto pipe = Pipe::unitePipes(std::move(pipes)); + bool is_result_sparse = global_ctx->new_data_part->getSerialization(column_name)->getKind() == ISerialization::Kind::SPARSE; + auto pipe = Pipe::unitePipes(std::move(pipes)); ctx->rows_sources_read_buf->seek(0, 0); const auto data_settings = global_ctx->data->getSettings(); @@ -606,7 +607,8 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const pipe.numOutputPorts(), *ctx->rows_sources_read_buf, data_settings->merge_max_block_size, - data_settings->merge_max_block_size_bytes); + data_settings->merge_max_block_size_bytes, + is_result_sparse); pipe.addTransform(std::move(transform)); From 40753ddefb0324d50bb8d455615da74828c7be76 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 23 May 2024 21:10:40 +0200 Subject: [PATCH 311/392] Update hdfs test --- tests/integration/test_storage_hdfs/test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 6ee12a87ebf..eeffa8ed00b 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -326,7 +326,7 @@ def test_virtual_columns(started_cluster): hdfs_api.write_data("/file1", "1\n") hdfs_api.write_data("/file2", "2\n") hdfs_api.write_data("/file3", "3\n") - expected = "1\tfile1\t/file1\n2\tfile2\t/file2\n3\tfile3\t/file3\n" + expected = "1\tfile1\tfile1\n2\tfile2\tfile2\n3\tfile3\tfile3\n" assert ( node1.query( "select id, _file as file_name, _path as file_path from virtual_cols order by id" @@ -493,13 +493,13 @@ def test_hdfsCluster(started_cluster): actual = node1.query( "select id, _file as file_name, _path as file_path from hdfs('hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id" ) - expected = "1\tfile1\t/test_hdfsCluster/file1\n2\tfile2\t/test_hdfsCluster/file2\n3\tfile3\t/test_hdfsCluster/file3\n" + expected = "1\tfile1\ttest_hdfsCluster/file1\n2\tfile2\ttest_hdfsCluster/file2\n3\tfile3\ttest_hdfsCluster/file3\n" assert actual == expected actual = node1.query( "select id, _file as file_name, _path as file_path from hdfsCluster('test_cluster_two_shards', 'hdfs://hdfs1:9000/test_hdfsCluster/file*', 'TSV', 'id UInt32') order by id" ) - expected = "1\tfile1\t/test_hdfsCluster/file1\n2\tfile2\t/test_hdfsCluster/file2\n3\tfile3\t/test_hdfsCluster/file3\n" + expected = "1\tfile1\ttest_hdfsCluster/file1\n2\tfile2\ttest_hdfsCluster/file2\n3\tfile3\ttest_hdfsCluster/file3\n" assert actual == expected fs.delete(dir, recursive=True) @@ -665,7 +665,7 @@ def test_virtual_columns_2(started_cluster): node1.query(f"insert into table function {table_function} SELECT 1, 'kek'") result = node1.query(f"SELECT _path FROM {table_function}") - assert result.strip() == "/parquet_2" + assert result.strip() == "parquet_2" table_function = ( f"hdfs('hdfs://hdfs1:9000/parquet_3', 'Parquet', 'a Int32, _path String')" @@ -978,25 +978,25 @@ def test_read_subcolumns(started_cluster): f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert res == 
"2\t/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" + assert res == "2\ttest_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n" res = node.query( f"select a.b.d, _path, a.b, _file, a.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert res == "2\t/test_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" + assert res == "2\ttest_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" res = node.query( f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) - assert res == "0\t/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" + assert res == "0\ttest_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" res = node.query( f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" ) - assert res == "42\t/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + assert res == "42\ttest_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" def test_union_schema_inference_mode(started_cluster): From bd15e1311a949753a234cfed9571600af78eb906 Mon Sep 17 00:00:00 2001 From: Max K Date: Thu, 23 May 2024 22:35:21 +0200 Subject: [PATCH 312/392] CI: fix --- tests/ci/ci.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 68db08fbe96..4afd3f46f9d 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1917,7 +1917,7 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> No print( f"Canceling PR workflow run_id: [{wf_data.run_id}], pr: [{pr_number}]" ) - GitHub.cancel_wf(GITHUB_REPOSITORY, get_best_robot_token(), wf_data.run_id) + GitHub.cancel_wf(GITHUB_REPOSITORY, wf_data.run_id, get_best_robot_token()) else: if not wf_data.sync_pr_run_id: print("WARNING: Sync PR run id has not been found") @@ -1925,8 +1925,8 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> No print(f"Canceling sync PR workflow run_id: [{wf_data.sync_pr_run_id}]") GitHub.cancel_wf( "ClickHouse/clickhouse-private", - get_best_robot_token(), wf_data.sync_pr_run_id, + get_best_robot_token(), ) From dac31fb92a80982ec0a98472485fa02c4b917c07 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 21 May 2024 17:29:00 +0000 Subject: [PATCH 313/392] Include settings into query cache key --- src/Interpreters/Cache/QueryCache.cpp | 37 ++++++++-- src/Interpreters/Cache/QueryCache.h | 5 +- src/Interpreters/executeQuery.cpp | 4 +- .../02494_query_cache_key.reference | 6 ++ .../0_stateless/02494_query_cache_key.sql | 70 +++++++++++++++++++ .../02494_query_cache_use_database.reference | 2 - .../02494_query_cache_use_database.sql | 30 -------- 7 files changed, 113 insertions(+), 41 deletions(-) create mode 100644 tests/queries/0_stateless/02494_query_cache_key.reference create mode 100644 tests/queries/0_stateless/02494_query_cache_key.sql delete mode 100644 tests/queries/0_stateless/02494_query_cache_use_database.reference delete mode 100644 tests/queries/0_stateless/02494_query_cache_use_database.sql diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 4b10bfd3dcd..a3fe8c2e779 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -126,6 +126,11 @@ bool astContainsSystemTables(ASTPtr ast, ContextPtr context) namespace { +bool isQueryCacheRelatedSetting(const 
String & setting_name) +{ + return setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache"); +} + class RemoveQueryCacheSettingsMatcher { public: @@ -141,7 +146,7 @@ public: auto is_query_cache_related_setting = [](const auto & change) { - return change.name.starts_with("query_cache_") || change.name.ends_with("_query_cache"); + return isQueryCacheRelatedSetting(change.name); }; std::erase_if(set_clause->changes, is_query_cache_related_setting); @@ -177,11 +182,11 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast) return transformed_ast; } -IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database) +IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database, const Settings & settings) { ast = removeQueryCacheSettings(ast); - /// Hash the AST, it must consider aliases (issue #56258) + /// Hash the AST, we must consider aliases (issue #56258) SipHash hash; ast->updateTreeHash(hash, /*ignore_aliases=*/ false); @@ -189,6 +194,25 @@ IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database) /// tables (issue #64136) hash.update(current_database); + /// Finally, hash the (changed) settings as they might affect the query result (e.g. think of settings `additional_table_filters` and `limit`). + /// Note: allChanged() returns the settings in random order. Also, update()-s of the composite hash must be done in deterministic order. + /// Therefore, collect and sort the settings first, then hash them. + Settings::Range changed_settings = settings.allChanged(); + std::vector> changed_settings_sorted; /// (name, value) + for (const auto & setting : changed_settings) + { + const String & name = setting.getName(); + const String & value = setting.getValueString(); + if (!isQueryCacheRelatedSetting(name)) /// see removeQueryCacheSettings() why this is a good idea + changed_settings_sorted.push_back({name, value}); + } + std::sort(changed_settings_sorted.begin(), changed_settings_sorted.end(), [](auto & lhs, auto & rhs) { return lhs.first < rhs.first; }); + for (const auto & setting : changed_settings_sorted) + { + hash.update(setting.first); + hash.update(setting.second); + } + return getSipHash128AsPair(hash); } @@ -204,12 +228,13 @@ String queryStringFromAST(ASTPtr ast) QueryCache::Key::Key( ASTPtr ast_, const String & current_database, + const Settings & settings, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, std::chrono::time_point expires_at_, bool is_compressed_) - : ast_hash(calculateAstHash(ast_, current_database)) + : ast_hash(calculateAstHash(ast_, current_database, settings)) , header(header_) , user_id(user_id_) , current_user_roles(current_user_roles_) @@ -220,8 +245,8 @@ QueryCache::Key::Key( { } -QueryCache::Key::Key(ASTPtr ast_, const String & current_database, std::optional user_id_, const std::vector & current_user_roles_) - : QueryCache::Key(ast_, current_database, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles +QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_) + : QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles { } diff --git a/src/Interpreters/Cache/QueryCache.h 
b/src/Interpreters/Cache/QueryCache.h index b5b6f477137..461197cac32 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -14,6 +14,8 @@ namespace DB { +struct Settings; + /// Does AST contain non-deterministic functions like rand() and now()? bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context); @@ -89,6 +91,7 @@ public: /// Ctor to construct a Key for writing into query cache. Key(ASTPtr ast_, const String & current_database, + const Settings & settings, Block header_, std::optional user_id_, const std::vector & current_user_roles_, bool is_shared_, @@ -96,7 +99,7 @@ public: bool is_compressed); /// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name). - Key(ASTPtr ast_, const String & current_database, std::optional user_id_, const std::vector & current_user_roles_); + Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional user_id_, const std::vector & current_user_roles_); bool operator==(const Key & other) const; }; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 56f08dbb902..0b5f68f27f6 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1101,7 +1101,7 @@ static std::tuple executeQueryImpl( { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getCurrentDatabase(), context->getUserID(), context->getCurrentRoles()); + QueryCache::Key key(ast, context->getCurrentDatabase(), settings, context->getUserID(), context->getCurrentRoles()); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1224,7 +1224,7 @@ static std::tuple executeQueryImpl( && (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save)) { QueryCache::Key key( - ast, context->getCurrentDatabase(), res.pipeline.getHeader(), + ast, context->getCurrentDatabase(), settings, res.pipeline.getHeader(), context->getUserID(), context->getCurrentRoles(), settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), diff --git a/tests/queries/0_stateless/02494_query_cache_key.reference b/tests/queries/0_stateless/02494_query_cache_key.reference new file mode 100644 index 00000000000..8f5b61192d5 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_key.reference @@ -0,0 +1,6 @@ +Test (1) +1 +2 +Test (2) +4 +4 diff --git a/tests/queries/0_stateless/02494_query_cache_key.sql b/tests/queries/0_stateless/02494_query_cache_key.sql new file mode 100644 index 00000000000..d8c68e0d267 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_key.sql @@ -0,0 +1,70 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +-- Tests that the key of the query cache is not only formed by the query AST but also by +-- (1) the current database (`USE db`, issue #64136), +-- (2) the query settings + + +SELECT 'Test (1)'; + +SYSTEM DROP QUERY CACHE; + +DROP DATABASE IF EXISTS db1; +DROP DATABASE IF EXISTS db2; + +CREATE DATABASE db1; +CREATE DATABASE db2; + +CREATE TABLE db1.tab(a UInt64, PRIMARY KEY a); +CREATE TABLE db2.tab(a UInt64, PRIMARY KEY a); + +INSERT INTO db1.tab values(1); +INSERT INTO db2.tab values(2); + +USE db1; +SELECT * FROM tab SETTINGS use_query_cache=1; + +USE db2; +SELECT * FROM tab SETTINGS use_query_cache=1; + +DROP DATABASE db1; +DROP DATABASE db2; + +SYSTEM DROP QUERY CACHE; + 
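+-- The next section exercises the behaviour added by this patch: every changed setting that is not
+-- query-cache-related is hashed, in sorted order, into the query cache key, so identical query text
+-- run with different settings produces separate cache entries. A minimal sketch of that expectation,
+-- assuming the query cache starts empty:
+--     SELECT 1 SETTINGS use_query_cache = 1, max_block_size = 1;
+--     SELECT 1 SETTINGS use_query_cache = 1, max_block_size = 2;
+--     SELECT count(query) FROM system.query_cache; -- expected result: 2 (one entry per settings combination)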
+ +SELECT 'Test (2)'; + +-- test with query-level settings +SELECT 1 SETTINGS use_query_cache = 1, limit = 1, use_skip_indexes = 0 Format Null; +SELECT 1 SETTINGS use_query_cache = 1, use_skip_indexes = 0 Format Null; +SELECT 1 SETTINGS use_query_cache = 1, use_skip_indexes = 1 Format Null; +SELECT 1 SETTINGS use_query_cache = 1, max_block_size = 1 Format Null; + +-- 4x the same query but with different settings each. There should yield four entries in the query cache. +SELECT count(query) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +-- test with mixed session-level/query-level settings +SET use_query_cache = 1; +SET limit = 1; +SELECT 1 SETTINGS use_skip_indexes = 0 Format Null; +SET limit = default; +SET use_skip_indexes = 0; +SELECT 1 Format Null; +SET use_skip_indexes = 1; +SELECT 1 SETTINGS use_skip_indexes = 1 Format Null; +SET use_skip_indexes = default; +SET max_block_size = 1; +SELECT 1 Format Null; +SET max_block_size = default; + +SET use_query_cache = default; + +-- 4x the same query but with different settings each. There should yield four entries in the query cache. +SELECT count(query) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + diff --git a/tests/queries/0_stateless/02494_query_cache_use_database.reference b/tests/queries/0_stateless/02494_query_cache_use_database.reference deleted file mode 100644 index 1191247b6d9..00000000000 --- a/tests/queries/0_stateless/02494_query_cache_use_database.reference +++ /dev/null @@ -1,2 +0,0 @@ -1 -2 diff --git a/tests/queries/0_stateless/02494_query_cache_use_database.sql b/tests/queries/0_stateless/02494_query_cache_use_database.sql deleted file mode 100644 index df560f82ebb..00000000000 --- a/tests/queries/0_stateless/02494_query_cache_use_database.sql +++ /dev/null @@ -1,30 +0,0 @@ --- Tags: no-parallel, no-fasttest --- Tag no-fasttest: Depends on OpenSSL --- Tag no-parallel: Messes with internal cache - --- Test for issue #64136 - -SYSTEM DROP QUERY CACHE; - -DROP DATABASE IF EXISTS db1; -DROP DATABASE IF EXISTS db2; - -CREATE DATABASE db1; -CREATE DATABASE db2; - -CREATE TABLE db1.tab(a UInt64, PRIMARY KEY a); -CREATE TABLE db2.tab(a UInt64, PRIMARY KEY a); - -INSERT INTO db1.tab values(1); -INSERT INTO db2.tab values(2); - -USE db1; -SELECT * FROM tab SETTINGS use_query_cache=1; - -USE db2; -SELECT * FROM tab SETTINGS use_query_cache=1; - -DROP DATABASE db1; -DROP DATABASE db2; - -SYSTEM DROP QUERY CACHE; From 6e6e2944b56245cd5eefd14deb7dba7b8459b935 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 May 2024 21:26:33 +0000 Subject: [PATCH 314/392] Fix glitch in #62696 --- src/Functions/FunctionHelpers.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Functions/FunctionHelpers.cpp b/src/Functions/FunctionHelpers.cpp index 3b057779ffe..d85bb0e7060 100644 --- a/src/Functions/FunctionHelpers.cpp +++ b/src/Functions/FunctionHelpers.cpp @@ -21,8 +21,6 @@ namespace ErrorCodes const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * column) { - if (!column) - return {}; if (!isColumnConst(*column)) return {}; From 5710b5852f9e067fbcd8809196c9c403a8de43dc Mon Sep 17 00:00:00 2001 From: Nataly Merezhuk Date: Thu, 23 May 2024 17:45:58 -0400 Subject: [PATCH 315/392] Adds note - file engine unavailable in ClickHouse Cloud. 
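The File engine works against the local filesystem of the server, which is not accessible on
ClickHouse Cloud, so the documentation now points readers at the S3 table function instead. A minimal
sketch of the suggested alternative, assuming a publicly readable bucket (the bucket name and path are
illustrative):

    SELECT * FROM s3('https://my-bucket.s3.amazonaws.com/data/example.csv', NOSIGN, 'CSVWithNames');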
--- docs/en/engines/table-engines/special/file.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index fdf5242ba3b..0d422f64762 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -14,6 +14,10 @@ Usage scenarios: - Convert data from one format to another. - Updating data in ClickHouse via editing a file on a disk. +:::note +This engine is not currently available in ClickHouse Cloud, please [use the S3 table function instead](/docs/en/sql-reference/table-functions/s3.md). +::: + ## Usage in ClickHouse Server {#usage-in-clickhouse-server} ``` sql From 251010f109a538c770f830bc254e031924486c46 Mon Sep 17 00:00:00 2001 From: TTPO100AJIEX Date: Fri, 24 May 2024 02:14:26 +0300 Subject: [PATCH 316/392] Move protocol-server and inter-server management into separate classes Co-authored-by: Alex Koledaev --- programs/server/Server.cpp | 987 +----------------- programs/server/Server.h | 95 +- src/CMakeLists.txt | 1 + src/Server/ServersManager/IServersManager.cpp | 268 +++++ src/Server/ServersManager/IServersManager.h | 74 ++ .../ServersManager/InterServersManager.cpp | 327 ++++++ .../ServersManager/InterServersManager.h | 45 + .../ServersManager/ProtocolServersManager.cpp | 523 ++++++++++ .../ServersManager/ProtocolServersManager.h | 37 + 9 files changed, 1325 insertions(+), 1032 deletions(-) create mode 100644 src/Server/ServersManager/IServersManager.cpp create mode 100644 src/Server/ServersManager/IServersManager.h create mode 100644 src/Server/ServersManager/InterServersManager.cpp create mode 100644 src/Server/ServersManager/InterServersManager.h create mode 100644 src/Server/ServersManager/ProtocolServersManager.cpp create mode 100644 src/Server/ServersManager/ProtocolServersManager.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 223bc1f77e7..b62ae40924c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -6,8 +6,6 @@ #include #include #include -#include -#include #include #include #include @@ -44,11 +42,9 @@ #include #include #include -#include #include #include #include -#include #include #include #include @@ -83,29 +79,19 @@ #include #include #include -#include #include "MetricsTransmitter.h" #include -#include -#include #include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include #include #include #include -#include #include "config.h" #include @@ -119,19 +105,9 @@ #endif #if USE_SSL -# include # include #endif -#if USE_GRPC -# include -#endif - -#if USE_NURAFT -# include -# include -#endif - #if USE_JEMALLOC # include #endif @@ -159,18 +135,6 @@ namespace ProfileEvents { extern const Event MainConfigLoads; extern const Event ServerStartupMilliseconds; - extern const Event InterfaceNativeSendBytes; - extern const Event InterfaceNativeReceiveBytes; - extern const Event InterfaceHTTPSendBytes; - extern const Event InterfaceHTTPReceiveBytes; - extern const Event InterfacePrometheusSendBytes; - extern const Event InterfacePrometheusReceiveBytes; - extern const Event InterfaceInterserverSendBytes; - extern const Event InterfaceInterserverReceiveBytes; - extern const Event InterfaceMySQLSendBytes; - extern const Event InterfaceMySQLReceiveBytes; - extern const Event InterfacePostgreSQLSendBytes; - extern const Event InterfacePostgreSQLReceiveBytes; } namespace fs = std::filesystem; @@ -238,11 
+202,9 @@ namespace DB namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; - extern const int SUPPORT_IS_DISABLED; extern const int ARGUMENT_OUT_OF_BOUND; extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int INVALID_CONFIG_PARAMETER; - extern const int NETWORK_ERROR; extern const int CORRUPTED_DATA; } @@ -257,115 +219,6 @@ static std::string getCanonicalPath(std::string && path) return std::move(path); } -Poco::Net::SocketAddress Server::socketBindListen( - const Poco::Util::AbstractConfiguration & config, - Poco::Net::ServerSocket & socket, - const std::string & host, - UInt16 port, - [[maybe_unused]] bool secure) const -{ - auto address = makeSocketAddress(host, port, &logger()); - socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config.getBool("listen_reuse_port", false)); - /// If caller requests any available port from the OS, discover it after binding. - if (port == 0) - { - address = socket.address(); - LOG_DEBUG(&logger(), "Requested any available port (port == 0), actual port is {:d}", address.port()); - } - - socket.listen(/* backlog = */ config.getUInt("listen_backlog", 4096)); - - return address; -} - -Strings getListenHosts(const Poco::Util::AbstractConfiguration & config) -{ - auto listen_hosts = DB::getMultipleValuesFromConfig(config, "", "listen_host"); - if (listen_hosts.empty()) - { - listen_hosts.emplace_back("::1"); - listen_hosts.emplace_back("127.0.0.1"); - } - return listen_hosts; -} - -Strings getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) -{ - auto interserver_listen_hosts = DB::getMultipleValuesFromConfig(config, "", "interserver_listen_host"); - if (!interserver_listen_hosts.empty()) - return interserver_listen_hosts; - - /// Use more general restriction in case of emptiness - return getListenHosts(config); -} - -bool getListenTry(const Poco::Util::AbstractConfiguration & config) -{ - bool listen_try = config.getBool("listen_try", false); - if (!listen_try) - { - Poco::Util::AbstractConfiguration::Keys protocols; - config.keys("protocols", protocols); - listen_try = - DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() && - std::none_of(protocols.begin(), protocols.end(), [&](const auto & protocol) - { - return config.has("protocols." + protocol + ".host") && config.has("protocols." + protocol + ".port"); - }); - } - return listen_try; -} - - -void Server::createServer( - Poco::Util::AbstractConfiguration & config, - const std::string & listen_host, - const char * port_name, - bool listen_try, - bool start_server, - std::vector & servers, - CreateServerFunc && func) const -{ - /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. - if (config.getString(port_name, "").empty()) - return; - - /// If we already have an active server for this listen_host/port_name, don't create it again - for (const auto & server : servers) - { - if (!server.isStopping() && server.getListenHost() == listen_host && server.getPortName() == port_name) - return; - } - - auto port = config.getInt(port_name); - try - { - servers.push_back(func(port)); - if (start_server) - { - servers.back().start(); - LOG_INFO(&logger(), "Listening for {}", servers.back().getDescription()); - } - global_context->registerServerPort(port_name, port); - } - catch (const Poco::Exception &) - { - if (listen_try) - { - LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. 
If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " - "then consider to " - "specify not disabled IPv4 or IPv6 address to listen in element of configuration " - "file. Example for disabled IPv6: 0.0.0.0 ." - " Example for disabled IPv4: ::", - listen_host, port, getCurrentExceptionMessage(false)); - } - else - { - throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); - } - } -} - #if defined(OS_LINUX) namespace @@ -665,6 +518,7 @@ try ServerSettings server_settings; server_settings.loadSettingsFromConfig(config()); + Poco::ThreadPool server_pool(3, server_settings.max_connections); ASTAlterCommand::setFormatAlterCommandsWithParentheses(server_settings.format_alter_operations_with_parentheses); @@ -721,11 +575,6 @@ try CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); - Poco::ThreadPool server_pool(3, server_settings.max_connections); - std::mutex servers_lock; - std::vector servers; - std::vector servers_to_start_before_tables; - /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases, ... */ @@ -775,6 +624,10 @@ try bool will_have_trace_collector = hasPHDRCache() && config().has("trace_log"); + std::mutex servers_lock; + ProtocolServersManager servers(context(), &logger()); + InterServersManager servers_to_start_before_tables(context(), &logger()); + // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. @@ -806,32 +659,7 @@ try LOG_DEBUG(log, "Shut down storages."); - if (!servers_to_start_before_tables.empty()) - { - LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); - size_t current_connections = 0; - { - std::lock_guard lock(servers_lock); - for (auto & server : servers_to_start_before_tables) - { - server.stop(); - current_connections += server.currentConnections(); - } - } - - if (current_connections) - LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); - else - LOG_INFO(log, "Closed all listening sockets."); - - if (current_connections > 0) - current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished); - - if (current_connections) - LOG_INFO(log, "Closed connections to servers for tables. But {} remain. 
Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); - else - LOG_INFO(log, "Closed connections to servers for tables."); - } + servers_to_start_before_tables.stopServers(server_settings, servers_lock); global_context->shutdownKeeperDispatcher(); @@ -928,19 +756,13 @@ try server_settings.asynchronous_heavy_metrics_update_period_s, [&]() -> std::vector { - std::vector metrics; - std::lock_guard lock(servers_lock); - metrics.reserve(servers_to_start_before_tables.size() + servers.size()); - - for (const auto & server : servers_to_start_before_tables) - metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - - for (const auto & server : servers) - metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - return metrics; - } - ); + std::vector metrics1 = servers_to_start_before_tables.getMetrics(); + std::vector metrics2 = servers.getMetrics(); + metrics1.reserve(metrics1.size() + metrics2.size()); + metrics1.insert(metrics1.end(), std::make_move_iterator(metrics2.begin()), std::make_move_iterator(metrics2.end())); + return metrics1; + }); zkutil::validateZooKeeperConfig(config()); bool has_zookeeper = zkutil::hasZooKeeperConfig(config()); @@ -1588,7 +1410,8 @@ try if (global_context->isServerCompletelyStarted()) { std::lock_guard lock(servers_lock); - updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables); + servers.updateServers(*config, *this, servers_lock, server_pool, async_metrics, latest_config); + servers_to_start_before_tables.updateServers(*config, *this, servers_lock, server_pool, async_metrics, latest_config); } } @@ -1635,141 +1458,17 @@ try /// Must be the last. latest_config = config; }, - /* already_loaded = */ false); /// Reload it right now (initial loading) + /* already_loaded = */ false); /// Reload it right now (initial loading) - const auto listen_hosts = getListenHosts(config()); - const auto interserver_listen_hosts = getInterserverListenHosts(config()); - const auto listen_try = getListenTry(config()); - - if (config().has("keeper_server.server_id")) - { -#if USE_NURAFT - //// If we don't have configured connection probably someone trying to use clickhouse-server instead - //// of clickhouse-keeper, so start synchronously. - bool can_initialize_keeper_async = false; - - if (has_zookeeper) /// We have configured connection to some zookeeper cluster - { - /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start - /// synchronously. - can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); - } - /// Initialize keeper RAFT. 
- global_context->initializeKeeperDispatcher(can_initialize_keeper_async); - FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); - - auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & - { - return global_context->getConfigRef(); - }; - - for (const auto & listen_host : listen_hosts) - { - /// TCP Keeper - const char * port_name = "keeper_server.tcp_port"; - createServer( - config(), listen_host, port_name, listen_try, /* start_server: */ false, - servers_to_start_before_tables, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config(), socket, listen_host, port); - socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); - socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); - return ProtocolServerAdapter( - listen_host, - port_name, - "Keeper (tcp): " + address.toString(), - std::make_unique( - new KeeperTCPHandlerFactory( - config_getter, global_context->getKeeperDispatcher(), - global_context->getSettingsRef().receive_timeout.totalSeconds(), - global_context->getSettingsRef().send_timeout.totalSeconds(), - false), server_pool, socket)); - }); - - const char * secure_port_name = "keeper_server.tcp_port_secure"; - createServer( - config(), listen_host, secure_port_name, listen_try, /* start_server: */ false, - servers_to_start_before_tables, - [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config(), socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); - socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); - return ProtocolServerAdapter( - listen_host, - secure_port_name, - "Keeper with secure protocol (tcp_secure): " + address.toString(), - std::make_unique( - new KeeperTCPHandlerFactory( - config_getter, global_context->getKeeperDispatcher(), - global_context->getSettingsRef().receive_timeout.totalSeconds(), - global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket)); -#else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - - /// HTTP control endpoints - port_name = "keeper_server.http_control.port"; - createServer(config(), listen_host, port_name, listen_try, /* start_server: */ false, - servers_to_start_before_tables, - [&](UInt16 port) -> ProtocolServerAdapter - { - auto http_context = httpContext(); - Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(http_context->getReceiveTimeout()); - http_params->setKeepAliveTimeout(keep_alive_timeout); - - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config(), socket, listen_host, port); - socket.setReceiveTimeout(http_context->getReceiveTimeout()); - socket.setSendTimeout(http_context->getSendTimeout()); - return ProtocolServerAdapter( - listen_host, - port_name, - "HTTP Control: http://" + address.toString(), - std::make_unique( - 
std::move(http_context), - createKeeperHTTPControlMainHandlerFactory( - config_getter(), - global_context->getKeeperDispatcher(), - "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params)); - }); - } -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); -#endif - - } - - { - std::lock_guard lock(servers_lock); - /// We should start interserver communications before (and more important shutdown after) tables. - /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. - /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can - /// communicate with zookeeper, execute merges, etc. - createInterserverServers( - config(), - interserver_listen_hosts, - listen_try, - server_pool, - async_metrics, - servers_to_start_before_tables, - /* start_servers= */ false); - - - for (auto & server : servers_to_start_before_tables) - { - server.start(); - LOG_INFO(log, "Listening for {}", server.getDescription()); - } - } + servers_to_start_before_tables.createServers( + config(), + *this, + servers_lock, + server_pool, + async_metrics, + /* start_servers= */ false, + ServerType(ServerType::Type::QUERIES_ALL) + ); /// Initialize access storages. auto & access_control = global_context->getAccessControl(); @@ -1799,19 +1498,18 @@ try global_context->setStopServersCallback([&](const ServerType & server_type) { std::lock_guard lock(servers_lock); - stopServers(servers, server_type); + servers.stopServers(server_type); }); global_context->setStartServersCallback([&](const ServerType & server_type) { std::lock_guard lock(servers_lock); - createServers( + servers.createServers( config(), - listen_hosts, - listen_try, + *this, + servers_lock, server_pool, async_metrics, - servers, /* start_servers= */ true, server_type); }); @@ -2024,18 +1722,21 @@ try { std::lock_guard lock(servers_lock); - createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers); + servers.createServers( + config(), + *this, + servers_lock, + server_pool, + async_metrics, + false, + ServerType(ServerType::Type::QUERIES_ALL)); if (servers.empty()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " - "to configuration file.)"); + throw Exception( + ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "No servers started (add valid listen_host and 'tcp_port' " + "or 'http_port' to configuration file.)"); } - if (servers.empty()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " - "to configuration file.)"); - #if USE_SSL CertificateReloader::instance().tryLoad(config()); #endif @@ -2107,12 +1808,7 @@ try { std::lock_guard lock(servers_lock); - for (auto & server : servers) - { - server.start(); - LOG_INFO(log, "Listening for {}", server.getDescription()); - } - + servers.startServers(); global_context->setServerCompletelyStarted(); LOG_INFO(log, "Ready for connections."); } @@ -2148,46 +1844,10 @@ try access_control.stopPeriodicReloading(); is_cancelled = true; - - LOG_DEBUG(log, "Waiting for current connections to close."); - - size_t current_connections = 0; - { - std::lock_guard lock(servers_lock); - for (auto & server : servers) - { - server.stop(); - current_connections += server.currentConnections(); - } - } - - if (current_connections) - 
LOG_WARNING(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); - else - LOG_INFO(log, "Closed all listening sockets."); - - /// Wait for unfinished backups and restores. - /// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries - /// (because killAllQueries() will cancel all running backups/restores). - if (server_settings.shutdown_wait_backups_and_restores) - global_context->waitAllBackupsAndRestores(); - - /// Killing remaining queries. - if (!server_settings.shutdown_wait_unfinished_queries) - global_context->getProcessList().killAllQueries(); - - if (current_connections) - current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); - - if (current_connections) - LOG_WARNING(log, "Closed connections. But {} remain." - " Tip: To increase wait time add to config: 60", current_connections); - else - LOG_INFO(log, "Closed connections."); - + const auto remaining_connections = servers.stopServers(server_settings, servers_lock); dns_cache_updater.reset(); - if (current_connections) + if (remaining_connections) { /// There is no better way to force connections to close in Poco. /// Otherwise connection handlers will continue to live @@ -2221,561 +1881,4 @@ catch (...) return code ? code : -1; } -std::unique_ptr Server::buildProtocolStackFromConfig( - const Poco::Util::AbstractConfiguration & config, - const std::string & protocol, - Poco::Net::HTTPServerParams::Ptr http_params, - AsynchronousMetrics & async_metrics, - bool & is_secure) -{ - auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr - { - if (type == "tcp") - return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes)); - - if (type == "tls") -#if USE_SSL - return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - - if (type == "proxy1") - return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name)); - if (type == "mysql") - return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes)); - if (type == "postgres") - return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes)); - if (type == "http") - return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes) - ); - if (type == "prometheus") - return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes) - ); - if (type == "interserver") - return TCPServerConnectionFactory::Ptr( - new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), 
ProfileEvents::InterfaceInterserverReceiveBytes, ProfileEvents::InterfaceInterserverSendBytes) - ); - - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); - }; - - std::string conf_name = "protocols." + protocol; - std::string prefix = conf_name + "."; - std::unordered_set pset {conf_name}; - - auto stack = std::make_unique(*this, conf_name); - - while (true) - { - // if there is no "type" - it's a reference to another protocol and this is just an endpoint - if (config.has(prefix + "type")) - { - std::string type = config.getString(prefix + "type"); - if (type == "tls") - { - if (is_secure) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); - is_secure = true; - } - - stack->append(create_factory(type, conf_name)); - } - - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." + config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } - - return stack; -} - -HTTPContextPtr Server::httpContext() const -{ - return std::make_shared(context()); -} - -void Server::createServers( - Poco::Util::AbstractConfiguration & config, - const Strings & listen_hosts, - bool listen_try, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - bool start_servers, - const ServerType & server_type) -{ - const Settings & settings = global_context->getSettingsRef(); - - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); - - Poco::Util::AbstractConfiguration::Keys protocols; - config.keys("protocols", protocols); - - for (const auto & protocol : protocols) - { - if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) - continue; - - std::string prefix = "protocols." 
+ protocol + "."; - std::string port_name = prefix + "port"; - std::string description {" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - - if (!config.has(prefix + "port")) - continue; - - std::vector hosts; - if (config.has(prefix + "host")) - hosts.push_back(config.getString(prefix + "host")); - else - hosts = listen_hosts; - - for (const auto & host : hosts) - { - bool is_secure = false; - auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); - - if (stack->empty()) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); - - createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, host, port, is_secure); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - - return ProtocolServerAdapter( - host, - port_name.c_str(), - description + ": " + address.toString(), - std::make_unique( - stack.release(), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - } - - for (const auto & listen_host : listen_hosts) - { - const char * port_name; - - if (server_type.shouldStart(ServerType::Type::HTTP)) - { - /// HTTP - port_name = "http_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - - return ProtocolServerAdapter( - listen_host, - port_name, - "http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); - }); - } - - if (server_type.shouldStart(ServerType::Type::HTTPS)) - { - /// HTTPS - port_name = "https_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "https://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); -#else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP)) - { - /// TCP - port_name = "tcp_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return 
ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) - { - /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt - port_name = "tcp_with_proxy_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp) with PROXY: " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) - { - /// TCP with SSL - port_name = "tcp_port_secure"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - #else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); - #endif - }); - } - - if (server_type.shouldStart(ServerType::Type::MYSQL)) - { - port_name = "mysql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "MySQL compatibility protocol: " + address.toString(), - std::make_unique(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) - { - port_name = "postgresql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return 
ProtocolServerAdapter( - listen_host, - port_name, - "PostgreSQL compatibility protocol: " + address.toString(), - std::make_unique(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); - }); - } - -#if USE_GRPC - if (server_type.shouldStart(ServerType::Type::GRPC)) - { - port_name = "grpc_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::SocketAddress server_address(listen_host, port); - return ProtocolServerAdapter( - listen_host, - port_name, - "gRPC protocol: " + server_address.toString(), - std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); - }); - } -#endif - if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) - { - /// Prometheus (if defined and not setup yet with http_port) - port_name = "prometheus.port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "Prometheus: http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes)); - }); - } - } -} - -void Server::createInterserverServers( - Poco::Util::AbstractConfiguration & config, - const Strings & interserver_listen_hosts, - bool listen_try, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - bool start_servers, - const ServerType & server_type) -{ - const Settings & settings = global_context->getSettingsRef(); - - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); - - /// Now iterate over interserver_listen_hosts - for (const auto & interserver_listen_host : interserver_listen_hosts) - { - const char * port_name; - - if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) - { - /// Interserver IO HTTP - port_name = "interserver_http_port"; - createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "replica communication (interserver): http://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceInterserverReceiveBytes, - ProfileEvents::InterfaceInterserverSendBytes)); - }); - } - - if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) - { - port_name = "interserver_https_port"; - createServer(config, interserver_listen_host, 
port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "secure replica communication (interserver): https://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceInterserverReceiveBytes, - ProfileEvents::InterfaceInterserverSendBytes)); -#else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - } - } -} - -void Server::stopServers( - std::vector & servers, - const ServerType & server_type -) const -{ - LoggerRawPtr log = &logger(); - - /// Remove servers once all their connections are closed - auto check_server = [&log](const char prefix[], auto & server) - { - if (!server.isStopping()) - return false; - size_t current_connections = server.currentConnections(); - LOG_DEBUG(log, "Server {}{}: {} ({} connections)", - server.getDescription(), - prefix, - !current_connections ? "finished" : "waiting", - current_connections); - return !current_connections; - }; - - std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); - - for (auto & server : servers) - { - if (!server.isStopping()) - { - const std::string server_port_name = server.getPortName(); - - if (server_type.shouldStop(server_port_name)) - server.stop(); - } - } - - std::erase_if(servers, std::bind_front(check_server, "")); -} - -void Server::updateServers( - Poco::Util::AbstractConfiguration & config, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - std::vector & servers_to_start_before_tables) -{ - LoggerRawPtr log = &logger(); - - const auto listen_hosts = getListenHosts(config); - const auto interserver_listen_hosts = getInterserverListenHosts(config); - const auto listen_try = getListenTry(config); - - /// Remove servers once all their connections are closed - auto check_server = [&log](const char prefix[], auto & server) - { - if (!server.isStopping()) - return false; - size_t current_connections = server.currentConnections(); - LOG_DEBUG(log, "Server {}{}: {} ({} connections)", - server.getDescription(), - prefix, - !current_connections ? "finished" : "waiting", - current_connections); - return !current_connections; - }; - - std::erase_if(servers, std::bind_front(check_server, " (from one of previous reload)")); - - Poco::Util::AbstractConfiguration & previous_config = latest_config ? 
*latest_config : this->config(); - - std::vector all_servers; - all_servers.reserve(servers.size() + servers_to_start_before_tables.size()); - for (auto & server : servers) - all_servers.push_back(&server); - - for (auto & server : servers_to_start_before_tables) - all_servers.push_back(&server); - - for (auto * server : all_servers) - { - if (!server->isStopping()) - { - std::string port_name = server->getPortName(); - bool has_host = false; - bool is_http = false; - if (port_name.starts_with("protocols.")) - { - std::string protocol = port_name.substr(0, port_name.find_last_of('.')); - has_host = config.has(protocol + ".host"); - - std::string conf_name = protocol; - std::string prefix = protocol + "."; - std::unordered_set pset {conf_name}; - while (true) - { - if (config.has(prefix + "type")) - { - std::string type = config.getString(prefix + "type"); - if (type == "http") - { - is_http = true; - break; - } - } - - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." + config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } - } - else - { - /// NOTE: better to compare using getPortName() over using - /// dynamic_cast<> since HTTPServer is also used for prometheus and - /// internal replication communications. - is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port"; - } - - if (!has_host) - has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end(); - bool has_port = !config.getString(port_name, "").empty(); - bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); - if (force_restart) - LOG_TRACE(log, " had been changed, will reload {}", server->getDescription()); - - if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart) - { - server->stop(); - LOG_INFO(log, "Stopped listening for {}", server->getDescription()); - } - } - } - - createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); - createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true); - - std::erase_if(servers, std::bind_front(check_server, "")); - std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, "")); -} - } diff --git a/programs/server/Server.h b/programs/server/Server.h index 3f03dd137ef..b4931ce53d1 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -1,15 +1,10 @@ #pragma once #include - #include -#include -#include -#include -#include /** Server provides three interfaces: - * 1. HTTP - simple interface for any applications. + * 1. HTTP, GRPC - simple interfaces for any applications. * 2. TCP - interface for native clickhouse-client and for server to server internal communications. * More rich and efficient, but less compatible * - data is transferred by columns; @@ -18,43 +13,21 @@ * 3. Interserver HTTP - for replication. 
*/ -namespace Poco -{ - namespace Net - { - class ServerSocket; - } -} - namespace DB { -class AsynchronousMetrics; -class ProtocolServerAdapter; class Server : public BaseDaemon, public IServer { public: using ServerApplication::run; - Poco::Util::LayeredConfiguration & config() const override - { - return BaseDaemon::config(); - } + Poco::Util::LayeredConfiguration & config() const override { return BaseDaemon::config(); } - Poco::Logger & logger() const override - { - return BaseDaemon::logger(); - } + Poco::Logger & logger() const override { return BaseDaemon::logger(); } - ContextMutablePtr context() const override - { - return global_context; - } + ContextMutablePtr context() const override { return global_context; } - bool isCancelled() const override - { - return BaseDaemon::isCancelled(); - } + bool isCancelled() const override { return BaseDaemon::isCancelled(); } void defineOptions(Poco::Util::OptionSet & _options) override; @@ -73,64 +46,6 @@ private: ContextMutablePtr global_context; /// Updated/recent config, to compare http_handlers ConfigurationPtr latest_config; - - HTTPContextPtr httpContext() const; - - Poco::Net::SocketAddress socketBindListen( - const Poco::Util::AbstractConfiguration & config, - Poco::Net::ServerSocket & socket, - const std::string & host, - UInt16 port, - [[maybe_unused]] bool secure = false) const; - - std::unique_ptr buildProtocolStackFromConfig( - const Poco::Util::AbstractConfiguration & config, - const std::string & protocol, - Poco::Net::HTTPServerParams::Ptr http_params, - AsynchronousMetrics & async_metrics, - bool & is_secure); - - using CreateServerFunc = std::function; - void createServer( - Poco::Util::AbstractConfiguration & config, - const std::string & listen_host, - const char * port_name, - bool listen_try, - bool start_server, - std::vector & servers, - CreateServerFunc && func) const; - - void createServers( - Poco::Util::AbstractConfiguration & config, - const Strings & listen_hosts, - bool listen_try, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - bool start_servers = false, - const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); - - void createInterserverServers( - Poco::Util::AbstractConfiguration & config, - const Strings & interserver_listen_hosts, - bool listen_try, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - bool start_servers = false, - const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); - - void updateServers( - Poco::Util::AbstractConfiguration & config, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - std::vector & servers, - std::vector & servers_to_start_before_tables); - - void stopServers( - std::vector & servers, - const ServerType & server_type - ) const; }; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4e8946facda..826204111a0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -234,6 +234,7 @@ add_object_library(clickhouse_client Client) add_object_library(clickhouse_bridge BridgeHelper) add_object_library(clickhouse_server Server) add_object_library(clickhouse_server_http Server/HTTP) +add_object_library(clickhouse_server_manager Server/ServersManager) add_object_library(clickhouse_formats Formats) add_object_library(clickhouse_processors Processors) add_object_library(clickhouse_processors_executors Processors/Executors) diff --git a/src/Server/ServersManager/IServersManager.cpp 
b/src/Server/ServersManager/IServersManager.cpp new file mode 100644 index 00000000000..c903d90f766 --- /dev/null +++ b/src/Server/ServersManager/IServersManager.cpp @@ -0,0 +1,268 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int NETWORK_ERROR; +extern const int INVALID_CONFIG_PARAMETER; +} + +IServersManager::IServersManager(ContextMutablePtr l_global_context, Poco::Logger * l_logger) + : global_context(l_global_context), logger(l_logger) +{ +} + + +bool IServersManager::empty() const +{ + return servers.empty(); +} + +std::vector IServersManager::getMetrics() const +{ + std::vector metrics; + metrics.reserve(servers.size()); + for (const auto & server : servers) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + return metrics; +} + +void IServersManager::startServers() +{ + for (auto & server : servers) + { + server.start(); + LOG_INFO(logger, "Listening for {}", server.getDescription()); + } +} + +void IServersManager::stopServers(const ServerType & server_type) +{ + /// Remove servers once all their connections are closed + auto check_server = [&](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG( + logger, + "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? "finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); + + for (auto & server : servers) + { + if (!server.isStopping() && server_type.shouldStop(server.getPortName())) + server.stop(); + } + + std::erase_if(servers, std::bind_front(check_server, "")); +} + +void IServersManager::updateServers( + const Poco::Util::AbstractConfiguration & config, + IServer & iserver, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + ConfigurationPtr latest_config) +{ + stopServersForUpdate(config, latest_config); + createServers(config, iserver, servers_lock, server_pool, async_metrics, true, ServerType(ServerType::Type::QUERIES_ALL)); +} + +Poco::Net::SocketAddress IServersManager::socketBindListen( + const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port) const +{ + auto address = makeSocketAddress(host, port, logger); + socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config.getBool("listen_reuse_port", false)); + /// If caller requests any available port from the OS, discover it after binding. + if (port == 0) + { + address = socket.address(); + LOG_DEBUG(logger, "Requested any available port (port == 0), actual port is {:d}", address.port()); + } + + socket.listen(/* backlog = */ config.getUInt("listen_backlog", 4096)); + return address; +} + +void IServersManager::createServer( + const Poco::Util::AbstractConfiguration & config, + const std::string & listen_host, + const char * port_name, + CreateServerFunc && func, + bool start_server) +{ + /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. 
+ if (config.getString(port_name, "").empty()) + return; + + /// If we already have an active server for this listen_host/port_name, don't create it again + for (const auto & server : servers) + { + if (!server.isStopping() && server.getListenHost() == listen_host && server.getPortName() == port_name) + return; + } + + auto port = config.getInt(port_name); + try + { + servers.push_back(func(port)); + if (start_server) + { + servers.back().start(); + LOG_INFO(logger, "Listening for {}", servers.back().getDescription()); + } + global_context->registerServerPort(port_name, port); + } + catch (const Poco::Exception &) + { + if (!getListenTry(config)) + { + throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); + } + LOG_WARNING( + logger, + "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " + "then consider to " + "specify not disabled IPv4 or IPv6 address to listen in element of configuration " + "file. Example for disabled IPv6: 0.0.0.0 ." + " Example for disabled IPv4: ::", + listen_host, + port, + getCurrentExceptionMessage(false)); + } +} + +void IServersManager::stopServersForUpdate(const Poco::Util::AbstractConfiguration & config, ConfigurationPtr latest_config) +{ + /// Remove servers once all their connections are closed + auto check_server = [&](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG( + logger, + "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? "finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous reload)")); + + const auto listen_hosts = getListenHosts(config); + const Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : config; + + for (auto & server : servers) + { + if (server.isStopping()) + return; + std::string port_name = server.getPortName(); + bool has_host = false; + bool is_http = false; + if (port_name.starts_with("protocols.")) + { + std::string protocol = port_name.substr(0, port_name.find_last_of('.')); + has_host = config.has(protocol + ".host"); + + std::string conf_name = protocol; + std::string prefix = protocol + "."; + std::unordered_set pset{conf_name}; + while (true) + { + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "http") + { + is_http = true; + break; + } + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception( + ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + } + else + { + /// NOTE: better to compare using getPortName() over using + /// dynamic_cast<> since HTTPServer is also used for prometheus and + /// internal replication communications. 
+ is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port"; + } + + if (!has_host) + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); + bool has_port = !config.getString(port_name, "").empty(); + bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); + if (force_restart) + LOG_TRACE(logger, " had been changed, will reload {}", server.getDescription()); + + if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart) + { + server.stop(); + LOG_INFO(logger, "Stopped listening for {}", server.getDescription()); + } + } + + std::erase_if(servers, std::bind_front(check_server, "")); +} + +Strings IServersManager::getListenHosts(const Poco::Util::AbstractConfiguration & config) const +{ + auto listen_hosts = DB::getMultipleValuesFromConfig(config, "", "listen_host"); + if (listen_hosts.empty()) + { + listen_hosts.emplace_back("::1"); + listen_hosts.emplace_back("127.0.0.1"); + } + return listen_hosts; +} + +bool IServersManager::getListenTry(const Poco::Util::AbstractConfiguration & config) const +{ + bool listen_try = config.getBool("listen_try", false); + if (!listen_try) + { + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + listen_try = DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() + && std::none_of( + protocols.begin(), + protocols.end(), + [&](const auto & protocol) + { return config.has("protocols." + protocol + ".host") && config.has("protocols." + protocol + ".port"); }); + } + return listen_try; +} + +} diff --git a/src/Server/ServersManager/IServersManager.h b/src/Server/ServersManager/IServersManager.h new file mode 100644 index 00000000000..5218ab63554 --- /dev/null +++ b/src/Server/ServersManager/IServersManager.h @@ -0,0 +1,74 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class IServersManager +{ +public: + IServersManager(ContextMutablePtr global_context, Poco::Logger * logger); + virtual ~IServersManager() = default; + + bool empty() const; + std::vector getMetrics() const; + + virtual void createServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) + = 0; + + virtual void startServers(); + + virtual void stopServers(const ServerType & server_type); + virtual size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) = 0; + + virtual void updateServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + ConfigurationPtr latest_config); + +protected: + ContextMutablePtr global_context; + Poco::Logger * logger; + + std::vector servers; + + Poco::Net::SocketAddress socketBindListen( + const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port) const; + + using CreateServerFunc = std::function; + virtual void createServer( + const Poco::Util::AbstractConfiguration & config, + const std::string & listen_host, + const char * port_name, + CreateServerFunc && func, + bool start_server); + + virtual void stopServersForUpdate(const 
Poco::Util::AbstractConfiguration & config, ConfigurationPtr latest_config); + + Strings getListenHosts(const Poco::Util::AbstractConfiguration & config) const; + bool getListenTry(const Poco::Util::AbstractConfiguration & config) const; +}; + +} diff --git a/src/Server/ServersManager/InterServersManager.cpp b/src/Server/ServersManager/InterServersManager.cpp new file mode 100644 index 00000000000..28491a4f4f4 --- /dev/null +++ b/src/Server/ServersManager/InterServersManager.cpp @@ -0,0 +1,327 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if USE_SSL +# include +#endif + +#if USE_NURAFT +# include +# include +#endif + +namespace ProfileEvents +{ +extern const Event InterfaceInterserverSendBytes; +extern const Event InterfaceInterserverReceiveBytes; +} + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int SUPPORT_IS_DISABLED; +} + +void InterServersManager::createServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) +{ + if (config.has("keeper_server.server_id")) + { +#if USE_NURAFT + //// If we don't have configured connection probably someone trying to use clickhouse-server instead + //// of clickhouse-keeper, so start synchronously. + bool can_initialize_keeper_async = false; + + if (zkutil::hasZooKeeperConfig(config)) /// We have configured connection to some zookeeper cluster + { + /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start + /// synchronously. + can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); + } + /// Initialize keeper RAFT. 
+ global_context->initializeKeeperDispatcher(can_initialize_keeper_async); + FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); + + auto config_getter = [this]() -> const Poco::Util::AbstractConfiguration & { return global_context->getConfigRef(); }; + + for (const auto & listen_host : getListenHosts(config)) + { + /// TCP Keeper + constexpr auto port_name = "keeper_server.tcp_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout( + Poco::Timespan(config.getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout( + Poco::Timespan(config.getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); + return ProtocolServerAdapter( + listen_host, + port_name, + "Keeper (tcp): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory( + config_getter, + global_context->getKeeperDispatcher(), + global_context->getSettingsRef().receive_timeout.totalSeconds(), + global_context->getSettingsRef().send_timeout.totalSeconds(), + false), + server_pool, + socket)); + }, + /* start_server = */ false); + + constexpr auto secure_port_name = "keeper_server.tcp_port_secure"; + createServer( + config, + listen_host, + secure_port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { +# if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout( + Poco::Timespan(config.getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout( + Poco::Timespan(config.getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); + return ProtocolServerAdapter( + listen_host, + secure_port_name, + "Keeper with secure protocol (tcp_secure): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory( + config_getter, + global_context->getKeeperDispatcher(), + global_context->getSettingsRef().receive_timeout.totalSeconds(), + global_context->getSettingsRef().send_timeout.totalSeconds(), + true), + server_pool, + socket)); +# else + UNUSED(port); + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +# endif + }, + /* start_server: */ false); + + /// HTTP control endpoints + createServer( + config, + listen_host, + /* port_name = */ "keeper_server.http_control.port", + [&](UInt16 port) -> ProtocolServerAdapter + { + auto http_context = std::make_shared(global_context); + Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(http_context->getReceiveTimeout()); + http_params->setKeepAliveTimeout(keep_alive_timeout); + + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(http_context->getReceiveTimeout()); + socket.setSendTimeout(http_context->getSendTimeout()); + return ProtocolServerAdapter( + listen_host, + port_name, + "HTTP Control: http://" + address.toString(), + std::make_unique( + std::move(http_context), + createKeeperHTTPControlMainHandlerFactory( + config_getter(), global_context->getKeeperDispatcher(), 
"KeeperHTTPControlHandler-factory"), + server_pool, + socket, + http_params)); + }, + /* start_server: */ false); + } +#else + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); +#endif + } + + { + std::lock_guard lock(servers_lock); + /// We should start interserver communications before (and more important shutdown after) tables. + /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. + /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can + /// communicate with zookeeper, execute merges, etc. + createInterserverServers(config, server, server_pool, async_metrics, start_servers, server_type); + startServers(); + } +} + +size_t InterServersManager::stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) +{ + if (servers.empty()) + { + return 0; + } + + LOG_DEBUG(logger, "Waiting for current connections to servers for tables to finish."); + + size_t current_connections = 0; + { + std::lock_guard lock(servers_lock); + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } + } + + if (current_connections) + LOG_INFO(logger, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(logger, "Closed all listening sockets."); + + if (current_connections > 0) + current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); + + if (current_connections) + LOG_INFO( + logger, + "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections " + "after context shutdown.", + current_connections); + else + LOG_INFO(logger, "Closed connections to servers for tables."); + return current_connections; +} + +void InterServersManager::updateServers( + const Poco::Util::AbstractConfiguration & config, + IServer & iserver, + std::mutex & /*servers_lock*/, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + ConfigurationPtr latest_config) +{ + stopServersForUpdate(config, latest_config); + createInterserverServers(config, iserver, server_pool, async_metrics, true, ServerType(ServerType::Type::QUERIES_ALL)); +} + +Strings InterServersManager::getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) const +{ + auto interserver_listen_hosts = DB::getMultipleValuesFromConfig(config, "", "interserver_listen_host"); + if (!interserver_listen_hosts.empty()) + return interserver_listen_hosts; + + /// Use more general restriction in case of emptiness + return getListenHosts(config); +} + +void InterServersManager::createInterserverServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); + + /// Now iterate over interserver_listen_hosts + for (const auto & interserver_listen_host : getInterserverListenHosts(config)) + { + if 
(server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) + { + /// Interserver IO HTTP + constexpr auto port_name = "interserver_http_port"; + createServer( + config, + interserver_listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "replica communication (interserver): http://" + address.toString(), + std::make_unique( + std::make_shared(global_context), + createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) + { + constexpr auto port_name = "interserver_https_port"; + createServer( + config, + interserver_listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "secure replica communication (interserver): https://" + address.toString(), + std::make_unique( + std::make_shared(global_context), + createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); +#else + UNUSED(port); + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + }, + start_servers); + } + } +} + +} diff --git a/src/Server/ServersManager/InterServersManager.h b/src/Server/ServersManager/InterServersManager.h new file mode 100644 index 00000000000..2a389e28c22 --- /dev/null +++ b/src/Server/ServersManager/InterServersManager.h @@ -0,0 +1,45 @@ +#pragma once + +#include + +namespace DB +{ + +class InterServersManager : public IServersManager +{ +public: + using IServersManager::IServersManager; + + void createServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) override; + + using IServersManager::stopServers; + size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) override; + + void updateServers( + const Poco::Util::AbstractConfiguration & config, + IServer & iserver, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + ConfigurationPtr latest_config) override; + +private: + Strings getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) const; + + void createInterserverServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type); +}; + +} diff --git 
a/src/Server/ServersManager/ProtocolServersManager.cpp b/src/Server/ServersManager/ProtocolServersManager.cpp new file mode 100644 index 00000000000..17b028eddbb --- /dev/null +++ b/src/Server/ServersManager/ProtocolServersManager.cpp @@ -0,0 +1,523 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if USE_SSL +# include +#endif + +#if USE_GRPC +# include +#endif + +namespace ProfileEvents +{ +extern const Event InterfaceNativeSendBytes; +extern const Event InterfaceNativeReceiveBytes; +extern const Event InterfaceHTTPSendBytes; +extern const Event InterfaceHTTPReceiveBytes; +extern const Event InterfacePrometheusSendBytes; +extern const Event InterfacePrometheusReceiveBytes; +extern const Event InterfaceMySQLSendBytes; +extern const Event InterfaceMySQLReceiveBytes; +extern const Event InterfacePostgreSQLSendBytes; +extern const Event InterfacePostgreSQLReceiveBytes; +extern const Event InterfaceInterserverSendBytes; +extern const Event InterfaceInterserverReceiveBytes; +} + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int SUPPORT_IS_DISABLED; +extern const int INVALID_CONFIG_PARAMETER; +} + +void ProtocolServersManager::createServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & /*servers_lock*/, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) +{ + auto listen_hosts = getListenHosts(config); + const Settings & settings = global_context->getSettingsRef(); + + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); + + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + + for (const auto & protocol : protocols) + { + if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) + continue; + + std::string prefix = "protocols." 
+ protocol + "."; + std::string port_name = prefix + "port"; + std::string description{" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + + if (!config.has(prefix + "port")) + continue; + + std::vector hosts; + if (config.has(prefix + "host")) + hosts.push_back(config.getString(prefix + "host")); + else + hosts = listen_hosts; + + for (const auto & host : hosts) + { + bool is_secure = false; + auto stack = buildProtocolStackFromConfig(config, server, protocol, http_params, async_metrics, is_secure); + + if (stack->empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); + + createServer( + config, + host, + port_name.c_str(), + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + host, + port_name.c_str(), + description + ": " + address.toString(), + std::make_unique(stack.release(), server_pool, socket, new Poco::Net::TCPServerParams)); + }, + start_servers); + } + } + + for (const auto & listen_host : listen_hosts) + { + if (server_type.shouldStart(ServerType::Type::HTTP)) + { + /// HTTP + constexpr auto port_name = "http_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "http://" + address.toString(), + std::make_unique( + std::make_shared(global_context), + createHandlerFactory(server, config, async_metrics, "HTTPHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceHTTPReceiveBytes, + ProfileEvents::InterfaceHTTPSendBytes)); + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::HTTPS)) + { + /// HTTPS + constexpr auto port_name = "https_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "https://" + address.toString(), + std::make_unique( + std::make_shared(global_context), + createHandlerFactory(server, config, async_metrics, "HTTPSHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceHTTPReceiveBytes, + ProfileEvents::InterfaceHTTPSendBytes)); +#else + UNUSED(port); + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "HTTPS protocol is disabled because Poco library was built without NetSSL support."); +#endif + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::TCP)) + { + /// TCP + constexpr auto port_name = "tcp_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + 
listen_host, + port_name, + "native protocol (tcp): " + address.toString(), + std::make_unique( + new TCPHandlerFactory( + server, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) + { + /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt + constexpr auto port_name = "tcp_with_proxy_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp) with PROXY: " + address.toString(), + std::make_unique( + new TCPHandlerFactory( + server, false, true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) + { + /// TCP with SSL + constexpr auto port_name = "tcp_port_secure"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPHandlerFactory( + server, true, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); +#else + UNUSED(port); + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::MYSQL)) + { + constexpr auto port_name = "mysql_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "MySQL compatibility protocol: " + address.toString(), + std::make_unique( + new MySQLHandlerFactory( + server, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }, + start_servers); + } + + if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) + { + constexpr auto port_name = "postgresql_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "PostgreSQL compatibility protocol: " + address.toString(), + std::make_unique( + new 
PostgreSQLHandlerFactory( + server, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }, + start_servers); + } + +#if USE_GRPC + if (server_type.shouldStart(ServerType::Type::GRPC)) + { + constexpr auto port_name = "grpc_port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::SocketAddress server_address(listen_host, port); + return ProtocolServerAdapter( + listen_host, + port_name, + "gRPC protocol: " + server_address.toString(), + std::make_unique(server, makeSocketAddress(listen_host, port, logger))); + }, + start_servers); + } +#endif + if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) + { + /// Prometheus (if defined and not setup yet with http_port) + constexpr auto port_name = "prometheus.port"; + createServer( + config, + listen_host, + port_name, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "Prometheus: http://" + address.toString(), + std::make_unique( + std::make_shared(global_context), + createHandlerFactory(server, config, async_metrics, "PrometheusHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfacePrometheusReceiveBytes, + ProfileEvents::InterfacePrometheusSendBytes)); + }, + start_servers); + } + } +} + +size_t ProtocolServersManager::stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) +{ + if (servers.empty()) + { + return 0; + } + + LOG_DEBUG(logger, "Waiting for current connections to close."); + + size_t current_connections = 0; + { + std::lock_guard lock(servers_lock); + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } + } + + if (current_connections) + LOG_WARNING(logger, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(logger, "Closed all listening sockets."); + + /// Wait for unfinished backups and restores. + /// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries + /// (because killAllQueries() will cancel all running backups/restores). + if (server_settings.shutdown_wait_backups_and_restores) + global_context->waitAllBackupsAndRestores(); + /// Killing remaining queries. + if (!server_settings.shutdown_wait_unfinished_queries) + global_context->getProcessList().killAllQueries(); + + if (current_connections) + current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); + + if (current_connections) + LOG_WARNING( + logger, + "Closed connections. But {} remain." 
+ " Tip: To increase wait time add to config: 60", + current_connections); + else + LOG_INFO(logger, "Closed connections."); + return current_connections; +} + +std::unique_ptr ProtocolServersManager::buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure) const +{ + auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr + { + if (type == "tcp") + return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory( + server, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes)); + + if (type == "tls") +#if USE_SSL + return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(server, conf_name)); +#else + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + + if (type == "proxy1") + return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(server, conf_name)); + if (type == "mysql") + return TCPServerConnectionFactory::Ptr( + new MySQLHandlerFactory(server, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes)); + if (type == "postgres") + return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory( + server, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes)); + if (type == "http") + return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( + std::make_shared(global_context), + http_params, + createHandlerFactory(server, config, async_metrics, "HTTPHandler-factory"), + ProfileEvents::InterfaceHTTPReceiveBytes, + ProfileEvents::InterfaceHTTPSendBytes)); + if (type == "prometheus") + return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( + std::make_shared(global_context), + http_params, + createHandlerFactory(server, config, async_metrics, "PrometheusHandler-factory"), + ProfileEvents::InterfacePrometheusReceiveBytes, + ProfileEvents::InterfacePrometheusSendBytes)); + if (type == "interserver") + return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( + std::make_shared(global_context), + http_params, + createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPHandler-factory"), + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); + + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); + }; + + std::string conf_name = "protocols." + protocol; + std::string prefix = conf_name + "."; + std::unordered_set pset{conf_name}; + + auto stack = std::make_unique(server, conf_name); + + while (true) + { + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "tls") + { + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; + } + + stack->append(create_factory(type, conf_name)); + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." 
+ config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception( + ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + + return stack; +} + +} diff --git a/src/Server/ServersManager/ProtocolServersManager.h b/src/Server/ServersManager/ProtocolServersManager.h new file mode 100644 index 00000000000..e9eaaeb2184 --- /dev/null +++ b/src/Server/ServersManager/ProtocolServersManager.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class ProtocolServersManager : public IServersManager +{ +public: + using IServersManager::IServersManager; + + void createServers( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + std::mutex & servers_lock, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + bool start_servers, + const ServerType & server_type) override; + + using IServersManager::stopServers; + size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) override; + +private: + std::unique_ptr buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + IServer & server, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure) const; +}; + +} From 27627f603fcfcd6df06bfb5210463c1fff8763c6 Mon Sep 17 00:00:00 2001 From: jsc0218 Date: Fri, 24 May 2024 03:04:36 +0000 Subject: [PATCH 317/392] fix --- .../0_stateless/02319_lightweight_delete_on_merge_tree.sql | 2 +- tests/queries/0_stateless/02792_drop_projection_lwd.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql index 050b8e37722..f82f79dbe44 100644 --- a/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql +++ b/tests/queries/0_stateless/02319_lightweight_delete_on_merge_tree.sql @@ -102,7 +102,7 @@ ALTER TABLE t_proj ADD PROJECTION p_1 (SELECT avg(a), avg(b), count()) SETTINGS INSERT INTO t_proj SELECT number + 1, number + 1 FROM numbers(1000); -DELETE FROM t_proj WHERE a < 100; -- { serverError BAD_ARGUMENTS } +DELETE FROM t_proj WHERE a < 100; -- { serverError NOT_IMPLEMENTED } SELECT avg(a), avg(b), count() FROM t_proj; diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql index a1d8a9c90f3..dcde7dcc600 100644 --- a/tests/queries/0_stateless/02792_drop_projection_lwd.sql +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -7,7 +7,7 @@ CREATE TABLE t_projections_lwd (a UInt32, b UInt32, PROJECTION p (SELECT * ORDER INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); -- LWD does not work, as expected -DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError BAD_ARGUMENTS } +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError NOT_IMPLEMENTED } KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' SYNC FORMAT Null; -- drop projection From 029e2ea22624f067d546317faab02f189b143df8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 05:54:16 +0200 Subject: [PATCH 318/392] Standardize references to data type docs --- .../functions/arithmetic-functions.md | 32 +- .../functions/array-functions.md | 98 +++--- .../sql-reference/functions/bit-functions.md | 20 +- 
 .../functions/bitmap-functions.md             |  14 +-
 .../functions/date-time-functions.md          | 226 ++++++-------
 .../functions/distance-functions.md           |  78 ++---
 .../functions/encoding-functions.md           |  50 +--
 .../functions/encryption-functions.md         |  44 +--
 .../functions/ext-dict-functions.md           |  32 +-
 docs/en/sql-reference/functions/files.md      |   2 +-
 .../functions/functions-for-nulls.md          |   2 +-
 .../functions/geo/coordinates.md              |   4 +-
 .../en/sql-reference/functions/geo/geohash.md |  12 +-
 docs/en/sql-reference/functions/geo/h3.md     | 214 ++++++-------
 docs/en/sql-reference/functions/geo/s2.md     |  72 ++---
 .../sql-reference/functions/hash-functions.md | 302 +++++++++---------
 docs/en/sql-reference/functions/index.md      |   4 +-
 .../sql-reference/functions/introspection.md  |  28 +-
 .../functions/ip-address-functions.md         |  26 +-
 .../sql-reference/functions/json-functions.md |  50 +--
 .../functions/logical-functions.md            |  24 +-
 .../sql-reference/functions/math-functions.md | 136 ++++----
 .../sql-reference/functions/nlp-functions.md  |  18 +-
 .../functions/other-functions.md              | 116 +++----
 .../functions/random-functions.md             |  34 +-
 .../functions/rounding-functions.md           |   8 +-
 .../functions/splitting-merging-functions.md  |  36 +--
 .../functions/string-functions.md             | 116 +++----
 .../functions/string-replace-functions.md     |   8 +-
 .../functions/string-search-functions.md      |  96 +++---
 .../functions/time-series-functions.md        |   8 +-
 .../functions/time-window-functions.md        |  10 +-
 .../functions/tuple-functions.md              |  56 ++--
 .../functions/tuple-map-functions.md          |  76 ++---
 .../functions/type-conversion-functions.md    | 168 +++++----
 .../sql-reference/functions/ulid-functions.md |   8 +-
 .../sql-reference/functions/url-functions.md  |  32 +-
 .../sql-reference/functions/uuid-functions.md |  26 +-
 .../functions/ym-dict-functions.md            |   6 +-
 39 files changed, 1146 insertions(+), 1146 deletions(-)

diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md
index 6515ab6d702..e3fb1d91c05 100644
--- a/docs/en/sql-reference/functions/arithmetic-functions.md
+++ b/docs/en/sql-reference/functions/arithmetic-functions.md
@@ -77,7 +77,7 @@ Alias: `a * b` (operator)
 
 ## divide
 
-Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function.
+Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../data-types/float.md). Integer division is provided by the `intDiv` function.
 
 Division by 0 returns `inf`, `-inf`, or `nan`.
 
@@ -172,8 +172,8 @@ ifNotFinite(x,y)
 
 **Arguments**
 
-- `x` — Value to check for infinity. [Float\*](../../sql-reference/data-types/float.md).
-- `y` — Fallback value. [Float\*](../../sql-reference/data-types/float.md).
+- `x` — Value to check for infinity. [Float\*](../data-types/float.md).
+- `y` — Fallback value. [Float\*](../data-types/float.md).
 
 **Returned value**
 
@@ -208,7 +208,7 @@ isNaN(x)
 
 Calculates the remainder of the division of two values `a` by `b`.
 
-The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md).
+The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../data-types/float.md).
 
 The remainder is computed like in C++. Truncated division is used for negative numbers.
 
@@ -312,7 +312,7 @@ lcm(a, b)
 
 ## max2
 
-Returns the bigger of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md).
+Returns the bigger of two values `a` and `b`. The returned value is of type [Float64](../data-types/float.md).
 
 **Syntax**
 
@@ -338,7 +338,7 @@ Result:
 
 ## min2
 
-Returns the smaller of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md).
+Returns the smaller of two values `a` and `b`. The returned value is of type [Float64](../data-types/float.md).
 
 **Syntax**
 
@@ -364,7 +364,7 @@ Result:
 
 ## multiplyDecimal
 
-Multiplies two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md).
+Multiplies two decimals `a` and `b`. The result value will be of type [Decimal256](../data-types/decimal.md).
 
 The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values.
 
@@ -378,13 +378,13 @@ multiplyDecimal(a, b[, result_scale])
 
 **Arguments**
 
-- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md).
+- `a` — First value. [Decimal](../data-types/decimal.md).
+- `b` — Second value. [Decimal](../data-types/decimal.md).
+- `result_scale` — Scale of result. [Int/UInt](../data-types/int-uint.md).
 
 **Returned value**
 
-- The result of multiplication with given scale. [Decimal256](../../sql-reference/data-types/decimal.md).
+- The result of multiplication with given scale. [Decimal256](../data-types/decimal.md).
 
 **Example**
 
@@ -438,7 +438,7 @@ Code: 407. DB::Exception: Received from localhost:9000. DB::Exception: Decimal m
 
 ## divideDecimal
 
-Divides two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md).
+Divides two decimals `a` and `b`. The result value will be of type [Decimal256](../data-types/decimal.md).
 
 The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values.
 
@@ -452,13 +452,13 @@ divideDecimal(a, b[, result_scale])
 
 **Arguments**
 
-- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md).
-- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md).
+- `a` — First value: [Decimal](../data-types/decimal.md).
+- `b` — Second value: [Decimal](../data-types/decimal.md).
+- `result_scale` — Scale of result: [Int/UInt](../data-types/int-uint.md).
 
 **Returned value**
 
-- The result of division with given scale. [Decimal256](../../sql-reference/data-types/decimal.md).
+- The result of division with given scale. [Decimal256](../data-types/decimal.md).
 
 **Example**
 
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index ff716804d97..7b52fbff714 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -19,7 +19,7 @@ empty([x])
 An array is considered empty if it does not contain any elements.
 
 :::note
-Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns).
With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. +Can be optimized by enabling the [`optimize_functions_to_subcolumns` setting](../../operations/settings/settings.md#optimize-functions-to-subcolumns). With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. ::: The function also works for [strings](string-functions.md#empty) or [UUID](uuid-functions.md#empty). @@ -61,7 +61,7 @@ notEmpty([x]) An array is considered non-empty if it contains at least one element. :::note -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. ::: The function also works for [strings](string-functions.md#notempty) or [UUID](uuid-functions.md#notempty). @@ -96,7 +96,7 @@ Returns the number of items in the array. The result type is UInt64. The function also works for strings. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`. Alias: `OCTET_LENGTH` @@ -577,7 +577,7 @@ arrayConcat(arrays) **Arguments** -- `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. +- `arrays` – Arbitrary number of arguments of [Array](../data-types/array.md) type. **Example** @@ -1058,7 +1058,7 @@ arrayPushBack(array, single_value) **Arguments** - `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. 
For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -1083,7 +1083,7 @@ arrayPushFront(array, single_value) **Arguments** - `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -1179,12 +1179,12 @@ arrayShingles(array, length) **Arguments** -- `array` — Input array [Array](../../sql-reference/data-types/array.md). +- `array` — Input array [Array](../data-types/array.md). - `length` — The length of each shingle. **Returned value** -- An array of generated shingles. [Array](../../sql-reference/data-types/array.md). +- An array of generated shingles. [Array](../data-types/array.md). **Examples** @@ -1760,8 +1760,8 @@ arrayReduce(agg_func, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../data-types/string.md). +- `arr` — Any number of [array](../data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** @@ -1829,13 +1829,13 @@ arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. -- `arr` — Any number of [Array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../data-types/string.md). 
+- `ranges` — The ranges to aggretate which should be an [array](../data-types/array.md) of [tuples](../data-types/tuple.md) which containing the index and the length of each range. +- `arr` — Any number of [Array](../data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** -- Array containing results of the aggregate function over specified ranges. [Array](../../sql-reference/data-types/array.md). +- Array containing results of the aggregate function over specified ranges. [Array](../data-types/array.md). **Example** @@ -1948,7 +1948,7 @@ Alias: `flatten`. **Parameters** -- `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. +- `array_of_arrays` — [Array](../data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. **Examples** @@ -1974,7 +1974,7 @@ arrayCompact(arr) **Arguments** -`arr` — The [array](../../sql-reference/data-types/array.md) to inspect. +`arr` — The [array](../data-types/array.md) to inspect. **Returned value** @@ -2008,13 +2008,13 @@ arrayZip(arr1, arr2, ..., arrN) **Arguments** -- `arrN` — [Array](../../sql-reference/data-types/array.md). +- `arrN` — [Array](../data-types/array.md). The function can take any number of arrays of different types. All the input arrays must be of equal size. **Returned value** -- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. [Array](../../sql-reference/data-types/array.md). +- Array with elements from the source arrays grouped into [tuples](../data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. [Array](../data-types/array.md). **Example** @@ -2364,8 +2364,8 @@ arrayMin([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** @@ -2421,8 +2421,8 @@ arrayMax([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** @@ -2478,8 +2478,8 @@ arraySum([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** @@ -2488,10 +2488,10 @@ arraySum([func,] arr) :::note Return type: -- For decimal numbers in the source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md). -- For floating point numbers — [Float64](../../sql-reference/data-types/float.md). -- For numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md). -- For numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). 
+- For decimal numbers in the source array (or for converted values, if `func` is specified) — [Decimal128](../data-types/decimal.md). +- For floating point numbers — [Float64](../data-types/float.md). +- For numeric unsigned — [UInt64](../data-types/int-uint.md). +- For numeric signed — [Int64](../data-types/int-uint.md). ::: **Examples** @@ -2540,12 +2540,12 @@ arrayAvg([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../data-types/array.md). **Returned value** -- The average of function values (or the array average). [Float64](../../sql-reference/data-types/float.md). +- The average of function values (or the array average). [Float64](../data-types/float.md). **Examples** @@ -2589,7 +2589,7 @@ arrayCumSum(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** @@ -2621,7 +2621,7 @@ arrayCumSumNonNegative(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** @@ -2641,7 +2641,7 @@ Note that the `arraySumNonNegative` is a [higher-order function](../../sql-refer ## arrayProduct -Multiplies elements of an [array](../../sql-reference/data-types/array.md). +Multiplies elements of an [array](../data-types/array.md). **Syntax** @@ -2651,11 +2651,11 @@ arrayProduct(arr) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../data-types/array.md) of numeric values. **Returned value** -- A product of array's elements. [Float64](../../sql-reference/data-types/float.md). +- A product of array's elements. [Float64](../data-types/float.md). **Examples** @@ -2679,7 +2679,7 @@ Query: SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as res, toTypeName(res); ``` -Return value type is always [Float64](../../sql-reference/data-types/float.md). Result: +Return value type is always [Float64](../data-types/float.md). Result: ``` text ┌─res─┬─toTypeName(arrayProduct(array(toDecimal64(1, 8), toDecimal64(2, 8), toDecimal64(3, 8))))─┐ @@ -2689,7 +2689,7 @@ Return value type is always [Float64](../../sql-reference/data-types/float.md). ## arrayRotateLeft -Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +Rotates an [array](../data-types/array.md) to the left by the specified number of elements. If the number of elements is negative, the array is rotated to the right. **Syntax** @@ -2700,12 +2700,12 @@ arrayRotateLeft(arr, n) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to rotate. **Returned value** -- An array rotated to the left by the specified number of elements. [Array](../../sql-reference/data-types/array.md). +- An array rotated to the left by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2753,7 +2753,7 @@ Result: ## arrayRotateRight -Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +Rotates an [array](../data-types/array.md) to the right by the specified number of elements. 
If the number of elements is negative, the array is rotated to the left. **Syntax** @@ -2764,12 +2764,12 @@ arrayRotateRight(arr, n) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to rotate. **Returned value** -- An array rotated to the right by the specified number of elements. [Array](../../sql-reference/data-types/array.md). +- An array rotated to the right by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2817,7 +2817,7 @@ Result: ## arrayShiftLeft -Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +Shifts an [array](../data-types/array.md) to the left by the specified number of elements. New elements are filled with the provided argument or the default value of the array element type. If the number of elements is negative, the array is shifted to the right. @@ -2829,13 +2829,13 @@ arrayShiftLeft(arr, n[, default]) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to shift. - `default` — Optional. Default value for new elements. **Returned value** -- An array shifted to the left by the specified number of elements. [Array](../../sql-reference/data-types/array.md). +- An array shifted to the left by the specified number of elements. [Array](../data-types/array.md). **Examples** @@ -2911,7 +2911,7 @@ Result: ## arrayShiftRight -Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +Shifts an [array](../data-types/array.md) to the right by the specified number of elements. New elements are filled with the provided argument or the default value of the array element type. If the number of elements is negative, the array is shifted to the left. @@ -2923,13 +2923,13 @@ arrayShiftRight(arr, n[, default]) **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md). +- `arr` — [Array](../data-types/array.md). - `n` — Number of elements to shift. - `default` — Optional. Default value for new elements. **Returned value** -- An array shifted to the right by the specified number of elements. [Array](../../sql-reference/data-types/array.md). +- An array shifted to the right by the specified number of elements. [Array](../data-types/array.md). **Examples** diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 2538ad32022..a48893b93bf 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -34,8 +34,8 @@ bitShiftLeft(a, b) **Arguments** -- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. +- `a` — A value to shift. [Integer types](../data-types/int-uint.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `b` — The number of shift positions. [Unsigned integer types](../data-types/int-uint.md), 64 bit types or less are allowed. **Returned value** @@ -81,8 +81,8 @@ bitShiftRight(a, b) **Arguments** -- `a` — A value to shift. 
[Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. +- `a` — A value to shift. [Integer types](../data-types/int-uint.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `b` — The number of shift positions. [Unsigned integer types](../data-types/int-uint.md), 64 bit types or less are allowed. **Returned value** @@ -131,13 +131,13 @@ bitSlice(s, offset[, length]) **Arguments** -- `s` — s is [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s` — s is [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset` — The start index with bit, A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the bits begins with 1. - `length` — The length of substring with bit. If you specify a negative value, the function returns an open substring \[offset, array_length - length\]. If you omit the value, the function returns the substring \[offset, the_end_string\]. If length exceeds s, it will be truncate.If length isn't multiple of 8, will fill 0 on the right. **Returned value** -- The substring. [String](../../sql-reference/data-types/string.md) +- The substring. [String](../data-types/string.md) **Example** @@ -362,7 +362,7 @@ bitCount(x) **Arguments** -- `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. +- `x` — [Integer](../data-types/int-uint.md) or [floating-point](../data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. **Returned value** @@ -402,12 +402,12 @@ bitHammingDistance(int1, int2) **Arguments** -- `int1` — First integer value. [Int64](../../sql-reference/data-types/int-uint.md). -- `int2` — Second integer value. [Int64](../../sql-reference/data-types/int-uint.md). +- `int1` — First integer value. [Int64](../data-types/int-uint.md). +- `int2` — Second integer value. [Int64](../data-types/int-uint.md). **Returned value** -- The Hamming distance. [UInt8](../../sql-reference/data-types/int-uint.md). +- The Hamming distance. [UInt8](../data-types/int-uint.md). **Examples** diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index e546de039da..a5c8a663b71 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -75,8 +75,8 @@ bitmapSubsetInRange(bitmap, range_start, range_end) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Start of the range (inclusive). [UInt32](../../sql-reference/data-types/int-uint.md). -- `range_end` – End of the range (exclusive). [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md). +- `range_end` – End of the range (exclusive). [UInt32](../data-types/int-uint.md). 
**Example** @@ -105,8 +105,8 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Start of the range (inclusive). [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md). +- `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../data-types/int-uint.md). **Example** @@ -135,8 +135,8 @@ subBitmap(bitmap, offset, cardinality_limit) **Arguments** - `bitmap` – The bitmap. [Bitmap object](#bitmap_functions-bitmapbuild). -- `offset` – The position of the first element of the subset. [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../../sql-reference/data-types/int-uint.md). +- `offset` – The position of the first element of the subset. [UInt32](../data-types/int-uint.md). +- `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../data-types/int-uint.md). **Example** @@ -163,7 +163,7 @@ bitmapContains(bitmap, needle) **Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `needle` – Searched bit value. [UInt32](../../sql-reference/data-types/int-uint.md). +- `needle` – Searched bit value. [UInt32](../data-types/int-uint.md). **Returned values** diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index a1d6dbb5930..6ad26f452ad 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -26,7 +26,7 @@ SELECT ## makeDate -Creates a [Date](../../sql-reference/data-types/date.md) +Creates a [Date](../data-types/date.md) - from a year, month and day argument, or - from a year and day of year argument. @@ -43,14 +43,14 @@ Alias: **Arguments** -- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `year` — Year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `month` — Month. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day` — Day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day_of_year` — Day of the year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- A date created from the arguments. [Date](../../sql-reference/data-types/date.md). +- A date created from the arguments. [Date](../data-types/date.md). 
**Example** @@ -83,11 +83,11 @@ Result: ``` ## makeDate32 -Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md). +Like [makeDate](#makeDate) but produces a [Date32](../data-types/date32.md). ## makeDateTime -Creates a [DateTime](../../sql-reference/data-types/datetime.md) from a year, month, day, hour, minute and second argument. +Creates a [DateTime](../data-types/datetime.md) from a year, month, day, hour, minute and second argument. **Syntax** @@ -97,17 +97,17 @@ makeDateTime(year, month, day, hour, minute, second[, timezone]) **Arguments** -- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `hour` — Hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `minute` — Minute. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `second` — Second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `year` — Year. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `month` — Month. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `day` — Day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `hour` — Hour. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `minute` — Minute. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). +- `second` — Second. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). - `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). **Returned value** -- A date with time created from the arguments. [DateTime](../../sql-reference/data-types/datetime.md). +- A date with time created from the arguments. [DateTime](../data-types/datetime.md). **Example** @@ -125,7 +125,7 @@ Result: ## makeDateTime64 -Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). +Like [makeDateTime](#makedatetime) but produces a [DateTime64](../data-types/datetime64.md). **Syntax** @@ -135,7 +135,7 @@ makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, t ## timestamp -Converts the first argument 'expr' to type [DateTime64(6)](../../sql-reference/data-types/datetime64.md). +Converts the first argument 'expr' to type [DateTime64(6)](../data-types/datetime64.md). If a second argument 'expr_time' is provided, it adds the specified time to the converted value. 
**Syntax** @@ -148,8 +148,8 @@ Alias: `TIMESTAMP` **Arguments** -- `expr` - Date or date with time. [String](../../sql-reference/data-types/string.md). -- `expr_time` - Optional parameter. Time to add. [String](../../sql-reference/data-types/string.md). +- `expr` - Date or date with time. [String](../data-types/string.md). +- `expr_time` - Optional parameter. Time to add. [String](../data-types/string.md). **Examples** @@ -179,7 +179,7 @@ Result: **Returned value** -- [DateTime64](../../sql-reference/data-types/datetime64.md)(6) +- [DateTime64](../data-types/datetime64.md)(6) ## timeZone @@ -196,7 +196,7 @@ Alias: `timezone`. **Returned value** -- Timezone. [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../data-types/string.md). **Example** @@ -231,7 +231,7 @@ Alias: `serverTimezone`. **Returned value** -- Timezone. [String](../../sql-reference/data-types/string.md). +- Timezone. [String](../data-types/string.md). **Example** @@ -265,12 +265,12 @@ Alias: `toTimezone`. **Arguments** -- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). +- `value` — Time or date and time. [DateTime64](../data-types/datetime64.md). +- `timezone` — Timezone for the returned value. [String](../data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). **Returned value** -- Date and time. [DateTime](../../sql-reference/data-types/datetime.md). +- Date and time. [DateTime](../data-types/datetime.md). **Example** @@ -310,7 +310,7 @@ int32samoa: 1546300800 ## timeZoneOf -Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types. +Returns the timezone name of [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) data types. **Syntax** @@ -322,11 +322,11 @@ Alias: `timezoneOf`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- Timezone name. [String](../../sql-reference/data-types/string.md). +- Timezone name. [String](../data-types/string.md). **Example** @@ -357,11 +357,11 @@ Alias: `timezoneOffset`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- Offset from UTC in seconds. [Int32](../../sql-reference/data-types/int-uint.md). +- Offset from UTC in seconds. [Int32](../data-types/int-uint.md). **Example** @@ -1192,12 +1192,12 @@ toStartOfSecond(value, [timezone]) **Arguments** -- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. 
[String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime64](../data-types/datetime64.md). +- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md). **Returned value** -- Input value without sub-seconds. [DateTime64](../../sql-reference/data-types/datetime64.md). +- Input value without sub-seconds. [DateTime64](../data-types/datetime64.md). **Examples** @@ -1534,12 +1534,12 @@ Alias: `TO_DAYS` **Arguments** -- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md) +- `date` — The date to calculate the number of days passed since year zero from. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `time_zone` — A String type const value or an expression represent the time zone. [String types](../data-types/string.md) **Returned value** -The number of days passed since date 0000-01-01. [UInt32](../../sql-reference/data-types/int-uint.md). +The number of days passed since date 0000-01-01. [UInt32](../data-types/int-uint.md). **Example** @@ -1563,7 +1563,7 @@ Result: Returns for a given number of days passed since [1 January 0000](https://en.wikipedia.org/wiki/Year_zero) the corresponding date in the [proleptic Gregorian calendar defined by ISO 8601](https://en.wikipedia.org/wiki/Gregorian_calendar#Proleptic_Gregorian_calendar). The calculation is the same as in MySQL's [`FROM_DAYS()`](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_from-days) function. -The result is undefined if it cannot be represented within the bounds of the [Date](../../sql-reference/data-types/date.md) type. +The result is undefined if it cannot be represented within the bounds of the [Date](../data-types/date.md) type. **Syntax** @@ -1579,7 +1579,7 @@ Alias: `FROM_DAYS` **Returned value** -The date corresponding to the number of days passed since year zero. [Date](../../sql-reference/data-types/date.md). +The date corresponding to the number of days passed since year zero. [Date](../data-types/date.md). **Example** @@ -1601,7 +1601,7 @@ Result: ## fromDaysSinceYearZero32 -Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../../sql-reference/data-types/date32.md). +Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../data-types/date32.md). ## age @@ -1618,7 +1618,7 @@ age('unit', startdate, enddate, [timezone]) **Arguments** -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval for result. [String](../data-types/string.md). Possible values: - `nanosecond`, `nanoseconds`, `ns` @@ -1633,15 +1633,15 @@ age('unit', startdate, enddate, [timezone]) - `quarter`, `quarters`, `qq`, `q` - `year`, `years`, `yyyy`, `yy` -- `startdate` — The first time value to subtract (the subtrahend). 
[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md). **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../data-types/int-uint.md). **Example** @@ -1694,7 +1694,7 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ **Arguments** -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval for result. [String](../data-types/string.md). Possible values: - `nanosecond`, `nanoseconds`, `ns` @@ -1709,15 +1709,15 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ - `quarter`, `quarters`, `qq`, `q` - `year`, `years`, `yyyy`, `yy` -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). 
-- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../data-types/string.md). **Returned value** -Difference between `enddate` and `startdate` expressed in `unit`. [Int](../../sql-reference/data-types/int-uint.md). +Difference between `enddate` and `startdate` expressed in `unit`. [Int](../data-types/int-uint.md). **Example** @@ -1781,12 +1781,12 @@ Alias: `dateTrunc`. `unit` argument is case-insensitive. -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../data-types/string.md). **Returned value** -- Value, truncated to the specified part of date. [DateTime](../../sql-reference/data-types/datetime.md). +- Value, truncated to the specified part of date. [DateTime](../data-types/datetime.md). **Example** @@ -1844,7 +1844,7 @@ Aliases: `dateAdd`, `DATE_ADD`. **Arguments** -- `unit` — The type of interval to add. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. +- `unit` — The type of interval to add. Note: This is not a [String](../data-types/string.md) and must therefore not be quoted. Possible values: - `second` @@ -1856,12 +1856,12 @@ Aliases: `dateAdd`, `DATE_ADD`. - `quarter` - `year` -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to add. [Int](../data-types/int-uint.md). +- `date` — The date or date with time to which `value` is added. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. 
[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -1918,7 +1918,7 @@ Aliases: `dateSub`, `DATE_SUB`. **Arguments** -- `unit` — The type of interval to subtract. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. +- `unit` — The type of interval to subtract. Note: This is not a [String](../data-types/string.md) and must therefore not be quoted. Possible values: @@ -1931,12 +1931,12 @@ Aliases: `dateSub`, `DATE_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to subtract. [Int](../data-types/int-uint.md). +- `date` — The date or date with time from which `value` is subtracted. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -1985,9 +1985,9 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `value` — Value of interval to add. [Int](../data-types/int-uint.md). +- `unit` — The type of interval to add. [String](../data-types/string.md). Possible values: - `second` @@ -2001,7 +2001,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Returned value** -Date or date with time with the specified `value` expressed in `unit` added to `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time with the specified `value` expressed in `unit` added to `date`. 
[Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2033,7 +2033,7 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. **Arguments** -- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to subtract. [String](../data-types/string.md). Possible values: - `second` @@ -2045,12 +2045,12 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Value of interval to subtract. [Int](../data-types/int-uint.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2080,12 +2080,12 @@ addDate(date, interval) **Arguments** -- `date` — The date or date with time to which `interval` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), or [String](../../sql-reference/data-types/string.md) -- `interval` — Interval to add. [Interval](../../sql-reference/data-types/special-data-types/interval.md). +- `date` — The date or date with time to which `interval` is added. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md), [DateTime64](../data-types/datetime64.md), or [String](../data-types/string.md) +- `interval` — Interval to add. [Interval](../data-types/special-data-types/interval.md). **Returned value** -Date or date with time obtained by adding `interval` to `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by adding `interval` to `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2121,12 +2121,12 @@ subDate(date, interval) **Arguments** -- `date` — The date or date with time from which `interval` is subtracted. 
[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), or [String](../../sql-reference/data-types/string.md) -- `interval` — Interval to subtract. [Interval](../../sql-reference/data-types/special-data-types/interval.md). +- `date` — The date or date with time from which `interval` is subtracted. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md), [DateTime64](../data-types/datetime64.md), or [String](../data-types/string.md) +- `interval` — Interval to subtract. [Interval](../data-types/special-data-types/interval.md). **Returned value** -Date or date with time obtained by subtracting `interval` from `date`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +Date or date with time obtained by subtracting `interval` from `date`. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Example** @@ -2162,11 +2162,11 @@ now([timezone]) **Arguments** -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Current date and time. [DateTime](../../sql-reference/data-types/datetime.md). +- Current date and time. [DateTime](../data-types/datetime.md). **Example** @@ -2211,11 +2211,11 @@ now64([scale], [timezone]) **Arguments** - `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. Typically, are used - 3 (default) (milliseconds), 6 (microseconds), 9 (nanoseconds). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Current date and time with sub-second precision. [DateTime64](../../sql-reference/data-types/datetime64.md). +- Current date and time with sub-second precision. [DateTime64](../data-types/datetime64.md). **Example** @@ -2245,11 +2245,11 @@ nowInBlock([timezone]) **Arguments** -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Current date and time at the moment of processing of each block of data. [DateTime](../../sql-reference/data-types/datetime.md). +- Current date and time at the moment of processing of each block of data. 
[DateTime](../data-types/datetime.md). **Example** @@ -2289,7 +2289,7 @@ today() **Returned value** -- Current date. [DateTime](../../sql-reference/data-types/datetime.md). +- Current date. [DateTime](../data-types/datetime.md). **Example** @@ -2379,7 +2379,7 @@ Result: ## YYYYMMDDToDate -Converts a number containing the year, month and day number to a [Date](../../sql-reference/data-types/date.md). +Converts a number containing the year, month and day number to a [Date](../data-types/date.md). This function is the opposite of function `toYYYYMMDD()`. @@ -2393,11 +2393,11 @@ YYYYMMDDToDate(yyyymmdd); **Arguments** -- `yyyymmdd` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `yyyymmdd` - A number representing the year, month and day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- a date created from the arguments. [Date](../../sql-reference/data-types/date.md). +- a date created from the arguments. [Date](../data-types/date.md). **Example** @@ -2415,11 +2415,11 @@ Result: ## YYYYMMDDToDate32 -Like function `YYYYMMDDToDate()` but produces a [Date32](../../sql-reference/data-types/date32.md). +Like function `YYYYMMDDToDate()` but produces a [Date32](../data-types/date32.md). ## YYYYMMDDhhmmssToDateTime -Converts a number containing the year, month, day, hours, minute and second number to a [DateTime](../../sql-reference/data-types/datetime.md). +Converts a number containing the year, month, day, hours, minute and second number to a [DateTime](../data-types/datetime.md). The output is undefined if the input does not encode a valid DateTime value. @@ -2433,12 +2433,12 @@ YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]); **Arguments** -- `yyyymmddhhmmss` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `yyyymmddhhmmss` - A number representing the year, month and day. [Integer](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). - `timezone` - [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). **Returned value** -- a date with time created from the arguments. [DateTime](../../sql-reference/data-types/datetime.md). +- a date with time created from the arguments. [DateTime](../data-types/datetime.md). **Example** @@ -2456,7 +2456,7 @@ Result: ## YYYYMMDDhhmmssToDateTime64 -Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). +Like function `YYYYMMDDhhmmssToDate()` but produces a [DateTime64](../data-types/datetime64.md). Accepts an additional, optional `precision` parameter after the `timezone` parameter. @@ -3453,7 +3453,7 @@ Formats a Time according to the given Format string. Format is a constant expres formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format. -The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime). 
+The opposite operation of this function is [parseDateTime](../functions/type-conversion-functions.md#type_conversion_functions-parseDateTime). Alias: `DATE_FORMAT`. @@ -3579,7 +3579,7 @@ LIMIT 10 Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. -The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax). +The opposite operation of this function is [parseDateTimeInJodaSyntax](../functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax). **Replacement fields** @@ -3639,13 +3639,13 @@ dateName(date_part, date) **Arguments** -- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). -- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../data-types/string.md). +- `date` — Date. [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). +- `timezone` — Timezone. Optional. [String](../data-types/string.md). **Returned value** -- The specified part of date. [String](../../sql-reference/data-types/string.md#string) +- The specified part of date. [String](../data-types/string.md#string) **Example** @@ -3677,11 +3677,11 @@ monthName(date) **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `date` — Date or date with time. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md). **Returned value** -- The name of the month. [String](../../sql-reference/data-types/string.md#string) +- The name of the month. [String](../data-types/string.md#string) **Example** @@ -3704,7 +3704,7 @@ This function converts a Unix timestamp to a calendar date and a time of a day. It can be called in two ways: -When given a single argument of type [Integer](../../sql-reference/data-types/int-uint.md), it returns a value of type [DateTime](../../sql-reference/data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime). +When given a single argument of type [Integer](../data-types/int-uint.md), it returns a value of type [DateTime](../data-types/datetime.md), i.e. behaves like [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime). Alias: `FROM_UNIXTIME`. 
@@ -3722,7 +3722,7 @@ Result: └──────────────────────────────┘ ``` -When given two or three arguments where the first argument is a value of type [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../../sql-reference/data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used. +When given two or three arguments where the first argument is a value of type [Integer](../data-types/int-uint.md), [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md), the second argument is a constant format string and the third argument is an optional constant time zone string, the function returns a value of type [String](../data-types/string.md#string), i.e. it behaves like [formatDateTime](#formatdatetime). In this case, [MySQL's datetime format style](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format) is used. **Example:** @@ -3772,11 +3772,11 @@ toModifiedJulianDay(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. [Int32](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Int32](../data-types/int-uint.md). **Example** @@ -3804,11 +3804,11 @@ toModifiedJulianDayOrNull(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. [Nullable(Int32)](../../sql-reference/data-types/int-uint.md). +- Modified Julian Day number. [Nullable(Int32)](../data-types/int-uint.md). **Example** @@ -3836,11 +3836,11 @@ fromModifiedJulianDay(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../data-types/int-uint.md). **Returned value** -- Date in text form. [String](../../sql-reference/data-types/string.md) +- Date in text form. [String](../data-types/string.md) **Example** @@ -3868,11 +3868,11 @@ fromModifiedJulianDayOrNull(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../data-types/int-uint.md). **Returned value** -- Date in text form. [Nullable(String)](../../sql-reference/data-types/string.md) +- Date in text form. [Nullable(String)](../data-types/string.md) **Example** @@ -3900,8 +3900,8 @@ toUTCTimestamp(time_val, time_zone) **Arguments** -- `time_val` — A DateTime/DateTime64 type const value or an expression . 
[DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
-- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md)
+- `time_val` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../data-types/datetime.md)
+- `time_zone` — A String type const value or an expression representing the time zone. [String types](../data-types/string.md)

**Returned value**

@@ -3933,8 +3933,8 @@ fromUTCTimestamp(time_val, time_zone)

**Arguments**

-- `time_val` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
-- `time_zone` — A String type const value or an expression represent the time zone. [String types](../../sql-reference/data-types/string.md)
+- `time_val` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../data-types/datetime.md)
+- `time_zone` — A String type const value or an expression representing the time zone. [String types](../data-types/string.md)

**Returned value**

@@ -3965,8 +3965,8 @@ timeDiff(first_datetime, second_datetime)

*Arguments**

-- `first_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
-- `second_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
+- `first_datetime` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../data-types/datetime.md)
+- `second_datetime` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../data-types/datetime.md)

**Returned value**

diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md
index 9fda491ac50..a455d0af91b 100644
--- a/docs/en/sql-reference/functions/distance-functions.md
+++ b/docs/en/sql-reference/functions/distance-functions.md
@@ -20,11 +20,11 @@ Alias: `normL1`.

**Arguments**

-- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).

**Returned value**

-- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
+- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md).

**Examples**

@@ -56,11 +56,11 @@ Alias: `normL2`.

**Arguments**

-- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).

**Returned value**

-- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). [Float](../../sql-reference/data-types/float.md).
+- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). [Float](../data-types/float.md).

**Example**

@@ -91,11 +91,11 @@ Alias: `normL2Squared`.

***Arguments**

-- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). 
+- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- L2-norm squared. [Float](../../sql-reference/data-types/float.md). +- L2-norm squared. [Float](../data-types/float.md). **Example** @@ -127,11 +127,11 @@ Alias: `normLinf`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Linf-norm or the maximum absolute value. [Float](../../sql-reference/data-types/float.md). +- Linf-norm or the maximum absolute value. [Float](../data-types/float.md). **Example** @@ -163,12 +163,12 @@ Alias: `normLp`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +- `vector` — [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md). **Returned value** -- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm). [Float](../../sql-reference/data-types/float.md). +- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm). [Float](../data-types/float.md). **Example** @@ -200,12 +200,12 @@ Alias: `distanceL1`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- 1-norm distance. [Float](../../sql-reference/data-types/float.md). +- 1-norm distance. [Float](../data-types/float.md). **Example** @@ -237,12 +237,12 @@ Alias: `distanceL2`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- 2-norm distance. [Float](../../sql-reference/data-types/float.md). +- 2-norm distance. [Float](../data-types/float.md). **Example** @@ -274,12 +274,12 @@ Alias: `distanceL2Squared`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Sum of the squares of the difference between the corresponding elements of two vectors. [Float](../../sql-reference/data-types/float.md). 
+- Sum of the squares of the difference between the corresponding elements of two vectors. [Float](../data-types/float.md).

**Example**

@@ -311,12 +311,12 @@ Alias: `distanceLinf`.

**Arguments**

-- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector1` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).
+- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).

**Returned value**

-- Infinity-norm distance. [Float](../../sql-reference/data-types/float.md).
+- Infinity-norm distance. [Float](../data-types/float.md).

**Example**

@@ -348,13 +348,13 @@ Alias: `distanceLp`.

**Arguments**

-- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
+- `vector1` — First vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).
+- `vector2` — Second vector. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md).
+- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md).

**Returned value**

-- p-norm distance. [Float](../../sql-reference/data-types/float.md).
+- p-norm distance. [Float](../data-types/float.md).

**Example**

@@ -387,11 +387,11 @@ Alias: `normalizeL1`.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../data-types/tuple.md).

**Returned value**

-- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).
+- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md).

**Example**

@@ -423,11 +423,11 @@ Alias: `normalizeL1`.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../data-types/tuple.md).

**Returned value**

-- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).
+- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md).

**Example**

@@ -459,11 +459,11 @@ Alias: `normalizeLinf `.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../data-types/tuple.md).

**Returned value**

-- Unit vector. [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).
+- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md).

**Example**

@@ -495,12 +495,12 @@ Alias: `normalizeLp `.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
-- `p` — The power. Possible values: any number from [1;inf). [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
+- `tuple` — [Tuple](../data-types/tuple.md).
+- `p` — The power. Possible values: any number from [1;inf). [UInt](../data-types/int-uint.md) or [Float](../data-types/float.md).

**Returned value**

-- Unit vector. 
[Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md). +- Unit vector. [Tuple](../data-types/tuple.md) of [Float](../data-types/float.md). **Example** @@ -530,12 +530,12 @@ cosineDistance(vector1, vector2) **Arguments** -- `vector1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First tuple. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). +- `vector2` — Second tuple. [Tuple](../data-types/tuple.md) or [Array](../data-types/array.md). **Returned value** -- Cosine of the angle between two vectors subtracted from one. [Float](../../sql-reference/data-types/float.md). +- Cosine of the angle between two vectors subtracted from one. [Float](../data-types/float.md). **Examples** diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index bc64fdea427..408b605727d 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -18,7 +18,7 @@ char(number_1, [number_2, ..., number_n]); **Arguments** -- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md). +- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../data-types/int-uint.md), [Float](../data-types/float.md). **Returned value** @@ -86,21 +86,21 @@ The function is using uppercase letters `A-F` and not using any prefixes (like ` For integer arguments, it prints hex digits (“nibbles”) from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints both digits of every byte even if the leading digit is zero. -Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime). +Values of type [Date](../data-types/date.md) and [DateTime](../data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for Date and the value of Unix Timestamp for DateTime). -For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted. +For [String](../data-types/string.md) and [FixedString](../data-types/fixedstring.md), all bytes are simply encoded as two hexadecimal numbers. Zero bytes are not omitted. -Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. +Values of [Float](../data-types/float.md) and [Decimal](../data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. 
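To make these encoding rules concrete, a small illustrative query (the literals are arbitrary examples; the commented outputs are what the rules above imply):

```sql
-- Integer: leading zero bytes are dropped, both digits of each remaining byte are kept
SELECT hex(1);              -- 01
-- String: every byte becomes two hexadecimal digits, nothing is omitted
SELECT hex('abc');          -- 616263
-- Float32: the little-endian in-memory bytes are dumped as-is
SELECT hex(toFloat32(1.5)); -- 0000C03F
```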
Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order string. **Arguments** -- `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `arg` — A value to convert to hexadecimal. Types: [String](../data-types/string.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). **Returned value** -- A string with the hexadecimal representation of the argument. [String](../../sql-reference/data-types/string.md). +- A string with the hexadecimal representation of the argument. [String](../data-types/string.md). **Examples** @@ -181,13 +181,13 @@ unhex(arg) **Arguments** -- `arg` — A string containing any number of hexadecimal digits. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). +- `arg` — A string containing any number of hexadecimal digits. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md). Supports both uppercase and lowercase letters `A-F`. The number of hexadecimal digits does not have to be even. If it is odd, the last digit is interpreted as the least significant half of the `00-0F` byte. If the argument string contains anything other than hexadecimal digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of hex(N) is not performed by unhex(). **Returned value** -- A binary string (BLOB). [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../data-types/string.md). **Example** @@ -231,21 +231,21 @@ Alias: `BIN`. For integer arguments, it prints bin digits from the most significant to least significant (big-endian or “human-readable” order). It starts with the most significant non-zero byte (leading zero bytes are omitted) but always prints eight digits of every byte if the leading digit is zero. -Values of type [Date](../../sql-reference/data-types/date.md) and [DateTime](../../sql-reference/data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for `Date` and the value of Unix Timestamp for `DateTime`). +Values of type [Date](../data-types/date.md) and [DateTime](../data-types/datetime.md) are formatted as corresponding integers (the number of days since Epoch for `Date` and the value of Unix Timestamp for `DateTime`). -For [String](../../sql-reference/data-types/string.md) and [FixedString](../../sql-reference/data-types/fixedstring.md), all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted. +For [String](../data-types/string.md) and [FixedString](../data-types/fixedstring.md), all bytes are simply encoded as eight binary numbers. Zero bytes are not omitted. -Values of [Float](../../sql-reference/data-types/float.md) and [Decimal](../../sql-reference/data-types/decimal.md) types are encoded as their representation in memory. As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. +Values of [Float](../data-types/float.md) and [Decimal](../data-types/decimal.md) types are encoded as their representation in memory. 
As we support little-endian architecture, they are encoded in little-endian. Zero leading/trailing bytes are not omitted. Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order string. **Arguments** -- `arg` — A value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), or [DateTime](../../sql-reference/data-types/datetime.md). +- `arg` — A value to convert to binary. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), [Date](../data-types/date.md), or [DateTime](../data-types/datetime.md). **Returned value** -- A string with the binary representation of the argument. [String](../../sql-reference/data-types/string.md). +- A string with the binary representation of the argument. [String](../data-types/string.md). **Examples** @@ -330,11 +330,11 @@ Supports binary digits `0` and `1`. The number of binary digits does not have to **Arguments** -- `arg` — A string containing any number of binary digits. [String](../../sql-reference/data-types/string.md). +- `arg` — A string containing any number of binary digits. [String](../data-types/string.md). **Returned value** -- A binary string (BLOB). [String](../../sql-reference/data-types/string.md). +- A binary string (BLOB). [String](../data-types/string.md). **Examples** @@ -386,11 +386,11 @@ bitPositionsToArray(arg) **Arguments** -- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `arg` — Integer value. [Int/UInt](../data-types/int-uint.md). **Returned value** -- An array containing a list of positions of bits that equal `1`, in ascending order. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- An array containing a list of positions of bits that equal `1`, in ascending order. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -442,11 +442,11 @@ mortonEncode(args) **Parameters** -- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. +- `args`: up to 8 [unsigned integers](../data-types/int-uint.md) or columns of the aforementioned type. **Returned value** -- A UInt64 code. [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. [UInt64](../data-types/int-uint.md) **Example** @@ -463,7 +463,7 @@ Result: ### Expanded mode -Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments. +Accepts a range mask ([tuple](../data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../data-types/int-uint.md) as other arguments. Each number in the mask configures the amount of range expansion:
1 - no expansion
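A rough usage sketch of the expanded mode described above (the mask and argument values are arbitrary placeholders):

```sql
-- The tuple (1, 2) keeps the first argument's range as-is and expands the
-- second argument's range 2x before the bits are interleaved into one code.
SELECT mortonEncode((1, 2), 1024, 16) AS morton_code;
```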
@@ -480,13 +480,13 @@ mortonEncode(range_mask, args) **Parameters** - `range_mask`: 1-8. -- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. +- `args`: up to 8 [unsigned integers](../data-types/int-uint.md) or columns of the aforementioned type. Note: when using columns for `args` the provided `range_mask` tuple should still be a constant. **Returned value** -- A UInt64 code. [UInt64](../../sql-reference/data-types/int-uint.md) +- A UInt64 code. [UInt64](../data-types/int-uint.md) **Example** @@ -579,7 +579,7 @@ Result: **implementation details** -Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. +Please note that you can fit only so many bits of information into Morton code as [UInt64](../data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. ## mortonDecode @@ -601,11 +601,11 @@ mortonDecode(tuple_size, code) **Parameters** - `tuple_size`: integer value no more than 8. -- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code. +- `code`: [UInt64](../data-types/int-uint.md) code. **Returned value** -- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. [UInt64](../../sql-reference/data-types/int-uint.md) +- [tuple](../data-types/tuple.md) of the specified size. [UInt64](../data-types/int-uint.md) **Example** diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 00c9ef376d3..5d82e26eb32 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -30,15 +30,15 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Arguments** -- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). -- `plaintext` — Text that need to be encrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string). -- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Encryption mode. [String](../data-types/string.md#string). +- `plaintext` — Text that need to be encrypted. [String](../data-types/string.md#string). +- `key` — Encryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../data-types/string.md#string). +- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../data-types/string.md#string). **Returned value** -- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../data-types/string.md#string). 
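A minimal sketch of the call shape (the key and IV literals are placeholders of the lengths required by the `aes-256-ofb` mode, assuming that mode is available; they are not recommended secrets):

```sql
-- 32-byte key and 16-byte IV; hex() makes the binary ciphertext readable
SELECT hex(encrypt('aes-256-ofb', 'Secret', 'keykeykeykeykeykeykeykeykeykey12', 'iviviviviviviviv')) AS ciphertext_hex;
```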
**Examples** @@ -123,14 +123,14 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv]) **Arguments** -- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). -- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string). +- `mode` — Encryption mode. [String](../data-types/string.md#string). +- `plaintext` — Text that needs to be encrypted. [String](../data-types/string.md#string). +- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../data-types/string.md#string). **Returned value** -- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../data-types/string.md#string). **Examples** @@ -230,15 +230,15 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) **Arguments** -- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). -- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, Optional for others. [String](../../sql-reference/data-types/string.md#string). -- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Decryption mode. [String](../data-types/string.md#string). +- `ciphertext` — Encrypted text that needs to be decrypted. [String](../data-types/string.md#string). +- `key` — Decryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, Optional for others. [String](../data-types/string.md#string). +- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../data-types/string.md#string). **Returned value** -- Decrypted String. [String](../../sql-reference/data-types/string.md#string). +- Decrypted String. [String](../data-types/string.md#string). **Examples** @@ -361,14 +361,14 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) **Arguments** -- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). -- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). -- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string). +- `mode` — Decryption mode. [String](../data-types/string.md#string). +- `ciphertext` — Encrypted text that needs to be decrypted. [String](../data-types/string.md#string). +- `key` — Decryption key. [String](../data-types/string.md#string). +- `iv` — Initialization vector. Optional. [String](../data-types/string.md#string). 
**Returned value** -- Decrypted String. [String](../../sql-reference/data-types/string.md#string). +- Decrypted String. [String](../data-types/string.md#string). **Examples** diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 41657aafbbe..82c21ce40c8 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -25,9 +25,9 @@ dictGetOrNull('dict_name', attr_name, id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. -- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../../sql-reference/data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute. +- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. +- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute. **Returned value** @@ -239,7 +239,7 @@ dictHas('dict_name', id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. **Returned value** @@ -259,11 +259,11 @@ dictGetHierarchy('dict_name', key) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned value** -- Parents for the key. 
[Array(UInt64)](../../sql-reference/data-types/array.md). +- Parents for the key. [Array(UInt64)](../data-types/array.md). ## dictIsIn @@ -276,8 +276,8 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. +- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned value** @@ -297,11 +297,11 @@ dictGetChildren(dict_name, key) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. **Returned values** -- First-level descendants for the key. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- First-level descendants for the key. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -344,12 +344,12 @@ dictGetDescendants(dict_name, key, level) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../../sql-reference/data-types/int-uint.md). +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md)-type value. +- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../data-types/int-uint.md). **Returned values** -- Descendants for the key. [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). +- Descendants for the key. [Array](../data-types/array.md)([UInt64](../data-types/int-uint.md)). **Example** @@ -409,8 +409,8 @@ dictGetAll('dict_name', attr_names, id_expr[, limit]) **Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). -- `id_expr` — Key value. 
[Expression](../../sql-reference/syntax.md#syntax-expressions) returning array of dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)). +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning array of dictionary key-type value or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. - `limit` - Maximum length for each value array returned. When truncating, child nodes are given precedence over parent nodes, and otherwise the defined list order for the regexp tree dictionary is respected. If unspecified, array length is unlimited. **Returned value** @@ -499,7 +499,7 @@ dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. +- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../data-types/int-uint.md) or [Tuple](../data-types/tuple.md)-type value depending on the dictionary configuration. - `default_value_expr` — Value returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning the value in the data type configured for the `attr_name` attribute. **Returned value** diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md index d62cd1db88d..ac9e21cd416 100644 --- a/docs/en/sql-reference/functions/files.md +++ b/docs/en/sql-reference/functions/files.md @@ -19,7 +19,7 @@ file(path[, default]) **Arguments** - `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. -- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). +- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). **Example** diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 90520145b9d..a0dfbebc8ae 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -351,7 +351,7 @@ Result: ## assumeNotNull -Returns the corresponding non-`Nullable` value for a value of [Nullable](../../sql-reference/data-types/nullable.md) type. 
If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`. +Returns the corresponding non-`Nullable` value for a value of [Nullable](../data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`. ``` sql assumeNotNull(x) diff --git a/docs/en/sql-reference/functions/geo/coordinates.md b/docs/en/sql-reference/functions/geo/coordinates.md index 1cbc1933206..d10573b8995 100644 --- a/docs/en/sql-reference/functions/geo/coordinates.md +++ b/docs/en/sql-reference/functions/geo/coordinates.md @@ -152,8 +152,8 @@ pointInPolygon((x, y), [(a, b), (c, d) ...], ...) **Input values** -- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../../sql-reference/data-types/tuple.md) — A tuple of two numbers. -- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant. +- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../data-types/tuple.md) — A tuple of two numbers. +- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant. - The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons. **Returned values** diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index 80c55650b9c..8abc8006e5d 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -74,11 +74,11 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi **Arguments** -- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. [Float](../../../sql-reference/data-types/float.md). -- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. [Float](../../../sql-reference/data-types/float.md). -- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. [Float](../../../sql-reference/data-types/float.md). -- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. [Float](../../../sql-reference/data-types/float.md). -- `precision` — Geohash precision. Range: `[1, 12]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. [Float](../../data-types/float.md). +- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. [Float](../../data-types/float.md). +- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. [Float](../../data-types/float.md). +- `precision` — Geohash precision. Range: `[1, 12]`. [UInt8](../../data-types/int-uint.md). :::note All coordinate parameters must be of the same type: either `Float32` or `Float64`. 
@@ -86,7 +86,7 @@ All coordinate parameters must be of the same type: either `Float32` or `Float64 **Returned values** -- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). +- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. [Array](../../data-types/array.md)([String](../../data-types/string.md)). - `[]` - Empty array if minimum latitude and longitude values aren’t less than corresponding maximum values. :::note diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 7faff8288b3..bcdd457964a 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -26,12 +26,12 @@ h3IsValid(h3index) **Parameter** -- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- 1 — The number is a valid H3 index. [UInt8](../../../sql-reference/data-types/int-uint.md). -- 0 — The number is not a valid H3 index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The number is a valid H3 index. [UInt8](../../data-types/int-uint.md). +- 0 — The number is not a valid H3 index. [UInt8](../../data-types/int-uint.md). **Example** @@ -61,12 +61,12 @@ h3GetResolution(h3index) **Parameter** -- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). -- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). +- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. [UInt8](../../data-types/int-uint.md). **Example** @@ -96,11 +96,11 @@ h3EdgeAngle(resolution) **Parameter** -- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../data-types/float.md). **Example** @@ -130,11 +130,11 @@ h3EdgeLengthM(resolution) **Parameter** -- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../data-types/float.md). **Example** @@ -164,11 +164,11 @@ h3EdgeLengthKm(resolution) **Parameter** -- `resolution` — Index resolution. [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. [UInt8](../../data-types/int-uint.md). 
Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../data-types/float.md). **Example** @@ -198,14 +198,14 @@ geoToH3(lon, lat, resolution) **Arguments** -- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `lon` — Longitude. [Float64](../../data-types/float.md). +- `lat` — Latitude. [Float64](../../data-types/float.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- 0 in case of error. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. [UInt64](../../data-types/int-uint.md). +- 0 in case of error. [UInt64](../../data-types/int-uint.md). **Example** @@ -235,11 +235,11 @@ h3ToGeo(h3Index) **Arguments** -- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../data-types/int-uint.md). **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../data-types/float.md). `lat` — Latitude. [Float64](../../data-types/float.md). **Example** @@ -269,11 +269,11 @@ h3ToGeoBoundary(h3Index) **Arguments** -- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array of pairs '(lon, lat)'. [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../data-types/array.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). **Example** @@ -304,12 +304,12 @@ h3kRing(h3index, k) **Arguments** -- `h3index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Radius. [integer](../../../sql-reference/data-types/int-uint.md) +- `h3index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `k` — Radius. [integer](../../data-types/int-uint.md) **Returned values** -- Array of H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -345,11 +345,11 @@ h3GetBaseCell(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Hexagon base cell number. [UInt8](../../../sql-reference/data-types/int-uint.md). +- Hexagon base cell number. [UInt8](../../data-types/int-uint.md). **Example** @@ -379,11 +379,11 @@ h3HexAreaM2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). 
+- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Area in square meters. [Float64](../../../sql-reference/data-types/float.md). +- Area in square meters. [Float64](../../data-types/float.md). **Example** @@ -413,11 +413,11 @@ h3HexAreaKm2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Area in square kilometers. [Float64](../../../sql-reference/data-types/float.md). +- Area in square kilometers. [Float64](../../data-types/float.md). **Example** @@ -447,13 +447,13 @@ h3IndexesAreNeighbors(index1, index2) **Arguments** -- `index1` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `index2` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index1` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `index2` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Indexes are neighbours. [UInt8](../../../sql-reference/data-types/int-uint.md). -- `0` — Indexes are not neighbours. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Indexes are neighbours. [UInt8](../../data-types/int-uint.md). +- `0` — Indexes are not neighbours. [UInt8](../../data-types/int-uint.md). **Example** @@ -483,12 +483,12 @@ h3ToChildren(index, resolution) **Arguments** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- Array of the child H3-indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of the child H3-indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -518,12 +518,12 @@ h3ToParent(index, resolution) **Arguments** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Parent H3 index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Parent H3 index. [UInt64](../../data-types/int-uint.md). **Example** @@ -551,11 +551,11 @@ h3ToString(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- String representation of the H3 index. [String](../../../sql-reference/data-types/string.md). +- String representation of the H3 index. [String](../../data-types/string.md). **Example** @@ -585,11 +585,11 @@ stringToH3(index_str) **Parameter** -- `index_str` — String representation of the H3 index. [String](../../../sql-reference/data-types/string.md). +- `index_str` — String representation of the H3 index. [String](../../data-types/string.md). **Returned value** -- Hexagon index number. 
Returns 0 on error. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. Returns 0 on error. [UInt64](../../data-types/int-uint.md). **Example** @@ -619,11 +619,11 @@ h3GetResolution(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Example** @@ -653,12 +653,12 @@ h3IsResClassIII(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Index has a resolution with Class III orientation. [UInt8](../../../sql-reference/data-types/int-uint.md). -- `0` — Index doesn't have a resolution with Class III orientation. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index has a resolution with Class III orientation. [UInt8](../../data-types/int-uint.md). +- `0` — Index doesn't have a resolution with Class III orientation. [UInt8](../../data-types/int-uint.md). **Example** @@ -688,12 +688,12 @@ h3IsPentagon(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — Index represents a pentagonal cell. [UInt8](../../../sql-reference/data-types/int-uint.md). -- `0` — Index doesn't represent a pentagonal cell. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — Index represents a pentagonal cell. [UInt8](../../data-types/int-uint.md). +- `0` — Index doesn't represent a pentagonal cell. [UInt8](../../data-types/int-uint.md). **Example** @@ -723,11 +723,11 @@ h3GetFaces(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array containing icosahedron faces intersected by a given H3 index. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array containing icosahedron faces intersected by a given H3 index. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -757,11 +757,11 @@ h3CellAreaM2(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Cell area in square meters. [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square meters. [Float64](../../data-types/float.md). **Example** @@ -791,11 +791,11 @@ h3CellAreaRads2(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Cell area in square radians. [Float64](../../../sql-reference/data-types/float.md). +- Cell area in square radians. [Float64](../../data-types/float.md). **Example** @@ -825,12 +825,12 @@ h3ToCenterChild(index, resolution) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. 
Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned values** -- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../../sql-reference/data-types/int-uint.md). +- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../data-types/int-uint.md). **Example** @@ -860,11 +860,11 @@ h3ExactEdgeLengthM(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in meters. [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in meters. [Float64](../../data-types/float.md). **Example** @@ -894,11 +894,11 @@ h3ExactEdgeLengthKm(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in kilometers. [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in kilometers. [Float64](../../data-types/float.md). **Example** @@ -928,11 +928,11 @@ h3ExactEdgeLengthRads(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Exact edge length in radians. [Float64](../../../sql-reference/data-types/float.md). +- Exact edge length in radians. [Float64](../../data-types/float.md). **Example** @@ -962,11 +962,11 @@ h3NumHexagons(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Number of H3 indices. [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of H3 indices. [Int64](../../data-types/int-uint.md). **Example** @@ -996,12 +996,12 @@ h3PointDistM(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). **Returned values** -- Haversine or great circle distance in meters.[Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in meters.[Float64](../../data-types/float.md). **Example** @@ -1031,12 +1031,12 @@ h3PointDistKm(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). 
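For orientation, a minimal `h3PointDistKm` call might look like the sketch below; the coordinates are arbitrary sample values, and the latitude, longitude argument order follows the signature shown above.

```sql
-- Illustrative only: Haversine / great-circle distance in kilometers between
-- two arbitrary sample points given as (lat, lon) pairs in degrees.
SELECT h3PointDistKm(55.7512, 37.6184, 59.9386, 30.3141) AS distance_km;
```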
**Returned values** -- Haversine or great circle distance in kilometers. [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in kilometers. [Float64](../../data-types/float.md). **Example** @@ -1066,12 +1066,12 @@ h3PointDistRads(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. [Float64](../../data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. [Float64](../../data-types/float.md). **Returned values** -- Haversine or great circle distance in radians. [Float64](../../../sql-reference/data-types/float.md). +- Haversine or great circle distance in radians. [Float64](../../data-types/float.md). **Example** @@ -1101,7 +1101,7 @@ h3GetRes0Indexes() **Returned values** -- Array of all the resolution 0 H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all the resolution 0 H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1132,11 +1132,11 @@ h3GetPentagonIndexes(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. [UInt8](../../data-types/int-uint.md). **Returned value** -- Array of all pentagon H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of all pentagon H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1166,12 +1166,12 @@ h3Line(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. [UInt64](../../data-types/int-uint.md). **Returned value** -Array of h3 indexes representing the line of indices between the two provided indices. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing the line of indices between the two provided indices. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1201,12 +1201,12 @@ h3Distance(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. [UInt64](../../data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. [UInt64](../../data-types/int-uint.md). **Returned value** -- Number of grid cells. [Int64](../../../sql-reference/data-types/int-uint.md). +- Number of grid cells. [Int64](../../data-types/int-uint.md). Returns a negative number if finding the distance fails. 
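Because the negative return value on failure is easy to miss, a usage sketch for `h3Distance` follows; both indexes are built from arbitrary sample coordinates with `geoToH3` so that they share the same resolution, which is an assumption of this illustration.

```sql
-- Illustrative only: grid distance between two cells indexed at resolution 9.
-- A negative result would indicate that the distance could not be computed.
SELECT h3Distance(
    geoToH3(37.6184, 55.7512, 9),
    geoToH3(37.6300, 55.7600, 9)
) AS grid_distance;
```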
@@ -1240,12 +1240,12 @@ h3HexRing(index, k) **Parameter** -- `index` — Hexagon index number that represents the origin. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Distance. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents the origin. [UInt64](../../data-types/int-uint.md). +- `k` — Distance. [UInt64](../../data-types/int-uint.md). **Returned values** -- Array of H3 indexes. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +- Array of H3 indexes. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1275,12 +1275,12 @@ h3GetUnidirectionalEdge(originIndex, destinationIndex) **Parameter** -- `originIndex` — Origin Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destinationIndex` — Destination Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `originIndex` — Origin Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `destinationIndex` — Destination Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- Unidirectional Edge Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Unidirectional Edge Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1310,12 +1310,12 @@ h3UnidirectionalEdgeisValid(index) **Parameter** -- `index` — Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. [UInt64](../../data-types/int-uint.md). **Returned value** -- 1 — The H3 index is a valid unidirectional edge. [UInt8](../../../sql-reference/data-types/int-uint.md). -- 0 — The H3 index is not a valid unidirectional edge. [UInt8](../../../sql-reference/data-types/int-uint.md). +- 1 — The H3 index is a valid unidirectional edge. [UInt8](../../data-types/int-uint.md). +- 0 — The H3 index is not a valid unidirectional edge. [UInt8](../../data-types/int-uint.md). **Example** @@ -1345,11 +1345,11 @@ h3GetOriginIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Origin Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Origin Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1379,11 +1379,11 @@ h3GetDestinationIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Destination Hexagon Index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- Destination Hexagon Index number. [UInt64](../../data-types/int-uint.md). **Example** @@ -1413,14 +1413,14 @@ h3GetIndexesFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). 
**Returned value** A tuple consisting of two values `tuple(origin,destination)`: -- `origin` — Origin Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destination` — Destination Hexagon index number. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `origin` — Origin Hexagon index number. [UInt64](../../data-types/int-uint.md). +- `destination` — Destination Hexagon index number. [UInt64](../../data-types/int-uint.md). Returns `(0,0)` if the provided input is not valid. @@ -1452,11 +1452,11 @@ h3GetUnidirectionalEdgesFromHexagon(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -Array of h3 indexes representing each unidirectional edge. [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). +Array of h3 indexes representing each unidirectional edge. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -1486,11 +1486,11 @@ h3GetUnidirectionalEdgeBoundary(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. [UInt64](../../data-types/int-uint.md). **Returned value** -- Array of pairs '(lon, lat)'. [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). +- Array of pairs '(lon, lat)'. [Array](../../data-types/array.md)([Float64](../../data-types/float.md), [Float64](../../data-types/float.md)). **Example** diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index 2158ef2d57d..3165b21318b 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -21,12 +21,12 @@ geoToS2(lon, lat) **Arguments** -- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- `lon` — Longitude. [Float64](../../data-types/float.md). +- `lat` — Latitude. [Float64](../../data-types/float.md). **Returned values** -- S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- S2 point index. [UInt64](../../data-types/int-uint.md). **Example** @@ -56,13 +56,13 @@ s2ToGeo(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../data-types/int-uint.md). **Returned values** - A [tuple](../../data-types/tuple.md) consisting of two values: - - `lon`. [Float64](../../../sql-reference/data-types/float.md). - - `lat`. [Float64](../../../sql-reference/data-types/float.md). + - `lon`. [Float64](../../data-types/float.md). + - `lat`. [Float64](../../data-types/float.md). **Example** @@ -92,11 +92,11 @@ s2GetNeighbors(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../data-types/int-uint.md). **Returned value** -- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. [Array](../../data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). 
+- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)). **Example** @@ -126,12 +126,12 @@ s2CellsIntersect(s2index1, s2index2) **Arguments** -- `siIndex1`, `s2index2` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `siIndex1`, `s2index2` — S2 Index. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — If the cells intersect. [UInt8](../../../sql-reference/data-types/int-uint.md). -- `0` — If the cells don't intersect. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cells intersect. [UInt8](../../data-types/int-uint.md). +- `0` — If the cells don't intersect. [UInt8](../../data-types/int-uint.md). **Example** @@ -161,14 +161,14 @@ s2CapContains(center, degrees, point) **Arguments** -- `center` — S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `degrees` — Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `center` — S2 point index corresponding to the cap. [UInt64](../../data-types/int-uint.md). +- `degrees` — Radius of the cap in degrees. [Float64](../../data-types/float.md). +- `point` — S2 point index. [UInt64](../../data-types/int-uint.md). **Returned value** -- `1` — If the cap contains the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). -- `0` — If the cap doesn't contain the S2 point index. [UInt8](../../../sql-reference/data-types/int-uint.md). +- `1` — If the cap contains the S2 point index. [UInt8](../../data-types/int-uint.md). +- `0` — If the cap doesn't contain the S2 point index. [UInt8](../../data-types/int-uint.md). **Example** @@ -198,13 +198,13 @@ s2CapUnion(center1, radius1, center2, radius2) **Arguments** -- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../data-types/int-uint.md). +- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../data-types/float.md). **Returned values** -- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius` — Radius of the smallest cap containing the two input caps. [Float64](../../../sql-reference/data-types/float.md). +- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. [UInt64](../../data-types/int-uint.md). +- `radius` — Radius of the smallest cap containing the two input caps. [Float64](../../data-types/float.md). **Example** @@ -234,14 +234,14 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. 
[UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../data-types/int-uint.md). **Returned values** -- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. [UInt64](../../../sql-reference/data-types/float.md). +- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. [UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. [UInt64](../../data-types/float.md). **Example** @@ -271,9 +271,9 @@ s2RectContains(s2PointLow, s2PointHi, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Point` — Target S2 point index. [UInt64](../../data-types/int-uint.md). **Returned value** @@ -308,13 +308,13 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi) **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. [UInt64](../../data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. [UInt64](../../data-types/int-uint.md). **Example** @@ -344,13 +344,13 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. 
[UInt64](../../data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. [UInt64](../../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index e3968a691a8..506114038f7 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -12,7 +12,7 @@ Simhash is a hash function, which returns close hash values for close (similar) ## halfMD5 -[Interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. +[Interprets](../functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. ```sql halfMD5(par1, ...) @@ -23,11 +23,11 @@ Consider using the [sipHash64](#siphash64) function instead. **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -61,7 +61,7 @@ sipHash64(par1,...) This is a cryptographic hash function. It works at least three times faster than the [MD5](#md5) hash function. -The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. 
It then combines the hashes by the following algorithm: +The function [interprets](../functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: 1. The first and the second hash value are concatenated to an array which is hashed. 2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. @@ -69,11 +69,11 @@ The function [interprets](/docs/en/sql-reference/functions/type-conversion-funct **Arguments** -The function takes a variable number of input parameters of any of the [supported data types](/docs/en/sql-reference/data-types/index.md). +The function takes a variable number of input parameters of any of the [supported data types](../data-types/index.md). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. Note that the calculated hash values may be equal for the same input values of different argument types. This affects for example integer types of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data. @@ -105,7 +105,7 @@ Same as [sipHash64](#siphash64), but the first argument is a tuple of two UInt64 **Returned value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -143,7 +143,7 @@ Same as for [sipHash64](#siphash64). **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -183,7 +183,7 @@ Same as [sipHash128](#siphash128), but the first argument is a tuple of two UInt **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -217,7 +217,7 @@ Same as for [sipHash128](#siphash128). **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -251,7 +251,7 @@ Same as [sipHash128Reference](#siphash128reference), but the first argument is a **Returned value** -A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `SipHash` hash value of type [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -283,11 +283,11 @@ Note that Google changed the algorithm of CityHash after it has been added to Cl **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). 
For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Examples** @@ -321,7 +321,7 @@ It works faster than intHash32. Average quality. ## SHA1, SHA224, SHA256, SHA512, SHA512_256 -Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Calculates SHA-1, SHA-224, SHA-256, SHA-512, SHA-512-256 hash from a string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md). **Syntax** @@ -337,15 +337,15 @@ Even in these cases, we recommend applying the function offline and pre-calculat **Arguments** -- `s` — Input string for SHA hash calculation. [String](/docs/en/sql-reference/data-types/string.md). +- `s` — Input string for SHA hash calculation. [String](../data-types/string.md). **Returned value** -- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). [FixedString](../data-types/fixedstring.md). **Example** -Use the [hex](/docs/en/sql-reference/functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string. +Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string. Query: @@ -363,7 +363,7 @@ Result: ## BLAKE3 -Calculates BLAKE3 hash string and returns the resulting set of bytes as [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Calculates BLAKE3 hash string and returns the resulting set of bytes as [FixedString](../data-types/fixedstring.md). **Syntax** @@ -375,15 +375,15 @@ This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust **Arguments** -- s - input string for BLAKE3 hash calculation. [String](/docs/en/sql-reference/data-types/string.md). +- s - input string for BLAKE3 hash calculation. [String](../data-types/string.md). **Return value** -- BLAKE3 hash as a byte array with type FixedString(32). [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +- BLAKE3 hash as a byte array with type FixedString(32). [FixedString](../data-types/fixedstring.md). **Example** -Use function [hex](/docs/en/sql-reference/functions/encoding-functions.md/#hex) to represent the result as a hex-encoded string. +Use function [hex](../functions/encoding-functions.md/#hex) to represent the result as a hex-encoded string. Query: ```sql @@ -419,11 +419,11 @@ These functions use the `Fingerprint64` and `Hash64` methods respectively from a **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). 
For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -564,11 +564,11 @@ metroHash64(par1, ...) **Arguments** -The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +A [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -602,12 +602,12 @@ Alias: `yandexConsistentHash` (left for backwards compatibility sake). **Parameters** -- `input`: A UInt64-type key [UInt64](/docs/en/sql-reference/data-types/int-uint.md). -- `n`: Number of buckets. [UInt16](/docs/en/sql-reference/data-types/int-uint.md). +- `input`: A UInt64-type key [UInt64](../data-types/int-uint.md). +- `n`: Number of buckets. [UInt16](../data-types/int-uint.md). **Returned value** -- A [UInt16](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- A [UInt16](../data-types/int-uint.md) data type hash value. **Implementation details** @@ -638,12 +638,12 @@ murmurHash2_64(par1, ...) **Arguments** -Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). 
**Returned Value** -- The `murmurHash2_32` function returns hash value having the [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type. -- The `murmurHash2_64` function returns hash value having the [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type. +- The `murmurHash2_32` function returns hash value having the [UInt32](../data-types/int-uint.md) data type. +- The `murmurHash2_64` function returns hash value having the [UInt64](../data-types/int-uint.md) data type. **Example** @@ -669,11 +669,11 @@ gccMurmurHash(par1, ...) **Arguments** -- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](../data-types/index.md/#data_types). **Returned value** -- Calculated hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Calculated hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -706,11 +706,11 @@ MurmurHash(par1, ...) **Arguments** -- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](../data-types/index.md/#data_types). **Returned value** -- Calculated hash value. [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Calculated hash value. [UInt32](../data-types/int-uint.md). **Example** @@ -741,12 +741,12 @@ murmurHash3_64(par1, ...) **Arguments** -Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data). **Returned Value** -- The `murmurHash3_32` function returns a [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. -- The `murmurHash3_64` function returns a [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- The `murmurHash3_32` function returns a [UInt32](../data-types/int-uint.md) data type hash value. +- The `murmurHash3_64` function returns a [UInt64](../data-types/int-uint.md) data type hash value. **Example** @@ -772,11 +772,11 @@ murmurHash3_128(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions). [String](/docs/en/sql-reference/data-types/string.md). +- `expr` — A list of [expressions](../syntax.md/#syntax-expressions). [String](../data-types/string.md). **Returned value** -A 128-bit `MurmurHash3` hash value. [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md). +A 128-bit `MurmurHash3` hash value. [FixedString(16)](../data-types/fixedstring.md). 
**Example** @@ -806,11 +806,11 @@ xxh3(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions) of any data type. +- `expr` — A list of [expressions](../syntax.md/#syntax-expressions) of any data type. **Returned value** -A 64-bit `xxh3` hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +A 64-bit `xxh3` hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -872,7 +872,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -882,12 +882,12 @@ ngramSimHash(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -909,7 +909,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -919,12 +919,12 @@ ngramSimHashCaseInsensitive(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -946,7 +946,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case sensitive. 
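The near-duplicate detection workflow described for the `simhash` family can be illustrated with a short sketch; the input strings are made-up samples, and pairing a `simhash` function with `bitHammingDistance` follows the usage pattern stated above.

```sql
-- Illustrative only: a small Hamming distance between the two simhashes
-- suggests that the strings are near-duplicates.
SELECT bitHammingDistance(
    ngramSimHash('ClickHouse is a fast OLAP database'),
    ngramSimHash('ClickHouse is a very fast OLAP database')
) AS hamming_distance;
```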
-Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -956,12 +956,12 @@ ngramSimHashUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -983,7 +983,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and returns the n-gram `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -993,12 +993,12 @@ ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1020,7 +1020,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). 
The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1030,12 +1030,12 @@ wordShingleSimHash(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1057,7 +1057,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1067,12 +1067,12 @@ wordShingleSimHashCaseInsensitive(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1094,7 +1094,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1104,12 +1104,12 @@ wordShingleSimHashUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. 
[UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1131,7 +1131,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case insensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. +Can be used for detection of semi-duplicate strings with [bitHammingDistance](../functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1141,12 +1141,12 @@ wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1176,11 +1176,11 @@ wyHash64(string) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `string` — String. [String](../data-types/string.md). **Returned value** -- Hash value. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Hash value. [UInt64](../data-types/int-uint.md). **Example** @@ -1202,7 +1202,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1212,13 +1212,13 @@ ngramMinHash(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
-- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1240,7 +1240,7 @@ Result: Splits a ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1250,13 +1250,13 @@ ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1278,7 +1278,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. 
Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1288,13 +1288,13 @@ ngramMinHashUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1316,7 +1316,7 @@ Result: Splits a UTF-8 string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1326,13 +1326,13 @@ ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. 
Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1362,13 +1362,13 @@ ngramMinHashArg(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1398,13 +1398,13 @@ ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). 
+- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1434,13 +1434,13 @@ ngramMinHashArgUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1470,13 +1470,13 @@ ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` n-grams each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1498,7 +1498,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. 
Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1508,13 +1508,13 @@ wordShingleMinHash(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1536,7 +1536,7 @@ Result: Splits a ASCII string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1546,13 +1546,13 @@ wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
+- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1574,7 +1574,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. -Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1584,13 +1584,13 @@ wordShingleMinHashUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1612,7 +1612,7 @@ Result: Splits a UTF-8 string into parts (shingles) of `shinglesize` words and calculates hash values for each word shingle. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case insensitive. 
-Can be used for detection of semi-duplicate strings with [tupleHammingDistance](/docs/en/sql-reference/functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. +Can be used for detection of semi-duplicate strings with [tupleHammingDistance](../functions/tuple-functions.md/#tuplehammingdistance). For two strings: if one of the returned hashes is the same for both strings, we think that those strings are the same. **Syntax** @@ -1622,13 +1622,13 @@ wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). +- Tuple with two hashes — the minimum and the maximum. [Tuple](../data-types/tuple.md)([UInt64](../data-types/int-uint.md), [UInt64](../data-types/int-uint.md)). **Example** @@ -1658,13 +1658,13 @@ wordShingleMinHashArg(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). 
**Example** @@ -1694,13 +1694,13 @@ wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1730,13 +1730,13 @@ wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1766,13 +1766,13 @@ wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
-- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](../data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](../data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). +- Tuple with two tuples with `hashnum` word shingles each. [Tuple](../data-types/tuple.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md)), [Tuple](../data-types/tuple.md)([String](../data-types/string.md))). **Example** @@ -1810,7 +1810,7 @@ Alias: `sqid` **Returned Value** -A sqid [String](/docs/en/sql-reference/data-types/string.md). +A sqid [String](../data-types/string.md). **Example** @@ -1837,11 +1837,11 @@ sqidDecode(sqid) **Arguments** -- A sqid - [String](/docs/en/sql-reference/data-types/string.md) +- A sqid - [String](../data-types/string.md) **Returned Value** -The sqid transformed to numbers [Array(UInt64)](/docs/en/sql-reference/data-types/array.md). +The sqid transformed to numbers [Array(UInt64)](../data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index d07a5292431..c0256ba4735 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -11,7 +11,7 @@ There are at least\* two types of functions - regular functions (they are just c In this section we discuss regular functions. For aggregate functions, see the section “Aggregate functions”. :::note -There is a third type of function that the [‘arrayJoin’ function](/docs/en/sql-reference/functions/array-join.md) belongs to. And [table functions](/docs/en/sql-reference/table-functions/index.md) can also be mentioned separately. +There is a third type of function that the [‘arrayJoin’ function](../functions/array-join.md) belongs to. And [table functions](../table-functions/index.md) can also be mentioned separately. ::: ## Strong Typing @@ -63,4 +63,4 @@ For some functions the first argument (the lambda function) can be omitted. In t ## User Defined Functions (UDFs) -ClickHouse supports user-defined functions. See [UDFs](/docs/en/sql-reference/functions/udf.md). +ClickHouse supports user-defined functions. See [UDFs](../functions/udf.md). diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index be8a2956d41..540e148e3f1 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -36,14 +36,14 @@ addressToLine(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. 
+- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** -- Source code filename and the line number in this file delimited by colon. [String](../../sql-reference/data-types/string.md). +- Source code filename and the line number in this file delimited by colon. [String](../data-types/string.md). - For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. -- Name of a binary, if the function couldn’t find the debug information. [String](../../sql-reference/data-types/string.md). -- Empty string, if the address is not valid. [String](../../sql-reference/data-types/string.md). +- Name of a binary, if the function couldn’t find the debug information. [String](../data-types/string.md). +- Empty string, if the address is not valid. [String](../data-types/string.md). **Example** @@ -124,7 +124,7 @@ addressToLineWithInlines(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** @@ -132,7 +132,7 @@ addressToLineWithInlines(address_of_binary_instruction) - Array with single element which is name of a binary, if the function couldn’t find the debug information. -- Empty array, if the address is not valid. [Array(String)](../../sql-reference/data-types/array.md). +- Empty array, if the address is not valid. [Array(String)](../data-types/array.md). **Example** @@ -225,12 +225,12 @@ addressToSymbol(address_of_binary_instruction) **Arguments** -- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. +- `address_of_binary_instruction` ([UInt64](../data-types/int-uint.md)) — Address of instruction in a running process. **Returned value** -- Symbol from ClickHouse object files. [String](../../sql-reference/data-types/string.md). -- Empty string, if the address is not valid. [String](../../sql-reference/data-types/string.md). +- Symbol from ClickHouse object files. [String](../data-types/string.md). +- Empty string, if the address is not valid. [String](../data-types/string.md). **Example** @@ -320,12 +320,12 @@ demangle(symbol) **Arguments** -- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file. +- `symbol` ([String](../data-types/string.md)) — Symbol from an object file. **Returned value** -- Name of the C++ function. [String](../../sql-reference/data-types/string.md). -- Empty string if a symbol is not valid. [String](../../sql-reference/data-types/string.md). +- Name of the C++ function. [String](../data-types/string.md). +- Empty string if a symbol is not valid. [String](../data-types/string.md). **Example** @@ -414,7 +414,7 @@ tid() **Returned value** -- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Current thread id. [Uint64](../data-types/int-uint.md#uint-ranges). **Example** @@ -444,7 +444,7 @@ logTrace('message') **Arguments** -- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). +- `message` — Message that is emitted to server log. [String](../data-types/string.md#string). 
**Returned value** diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 21beffbd0a8..5b6a3aef2c8 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -147,11 +147,11 @@ IPv6StringToNum(string) **Argument** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- IPv6 address in binary format. [FixedString(16)](../../sql-reference/data-types/fixedstring.md). +- IPv6 address in binary format. [FixedString(16)](../data-types/fixedstring.md). **Example** @@ -246,7 +246,7 @@ SELECT IPv6CIDRToRange(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32); ## toIPv4(string) -An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../../sql-reference/data-types/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`. +An alias to `IPv4StringToNum()` that takes a string form of IPv4 address and returns value of [IPv4](../data-types/ipv4.md) type, which is binary equal to value returned by `IPv4StringToNum()`. ``` sql WITH @@ -294,7 +294,7 @@ Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null ## toIPv6 -Converts a string form of IPv6 address to [IPv6](../../sql-reference/data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. +Converts a string form of IPv6 address to [IPv6](../data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. Similar to [IPv6StringToNum](#ipv6stringtonums) function, which converts IPv6 address to binary format. If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned. @@ -307,11 +307,11 @@ toIPv6(string) **Argument** -- `string` — IP address. [String](../../sql-reference/data-types/string.md) +- `string` — IP address. [String](../data-types/string.md) **Returned value** -- IP address. [IPv6](../../sql-reference/data-types/ipv6.md). +- IP address. [IPv6](../data-types/ipv6.md). **Examples** @@ -366,11 +366,11 @@ isIPv4String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- `1` if `string` is IPv4 address, `0` otherwise. [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv4 address, `0` otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -402,11 +402,11 @@ isIPv6String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../data-types/string.md). **Returned value** -- `1` if `string` is IPv6 address, `0` otherwise. [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `string` is IPv6 address, `0` otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -441,12 +441,12 @@ This function accepts both IPv4 and IPv6 addresses (and networks) represented as **Arguments** -- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). -- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../../sql-reference/data-types/string.md). +- `address` — An IPv4 or IPv6 address. [String](../data-types/string.md). +- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../data-types/string.md). 
**Returned value** -- `1` or `0`. [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` or `0`. [UInt8](../data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index dc4a3d871e7..8359d5f9fbc 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -31,7 +31,7 @@ simpleJSONHas(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -71,7 +71,7 @@ simpleJSONExtractUInt(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -118,7 +118,7 @@ simpleJSONExtractInt(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -165,7 +165,7 @@ simpleJSONExtractFloat(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -212,7 +212,7 @@ simpleJSONExtractBool(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** @@ -259,12 +259,12 @@ simpleJSONExtractRaw(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. [String literal](../syntax#string) **Returned value** -It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. +It returns the value of the field as a [`String`](../data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. **Example** @@ -306,12 +306,12 @@ simpleJSONExtractString(json, field_name) **Parameters** -- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `json`: The JSON in which the field is searched for. [String](../data-types/string.md#string) - `field_name`: The name of the field to search for. 
[String literal](../syntax#string) **Returned value** -It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist. +It returns the value of a field as a [`String`](../data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist. **Implementation details** @@ -528,12 +528,12 @@ JSONExtractKeys(json[, a, b, c...]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../../sql-reference/data-types/string.md) to get the field by the key or an [Integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../data-types/string.md) with valid JSON. +- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../data-types/string.md) to get the field by the key or an [Integer](../data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned value** -Array with the keys of the JSON. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +Array with the keys of the JSON. [Array](../data-types/array.md)([String](../data-types/string.md)). **Example** @@ -588,13 +588,13 @@ JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../data-types/string.md) with valid JSON. +- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../data-types/string.md) to get the field by the key or an [integer](../data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned values** -- Array with `('key', 'value')` tuples. Both tuple members are strings. [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). -- Empty array if the requested object does not exist, or input JSON is invalid. 
[Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). +- Array with `('key', 'value')` tuples. Both tuple members are strings. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), [String](../data-types/string.md)). +- Empty array if the requested object does not exist, or input JSON is invalid. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), [String](../data-types/string.md)). **Examples** @@ -719,9 +719,9 @@ Before version 21.11 the order of arguments was wrong, i.e. JSON_VALUE(path, jso ## toJSONString Serializes a value to its JSON representation. Various data types and nested structures are supported. -64-bit [integers](../../sql-reference/data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior. +64-bit [integers](../data-types/int-uint.md) or bigger (like `UInt64` or `Int128`) are enclosed in quotes by default. [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers) controls this behavior. Special values `NaN` and `inf` are replaced with `null`. Enable [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals) setting to show them. -When serializing an [Enum](../../sql-reference/data-types/enum.md) value, the function outputs its name. +When serializing an [Enum](../data-types/enum.md) value, the function outputs its name. **Syntax** @@ -735,12 +735,12 @@ toJSONString(value) **Returned value** -- JSON representation of the value. [String](../../sql-reference/data-types/string.md). +- JSON representation of the value. [String](../data-types/string.md). **Example** -The first example shows serialization of a [Map](../../sql-reference/data-types/map.md). -The second example shows some special values wrapped into a [Tuple](../../sql-reference/data-types/tuple.md). +The first example shows serialization of a [Map](../data-types/map.md). +The second example shows some special values wrapped into a [Tuple](../data-types/tuple.md). Query: @@ -776,11 +776,11 @@ Alias: `JSON_ARRAY_LENGTH(json)`. **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `json` — [String](../data-types/string.md) with valid JSON. **Returned value** -- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md). +- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL. [Nullable(UInt64)](../data-types/int-uint.md). **Example** @@ -807,11 +807,11 @@ jsonMergePatch(json1, json2, ...) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `json` — [String](../data-types/string.md) with valid JSON. **Returned value** -- If JSON object strings are valid, return the merged JSON object string. [String](../../sql-reference/data-types/string.md). +- If JSON object strings are valid, return the merged JSON object string. [String](../data-types/string.md). 
**Example** diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 1977c5c2a7e..8448dd4ff12 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -6,7 +6,7 @@ sidebar_label: Logical # Logical Functions -Below functions perform logical operations on arguments of arbitrary numeric types. They return either 0 or 1 as [UInt8](../../sql-reference/data-types/int-uint.md) or in some cases `NULL`. +Below functions perform logical operations on arguments of arbitrary numeric types. They return either 0 or 1 as [UInt8](../data-types/int-uint.md) or in some cases `NULL`. Zero as an argument is considered `false`, non-zero values are considered `true`. @@ -26,13 +26,13 @@ Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-ope **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** -- `0`, if at least one argument evaluates to `false`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `0`, if at least one argument evaluates to `false`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). - `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`. [NULL](../../sql-reference/syntax.md/#null). -- `1`, otherwise. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `1`, otherwise. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). **Example** @@ -78,7 +78,7 @@ Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-opera **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** @@ -86,7 +86,7 @@ Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-opera - `0`, if all arguments evaluate to `false`, - `NULL`, if all arguments evaluate to `false` and at least one argument is `NULL`. -Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +Type: [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). **Example** @@ -130,12 +130,12 @@ Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-ne **Arguments** -- `val` — The value. 
[Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val` — The value. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** -- `1`, if `val` evaluates to `false`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). -- `0`, if `val` evaluates to `true`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `1`, if `val` evaluates to `false`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). +- `0`, if `val` evaluates to `true`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). - `NULL`, if `val` is `NULL`. [NULL](../../sql-reference/syntax.md/#null). **Example** @@ -164,12 +164,12 @@ xor(val1, val2...) **Arguments** -- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md). +- `val1, val2, ...` — List of at least two values. [Int](../data-types/int-uint.md), [UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Nullable](../data-types/nullable.md). **Returned value** -- `1`, for two values: if one of the values evaluates to `false` and other does not. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). -- `0`, for two values: if both values evaluate to `false` or to both `true`. [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). +- `1`, for two values: if one of the values evaluates to `false` and other does not. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). +- `0`, for two values: if both values evaluate to `false` or to both `true`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). - `NULL`, if at least one of the inputs is `NULL`. [NULL](../../sql-reference/syntax.md/#null). **Example** diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 03ddc38ef50..7f50fa933b6 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -18,7 +18,7 @@ e() **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## pi @@ -31,7 +31,7 @@ pi() ``` **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## exp @@ -45,11 +45,11 @@ exp(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). 
+- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## log @@ -65,11 +65,11 @@ Alias: `ln(x)` **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## exp2 @@ -83,11 +83,11 @@ exp2(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## intExp2 @@ -111,11 +111,11 @@ log2(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## exp10 @@ -129,11 +129,11 @@ exp10(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## intExp10 @@ -157,11 +157,11 @@ log10(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## sqrt @@ -173,11 +173,11 @@ sqrt(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## cbrt @@ -189,11 +189,11 @@ cbrt(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). 
## erf @@ -207,11 +207,11 @@ erf(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). **Example** @@ -239,11 +239,11 @@ erfc(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## lgamma @@ -257,11 +257,11 @@ lgamma(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## tgamma @@ -275,11 +275,11 @@ gamma(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## sin @@ -293,11 +293,11 @@ sin(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). **Example** @@ -323,11 +323,11 @@ cos(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## tan @@ -341,11 +341,11 @@ tan(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## asin @@ -359,11 +359,11 @@ asin(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). 
+- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## acos @@ -377,11 +377,11 @@ acos(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## atan @@ -395,11 +395,11 @@ atan(x) **Arguments** -- `x` - [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` - [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -Type: [Float*](../../sql-reference/data-types/float.md). +Type: [Float*](../data-types/float.md). ## pow @@ -415,12 +415,12 @@ Alias: `power(x, y)` **Arguments** -- `x` - [(U)Int8/16/32/64](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) -- `y` - [(U)Int8/16/32/64](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) +- `x` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md) +- `y` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md) **Returned value** -Type: [Float64](../../sql-reference/data-types/float.md). +Type: [Float64](../data-types/float.md). ## cosh @@ -434,13 +434,13 @@ cosh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `1 <= cosh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -468,13 +468,13 @@ acosh(x) **Arguments** -- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `0 <= acosh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -502,13 +502,13 @@ sinh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. 
[(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-∞ < sinh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -536,13 +536,13 @@ asinh(x) **Arguments** -- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -569,13 +569,13 @@ tanh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-1 < tanh(x) < 1`. -Type: [Float*](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float*](../data-types/float.md#float32-float64). **Example** @@ -601,13 +601,13 @@ atanh(x) **Arguments** -- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -635,14 +635,14 @@ atan2(y, x) **Arguments** -- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). -- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). +- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). +- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** - The angle `θ` such that `−π < θ ≤ π`, in radians. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -670,14 +670,14 @@ hypot(x, y) **Arguments** -- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). 
-- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md). +- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). +- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md). **Returned value** - The length of the hypotenuse of a right-angle triangle. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -705,13 +705,13 @@ log1p(x) **Arguments** -- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Values from the interval: `-1 < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Values from the interval: `-∞ < log1p(x) < +∞`. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). **Example** @@ -747,7 +747,7 @@ sign(x) - 0 for `x = 0` - 1 for `x > 0` -Type: [Int8](../../sql-reference/data-types/int-uint.md). +Type: [Int8](../data-types/int-uint.md). **Examples** @@ -804,11 +804,11 @@ sigmoid(x) **Parameters** -- `x` — input value. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — input value. Values from the interval: `-∞ < x < +∞`. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Corresponding value along the sigmoid curve between 0 and 1. [Float64](../../sql-reference/data-types/float.md). +- Corresponding value along the sigmoid curve between 0 and 1. [Float64](../data-types/float.md). **Example** @@ -838,11 +838,11 @@ degrees(x) **Arguments** -- `x` — Input in radians. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Input in radians. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** -- Value in degrees. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- Value in degrees. [Float64](../data-types/float.md#float32-float64). **Example** @@ -870,13 +870,13 @@ radians(x) **Arguments** -- `x` — Input in degrees. [(U)Int*](../../sql-reference/data-types/int-uint.md), [Float*](../../sql-reference/data-types/float.md) or [Decimal*](../../sql-reference/data-types/decimal.md). +- `x` — Input in degrees. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md). **Returned value** - Value in radians. -Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). +Type: [Float64](../data-types/float.md#float32-float64). 
**Example** diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md index 3e0458d226d..4bfa181a35f 100644 --- a/docs/en/sql-reference/functions/nlp-functions.md +++ b/docs/en/sql-reference/functions/nlp-functions.md @@ -23,7 +23,7 @@ stem('language', word) ### Arguments - `language` — Language which rules will be applied. Use the two letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes). -- `word` — word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string). +- `word` — word that needs to be stemmed. Must be in lowercase. [String](../data-types/string.md#string). ### Examples @@ -88,8 +88,8 @@ lemmatize('language', word) ### Arguments -- `language` — Language which rules will be applied. [String](../../sql-reference/data-types/string.md#string). -- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string). +- `language` — Language which rules will be applied. [String](../data-types/string.md#string). +- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../data-types/string.md#string). ### Examples @@ -139,8 +139,8 @@ synonyms('extension_name', word) ### Arguments -- `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string). -- `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string). +- `extension_name` — Name of the extension in which search will be performed. [String](../data-types/string.md#string). +- `word` — Word that will be searched in extension. [String](../data-types/string.md#string). ### Examples @@ -188,7 +188,7 @@ detectLanguage('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -226,7 +226,7 @@ detectLanguageMixed('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -262,7 +262,7 @@ detectLanguageUnknown('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value @@ -302,7 +302,7 @@ detectCharset('text_to_be_analyzed') ### Arguments -- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string). +- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../data-types/string.md#string). ### Returned value diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 45fc12388fe..dfe1224f7b8 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -33,11 +33,11 @@ getMacro(name); **Arguments** -- `name` — Macro name to retrieve from the `` section. 
[String](../../sql-reference/data-types/string.md#string). +- `name` — Macro name to retrieve from the `` section. [String](../data-types/string.md#string). **Returned value** -- Value of the specified macro. [String](../../sql-reference/data-types/string.md). +- Value of the specified macro. [String](../data-types/string.md). **Example** @@ -116,7 +116,7 @@ basename(expr) **Arguments** -- `expr` — A value of type [String](../../sql-reference/data-types/string.md). Backslashes must be escaped. +- `expr` — A value of type [String](../data-types/string.md). Backslashes must be escaped. **Returned Value** @@ -237,11 +237,11 @@ byteSize(argument [, ...]) **Returned value** -- Estimation of byte size of the arguments in memory. [UInt64](../../sql-reference/data-types/int-uint.md). +- Estimation of byte size of the arguments in memory. [UInt64](../data-types/int-uint.md). **Examples** -For [String](../../sql-reference/data-types/string.md) arguments, the function returns the string length + 9 (terminating zero + length). +For [String](../data-types/string.md) arguments, the function returns the string length + 9 (terminating zero + length). Query: @@ -350,7 +350,7 @@ sleep(seconds) **Arguments** -- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. +- `seconds`: [UInt*](../data-types/int-uint.md) or [Float](../data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. **Returned value** @@ -400,7 +400,7 @@ sleepEachRow(seconds) **Arguments** -- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. +- `seconds`: [UInt*](../data-types/int-uint.md) or [Float*](../data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. **Returned value** @@ -494,8 +494,8 @@ isConstant(x) **Returned values** -- `1` if `x` is constant. [UInt8](../../sql-reference/data-types/int-uint.md). -- `0` if `x` is non-constant. [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `x` is constant. [UInt8](../data-types/int-uint.md). +- `0` if `x` is non-constant. [UInt8](../data-types/int-uint.md). **Examples** @@ -963,7 +963,7 @@ uptime() **Returned value** -- Time value of seconds. [UInt32](/docs/en/sql-reference/data-types/int-uint.md). +- Time value of seconds. [UInt32](../data-types/int-uint.md). **Example** @@ -1226,7 +1226,7 @@ To prevent that you can create a subquery with [ORDER BY](../../sql-reference/st **Arguments** - `column` — A column name or scalar expression. -- `offset` — The number of rows to look before or ahead of the current row in `column`. [Int64](../../sql-reference/data-types/int-uint.md). +- `offset` — The number of rows to look before or ahead of the current row in `column`. [Int64](../data-types/int-uint.md). - `default_value` — Optional. The returned value if offset is beyond the block boundaries. Type of data blocks affected. 
**Returned values** @@ -1446,12 +1446,12 @@ runningConcurrency(start, end) **Arguments** -- `start` — A column with the start time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `start` — A column with the start time of events. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), or [DateTime64](../data-types/datetime64.md). +- `end` — A column with the end time of events. [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), or [DateTime64](../data-types/datetime64.md). **Returned values** -- The number of concurrent events at each event start time. [UInt32](../../sql-reference/data-types/int-uint.md) +- The number of concurrent events at each event start time. [UInt32](../data-types/int-uint.md) **Example** @@ -1515,7 +1515,7 @@ MACStringToOUI(s) ## getSizeOfEnumType -Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). +Returns the number of fields in [Enum](../data-types/enum.md). An exception is thrown if the type is not `Enum`. **Syntax** @@ -1674,7 +1674,7 @@ defaultValueOfArgumentType(expression) - `0` for numbers. - Empty string for strings. -- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). +- `ᴺᵁᴸᴸ` for [Nullable](../data-types/nullable.md). **Example** @@ -1724,7 +1724,7 @@ defaultValueOfTypeName(type) - `0` for numbers. - Empty string for strings. -- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). +- `ᴺᵁᴸᴸ` for [Nullable](../data-types/nullable.md). **Example** @@ -1937,7 +1937,7 @@ filesystemAvailable() **Returned value** -- The amount of remaining space available in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). +- The amount of remaining space available in bytes. [UInt64](../data-types/int-uint.md). **Example** @@ -1967,7 +1967,7 @@ filesystemFree() **Returned value** -- The amount of free space in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). +- The amount of free space in bytes. [UInt64](../data-types/int-uint.md). **Example** @@ -1997,7 +1997,7 @@ filesystemCapacity() **Returned value** -- Capacity of the filesystem in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). +- Capacity of the filesystem in bytes. [UInt64](../data-types/int-uint.md). **Example** @@ -2017,7 +2017,7 @@ Result: ## initializeAggregation -Calculates the result of an aggregate function based on a single value. This function can be used to initialize aggregate functions with combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). You can create states of aggregate functions and insert them to columns of type [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values. +Calculates the result of an aggregate function based on a single value. This function can be used to initialize aggregate functions with combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). 
You can create states of aggregate functions and insert them to columns of type [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values. **Syntax** @@ -2027,7 +2027,7 @@ initializeAggregation (aggregate_function, arg1, arg2, ..., argN) **Arguments** -- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md). +- `aggregate_function` — Name of the aggregation function to initialize. [String](../data-types/string.md). - `arg` — Arguments of aggregate function. **Returned value(s)** @@ -2102,7 +2102,7 @@ finalizeAggregation(state) **Arguments** -- `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `state` — State of aggregation. [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction). **Returned value(s)** @@ -2210,8 +2210,8 @@ runningAccumulate(agg_state[, grouping]); **Arguments** -- `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). -- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. +- `agg_state` — State of the aggregate function. [AggregateFunction](../data-types/aggregatefunction.md#data-type-aggregatefunction). +- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../data-types/index.md) for which the equality operator is defined. **Returned value** @@ -2485,7 +2485,7 @@ getSetting('custom_setting'); **Parameter** -- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md). +- `custom_setting` — The setting name. [String](../data-types/string.md). **Returned value** @@ -2510,7 +2510,7 @@ Result: ## isDecimalOverflow -Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is outside its precision or outside the specified precision. +Checks whether the [Decimal](../data-types/decimal.md) value is outside its precision or outside the specified precision. **Syntax** @@ -2520,8 +2520,8 @@ isDecimalOverflow(d, [p]) **Arguments** -- `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). -- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. This parameter can be helpful to migrate data from/to another database or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- `d` — value. [Decimal](../data-types/decimal.md). +- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. This parameter can be helpful to migrate data from/to another database or file. [UInt8](../data-types/int-uint.md#uint-ranges). **Returned values** @@ -2557,11 +2557,11 @@ countDigits(x) **Arguments** -- `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. +- `x` — [Int](../data-types/int-uint.md) or [Decimal](../data-types/decimal.md) value. **Returned value** -- Number of digits. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Number of digits. [UInt8](../data-types/int-uint.md#uint-ranges). 
:::note For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). @@ -2585,7 +2585,7 @@ Result: ## errorCodeToName -- The textual name of an error code. [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). +- The textual name of an error code. [LowCardinality(String)](../data-types/lowcardinality.md). **Syntax** @@ -2616,7 +2616,7 @@ tcpPort() **Returned value** -- The TCP port number. [UInt16](../../sql-reference/data-types/int-uint.md). +- The TCP port number. [UInt16](../data-types/int-uint.md). **Example** @@ -2652,7 +2652,7 @@ currentProfiles() **Returned value** -- List of the current user settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the current user settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## enabledProfiles @@ -2666,7 +2666,7 @@ enabledProfiles() **Returned value** -- List of the enabled settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## defaultProfiles @@ -2680,7 +2680,7 @@ defaultProfiles() **Returned value** -- List of the default settings profiles. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default settings profiles. [Array](../data-types/array.md)([String](../data-types/string.md)). ## currentRoles @@ -2694,7 +2694,7 @@ currentRoles() **Returned value** -- A list of the current roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- A list of the current roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## enabledRoles @@ -2708,7 +2708,7 @@ enabledRoles() **Returned value** -- List of the enabled roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the enabled roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## defaultRoles @@ -2722,7 +2722,7 @@ defaultRoles() **Returned value** -- List of the default roles for the current user. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- List of the default roles for the current user. [Array](../data-types/array.md)([String](../data-types/string.md)). ## getServerPort @@ -2736,7 +2736,7 @@ getServerPort(port_name) **Arguments** -- `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string). Possible values: +- `port_name` — The name of the server port. [String](../data-types/string.md#string). Possible values: - 'tcp_port' - 'tcp_port_secure' @@ -2751,7 +2751,7 @@ getServerPort(port_name) **Returned value** -- The number of the server port. [UInt16](../../sql-reference/data-types/int-uint.md). +- The number of the server port. [UInt16](../data-types/int-uint.md). **Example** @@ -2783,7 +2783,7 @@ queryID() **Returned value** -- The ID of the current query. 
[String](../../sql-reference/data-types/string.md) +- The ID of the current query. [String](../data-types/string.md) **Example** @@ -2817,7 +2817,7 @@ initialQueryID() **Returned value** -- The ID of the initial current query. [String](../../sql-reference/data-types/string.md) +- The ID of the initial current query. [String](../data-types/string.md) **Example** @@ -2850,7 +2850,7 @@ shardNum() **Returned value** -- Shard index or constant `0`. [UInt32](../../sql-reference/data-types/int-uint.md). +- Shard index or constant `0`. [UInt32](../data-types/int-uint.md). **Example** @@ -2890,7 +2890,7 @@ shardCount() **Returned value** -- Total number of shards or `0`. [UInt32](../../sql-reference/data-types/int-uint.md). +- Total number of shards or `0`. [UInt32](../data-types/int-uint.md). **See Also** @@ -2912,7 +2912,7 @@ getOSKernelVersion() **Returned value** -- The current OS kernel version. [String](../../sql-reference/data-types/string.md). +- The current OS kernel version. [String](../data-types/string.md). **Example** @@ -2946,7 +2946,7 @@ zookeeperSessionUptime() **Returned value** -- Uptime of the current ZooKeeper session in seconds. [UInt32](../../sql-reference/data-types/int-uint.md). +- Uptime of the current ZooKeeper session in seconds. [UInt32](../data-types/int-uint.md). **Example** @@ -2983,7 +2983,7 @@ All arguments must be constant. **Returned value** -- Randomly generated table structure. [String](../../sql-reference/data-types/string.md). +- Randomly generated table structure. [String](../data-types/string.md). **Examples** @@ -3050,7 +3050,7 @@ structureToCapnProtoSchema(structure) **Returned value** -- CapnProto schema. [String](../../sql-reference/data-types/string.md). +- CapnProto schema. [String](../data-types/string.md). **Examples** @@ -3149,7 +3149,7 @@ structureToProtobufSchema(structure) **Returned value** -- Protobuf schema. [String](../../sql-reference/data-types/string.md). +- Protobuf schema. [String](../data-types/string.md). **Examples** @@ -3229,11 +3229,11 @@ formatQueryOrNull(query) **Arguments** -- `query` - The SQL query to be formatted. [String](../../sql-reference/data-types/string.md) +- `query` - The SQL query to be formatted. [String](../data-types/string.md) **Returned value** -- The formatted query. [String](../../sql-reference/data-types/string.md). +- The formatted query. [String](../data-types/string.md). **Example** @@ -3268,11 +3268,11 @@ formatQuerySingleLineOrNull(query) **Arguments** -- `query` - The SQL query to be formatted. [String](../../sql-reference/data-types/string.md) +- `query` - The SQL query to be formatted. [String](../data-types/string.md) **Returned value** -- The formatted query. [String](../../sql-reference/data-types/string.md). +- The formatted query. [String](../data-types/string.md). **Example** @@ -3300,8 +3300,8 @@ variantElement(variant, type_name, [, default_value]) **Arguments** -- `variant` — Variant column. [Variant](../../sql-reference/data-types/variant.md). -- `type_name` — The name of the variant type to extract. [String](../../sql-reference/data-types/string.md). +- `variant` — Variant column. [Variant](../data-types/variant.md). +- `type_name` — The name of the variant type to extract. [String](../data-types/string.md). - `default_value` - The default value that will be used if variant doesn't have variant with specified type. Can be any type. Optional. **Returned value** @@ -3337,7 +3337,7 @@ variantType(variant) **Arguments** -- `variant` — Variant column. 
[Variant](../../sql-reference/data-types/variant.md). +- `variant` — Variant column. [Variant](../data-types/variant.md). **Returned value** @@ -3553,7 +3553,7 @@ showCertificate() **Returned value** -- Map of key-value pairs relating to the configured SSL certificate. [Map](../../sql-reference/data-types/map.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). +- Map of key-value pairs relating to the configured SSL certificate. [Map](../data-types/map.md)([String](../data-types/string.md), [String](../data-types/string.md)). **Example** diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index a7866c6d12e..a9b483aa0e5 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -169,7 +169,7 @@ randUniform(min, max) ### Returned value -A random number of type [Float64](/docs/en/sql-reference/data-types/float.md). +A random number of type [Float64](../data-types/float.md). ### Example @@ -204,7 +204,7 @@ randNormal(mean, variance) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -241,7 +241,7 @@ randLogNormal(mean, variance) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -278,7 +278,7 @@ randBinomial(experiments, probability) **Returned value** -- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -315,7 +315,7 @@ randNegativeBinomial(experiments, probability) **Returned value** -- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -351,7 +351,7 @@ randPoisson(n) **Returned value** -- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -387,7 +387,7 @@ randBernoulli(probability) **Returned value** -- Random number. [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- Random number. [UInt64](../data-types/int-uint.md). **Example** @@ -423,7 +423,7 @@ randExponential(lambda) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -459,7 +459,7 @@ randChiSquared(degree_of_freedom) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -495,7 +495,7 @@ randStudentT(degree_of_freedom) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -532,7 +532,7 @@ randFisherF(d1, d2) **Returned value** -- Random number. [Float64](/docs/en/sql-reference/data-types/float.md). +- Random number. [Float64](../data-types/float.md). **Example** @@ -568,7 +568,7 @@ randomString(length) **Returned value** -- String filled with random bytes. [String](../../sql-reference/data-types/string.md). +- String filled with random bytes. [String](../data-types/string.md). **Example** @@ -604,11 +604,11 @@ randomFixedString(length); **Arguments** -- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). 
+- `length` — String length in bytes. [UInt64](../data-types/int-uint.md). **Returned value(s)** -- String filled with random bytes. [FixedString](../../sql-reference/data-types/fixedstring.md). +- String filled with random bytes. [FixedString](../data-types/fixedstring.md). **Example** @@ -643,7 +643,7 @@ randomPrintableASCII(length) **Returned value** -- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [String](../../sql-reference/data-types/string.md) +- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. [String](../data-types/string.md) **Example** @@ -671,11 +671,11 @@ randomStringUTF8(length); **Arguments** -- `length` — Length of the string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). +- `length` — Length of the string in code points. [UInt64](../data-types/int-uint.md). **Returned value(s)** -- UTF-8 random string. [String](../../sql-reference/data-types/string.md). +- UTF-8 random string. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 20f73de4410..ab344f664fd 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -36,8 +36,8 @@ Alias: `truncate`. **Parameters** -- `input`: A numeric type ([Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md) or [Integer](/docs/en/sql-reference/data-types/int-uint.md)). -- `precision`: An [Integer](/docs/en/sql-reference/data-types/int-uint.md) type. +- `input`: A numeric type ([Float](../data-types/float.md), [Decimal](../data-types/decimal.md) or [Integer](../data-types/int-uint.md)). +- `precision`: An [Integer](../data-types/int-uint.md) type. **Returned value** @@ -69,7 +69,7 @@ round(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types). - `decimal-places` — An integer value. - If `decimal-places > 0` then the function rounds the value to the right of the decimal point. - If `decimal-places < 0` then the function rounds the value to the left of the decimal point. @@ -171,7 +171,7 @@ roundBankers(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types). - `decimal-places` — Decimal places. An integer number. - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. 
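For illustration, the `decimal-places` behaviour of `roundBankers` described in the hunk above can be sanity-checked with a short query. This is a sketch only; the expected results are the ones the documentation itself quotes (`roundBankers(3.55, 1) = 3.6`, `roundBankers(24.55, -1) = 20`):

```sql
-- Positive decimal-places rounds to the right of the decimal point,
-- negative decimal-places rounds to the left of it.
-- Expected output, per the documentation's own examples: 3.6 and 20.
SELECT
    roundBankers(3.55, 1)   AS rounded_right,
    roundBankers(24.55, -1) AS rounded_left;
```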
diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 8aa171949a3..9ec4ee974c4 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -19,13 +19,13 @@ splitByChar(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `separator` — The separator which should contain exactly one character. [String](../data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings, otherwise the function will return as many substrings as possible. **Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Empty substrings may be selected when: @@ -72,13 +72,13 @@ splitByString(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `separator` — The separator. [String](../data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Empty substrings may be selected when: @@ -129,13 +129,13 @@ splitByRegexp(regexp, s[, max_substrings])) **Arguments** - `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Empty substrings may be selected when: @@ -186,13 +186,13 @@ splitByWhitespace(s[, max_substrings])) **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. 
**Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. @@ -225,13 +225,13 @@ splitByNonAlpha(s[, max_substrings])) **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. @@ -288,12 +288,12 @@ Alias: `splitByAlpha` **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../data-types/string.md). - `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** -- An array of selected substrings. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). :::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. @@ -357,12 +357,12 @@ ngrams(string, ngramsize) **Arguments** -- `string` — String. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `ngramsize` — The size of an n-gram. [UInt](../../sql-reference/data-types/int-uint.md). +- `string` — String. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `ngramsize` — The size of an n-gram. [UInt](../data-types/int-uint.md). **Returned values** -- Array with n-grams. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). +- Array with n-grams. [Array](../data-types/array.md)([String](../data-types/string.md)). **Example** @@ -384,7 +384,7 @@ Splits a string into tokens using non-alphanumeric ASCII characters as separator **Arguments** -- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. 
+- `input_string` — Any set of bytes represented as the [String](../data-types/string.md) data type object. **Returned value** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index f45ceb99617..342ca2b9f03 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -183,7 +183,7 @@ left(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -230,7 +230,7 @@ leftUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -347,7 +347,7 @@ right(s, offset) **Parameters** -- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -394,7 +394,7 @@ rightUTF8(s, offset) **Parameters** -- `s`: The UTF-8 encoded string to calculate a substring from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `s`: The UTF-8 encoded string to calculate a substring from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `offset`: The number of bytes of the offset. [UInt*](../data-types/int-uint). **Returned value** @@ -513,11 +513,11 @@ Alias: `lcase` **Parameters** -- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `input`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. **Example** @@ -547,11 +547,11 @@ Alias: `ucase` **Parameters** -- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `input`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. **Examples** @@ -591,11 +591,11 @@ upperUTF8(input) **Parameters** -- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `input`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. **Example** @@ -627,7 +627,7 @@ toValidUTF8(input_string) **Arguments** -- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. 
+- `input_string` — Any set of bytes represented as the [String](../data-types/string.md) data type object. **Returned value** @@ -659,8 +659,8 @@ Alias: `REPEAT` **Arguments** -- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). -- `n` — The number of times to repeat the string. [UInt* or Int*](../../sql-reference/data-types/int-uint.md). +- `s` — The string to repeat. [String](../data-types/string.md). +- `n` — The number of times to repeat the string. [UInt* or Int*](../data-types/int-uint.md). **Returned value** @@ -694,7 +694,7 @@ Alias: `SPACE`. **Arguments** -- `n` — The number of times to repeat the space. [UInt* or Int*](../../sql-reference/data-types/int-uint.md). +- `n` — The number of times to repeat the space. [UInt* or Int*](../data-types/int-uint.md). **Returned value** @@ -738,7 +738,7 @@ concat(s1, s2, ...) At least one value of arbitrary type. -Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. +Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. **Returned values** @@ -845,8 +845,8 @@ Alias: `concat_ws` **Arguments** -- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- exprN — expression to be concatenated. Arguments which are not of types [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. +- sep — separator. Const [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- exprN — expression to be concatenated. Arguments which are not of types [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md) are converted to strings using their default serialization. As this decreases performance, it is not recommended to use non-String/FixedString arguments. **Returned values** @@ -891,9 +891,9 @@ Alias: **Arguments** -- `s` — The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) -- `offset` — The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). -- `length` — The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. +- `s` — The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset` — The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length` — The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** @@ -927,9 +927,9 @@ substringUTF8(s, offset[, length]) **Arguments** -- `s`: The string to calculate a substring from. 
[String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) -- `offset`: The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). -- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. +- `s`: The string to calculate a substring from. [String](../data-types/string.md), [FixedString](../data-types/fixedstring.md) or [Enum](../data-types/enum.md) +- `offset`: The starting position of the substring in `s` . [(U)Int*](../data-types/int-uint.md). +- `length`: The maximum length of the substring. [(U)Int*](../data-types/int-uint.md). Optional. **Returned value** @@ -965,8 +965,8 @@ Alias: `SUBSTRING_INDEX` **Arguments** -- s: The string to extract substring from. [String](../../sql-reference/data-types/string.md). -- delim: The character to split. [String](../../sql-reference/data-types/string.md). +- s: The string to extract substring from. [String](../data-types/string.md). +- delim: The character to split. [String](../data-types/string.md). - count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Example** @@ -996,13 +996,13 @@ substringIndexUTF8(s, delim, count) **Arguments** -- `s`: The string to extract substring from. [String](../../sql-reference/data-types/string.md). -- `delim`: The character to split. [String](../../sql-reference/data-types/string.md). +- `s`: The string to extract substring from. [String](../data-types/string.md). +- `delim`: The character to split. [String](../data-types/string.md). - `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) **Returned value** -A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`. +A substring [String](../data-types/string.md) of `s` before `count` occurrences of `delim`. **Implementation details** @@ -1050,11 +1050,11 @@ base58Encode(plaintext) **Arguments** -- `plaintext` — [String](../../sql-reference/data-types/string.md) column or constant. +- `plaintext` — [String](../data-types/string.md) column or constant. **Returned value** -- A string containing the encoded value of the argument. [String](../../sql-reference/data-types/string.md). +- A string containing the encoded value of the argument. [String](../data-types/string.md). **Example** @@ -1082,7 +1082,7 @@ base58Decode(encoded) **Arguments** -- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, an exception is thrown. +- `encoded` — [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, an exception is thrown. **Returned value** @@ -1114,7 +1114,7 @@ tryBase58Decode(encoded) **Parameters** -- `encoded`: [String](../../sql-reference/data-types/string.md) column or constant. 
If the string is not a valid Base58-encoded value, returns an empty string in case of error. +- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error. **Returned value** @@ -1158,7 +1158,7 @@ tryBase64Decode(encoded) **Parameters** -- `encoded`: [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error. +- `encoded`: [String](../data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, returns an empty string in case of error. **Examples** @@ -1257,8 +1257,8 @@ trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) **Arguments** -- `trim_character` — Specified characters for trim. [String](../../sql-reference/data-types/string.md). -- `input_string` — String for trim. [String](../../sql-reference/data-types/string.md). +- `trim_character` — Specified characters for trim. [String](../data-types/string.md). +- `input_string` — String for trim. [String](../data-types/string.md). **Returned value** @@ -1292,7 +1292,7 @@ Alias: `ltrim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — string to trim. [String](../data-types/string.md). **Returned value** @@ -1326,7 +1326,7 @@ Alias: `rtrim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — string to trim. [String](../data-types/string.md). **Returned value** @@ -1360,7 +1360,7 @@ Alias: `trim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — string to trim. [String](../data-types/string.md). **Returned value** @@ -1410,11 +1410,11 @@ normalizeQuery(x) **Arguments** -- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — Sequence of characters. [String](../data-types/string.md). **Returned value** -- Sequence of characters with placeholders. [String](../../sql-reference/data-types/string.md). +- Sequence of characters with placeholders. [String](../data-types/string.md). **Example** @@ -1442,11 +1442,11 @@ normalizedQueryHash(x) **Arguments** -- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — Sequence of characters. [String](../data-types/string.md). **Returned value** -- Hash value. [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges). +- Hash value. [UInt64](../data-types/int-uint.md#uint-ranges). **Example** @@ -1474,11 +1474,11 @@ normalizeUTF8NFC(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFC normalization form. [String](../../sql-reference/data-types/string.md). +- String transformed to NFC normalization form. [String](../data-types/string.md). **Example** @@ -1506,11 +1506,11 @@ normalizeUTF8NFD(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFD normalization form. [String](../../sql-reference/data-types/string.md). +- String transformed to NFD normalization form. 
[String](../data-types/string.md). **Example** @@ -1538,11 +1538,11 @@ normalizeUTF8NFKC(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFKC normalization form. [String](../../sql-reference/data-types/string.md). +- String transformed to NFKC normalization form. [String](../data-types/string.md). **Example** @@ -1570,11 +1570,11 @@ normalizeUTF8NFKD(words) **Arguments** -- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../data-types/string.md). **Returned value** -- String transformed to NFKD normalization form. [String](../../sql-reference/data-types/string.md). +- String transformed to NFKD normalization form. [String](../data-types/string.md). **Example** @@ -1605,11 +1605,11 @@ encodeXMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The escaped string. [String](../../sql-reference/data-types/string.md). +- The escaped string. [String](../data-types/string.md). **Example** @@ -1643,11 +1643,11 @@ decodeXMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The un-escaped string. [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../data-types/string.md). **Example** @@ -1677,11 +1677,11 @@ decodeHTMLComponent(x) **Arguments** -- `x` — An input string. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../data-types/string.md). **Returned value** -- The un-escaped string. [String](../../sql-reference/data-types/string.md). +- The un-escaped string. [String](../data-types/string.md). **Example** @@ -1730,11 +1730,11 @@ extractTextFromHTML(x) **Arguments** -- `x` — input text. [String](../../sql-reference/data-types/string.md). +- `x` — input text. [String](../data-types/string.md). **Returned value** -- Extracted text. [String](../../sql-reference/data-types/string.md). +- Extracted text. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 0e183626555..7aeb1f5b2a7 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -202,13 +202,13 @@ translateUTF8(s, from, to) **Parameters** -- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md). -- `from`: A string type [String](/docs/en/sql-reference/data-types/string.md). -- `to`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `s`: A string type [String](../data-types/string.md). +- `from`: A string type [String](../data-types/string.md). +- `to`: A string type [String](../data-types/string.md). **Returned value** -- A [String](/docs/en/sql-reference/data-types/string.md) data type value. +- A [String](../data-types/string.md) data type value. 
**Examples**

diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 43b9e621bc0..07f776906e6 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -17,7 +17,7 @@ Functions in this section also assume that the searched string (referred to in t
violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function
variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the
results are undefined. Note that no automatic Unicode normalization is performed, however you can use the
-[normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
+[normalizeUTF8*()](string-functions.md) functions for that.

[General strings functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately.

@@ -38,12 +38,12 @@ Alias:

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional.
+- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional.

**Returned values**

-- Starting position in bytes and counting from 1, if the substring was found. [UInt64](../../sql-reference/data-types/int-uint.md).
-- 0, if the substring was not found. [UInt64](../../sql-reference/data-types/int-uint.md).
+- Starting position in bytes and counting from 1, if the substring was found. [UInt64](../data-types/int-uint.md).
+- 0, if the substring was not found. [UInt64](../data-types/int-uint.md).

If substring `needle` is empty, these rules apply:
- if no `start_pos` was specified: return `1`
@@ -204,7 +204,7 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
**Arguments**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
+- `needle` — Substrings to be searched. [Array](../data-types/array.md).

**Returned values**

@@ -239,7 +239,7 @@ multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN
**Parameters**

- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
+- `needle` — Substrings to be searched. [Array](../data-types/array.md).

**Returned value**

@@ -273,7 +273,7 @@ multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN])
**Parameters**

- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md).
+- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md).
**Returned value** @@ -309,7 +309,7 @@ multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., nee **Parameters** - `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — UTF-8 encoded substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -347,7 +347,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -381,7 +381,7 @@ multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needle **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -415,7 +415,7 @@ multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -451,7 +451,7 @@ multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., ne **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -488,7 +488,7 @@ multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -522,7 +522,7 @@ multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -556,7 +556,7 @@ multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of UTF-8 substrings to be searched. 
[Array](../data-types/array.md) **Returned value** @@ -592,7 +592,7 @@ multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needl **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -630,7 +630,7 @@ multiSearchAny(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — Substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -664,7 +664,7 @@ multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -698,7 +698,7 @@ multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). +- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md). **Returned value** @@ -734,7 +734,7 @@ multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — UTF-8 substrings to be searched. [Array](../data-types/array.md) **Returned value** @@ -894,12 +894,12 @@ extractAllGroupsHorizontal(haystack, pattern) **Arguments** -- `haystack` — Input string. [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../../sql-reference/data-types/string.md). +- `haystack` — Input string. [String](../data-types/string.md). +- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../data-types/string.md). **Returned value** -- Array of arrays of matches. [Array](../../sql-reference/data-types/array.md). +- Array of arrays of matches. [Array](../data-types/array.md). :::note If `haystack` does not match the `pattern` regex, an array of empty arrays is returned. @@ -931,12 +931,12 @@ extractAllGroupsVertical(haystack, pattern) **Arguments** -- `haystack` — Input string. [String](../../sql-reference/data-types/string.md). -- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. 
[String](../../sql-reference/data-types/string.md).
+- `haystack` — Input string. [String](../data-types/string.md).
+- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. [String](../data-types/string.md).

**Returned value**

-- Array of arrays of matches. [Array](../../sql-reference/data-types/array.md).
+- Array of arrays of matches. [Array](../data-types/array.md).

:::note
If `haystack` does not match the `pattern` regex, an empty array is returned.
@@ -970,7 +970,7 @@ Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which i

If the haystack or the LIKE expression are not valid UTF-8, the behavior is undefined.

-No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
+No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](string-functions.md) functions for that.

To match against literal `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`.
The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`.
@@ -1007,7 +1007,7 @@ Alias: `haystack NOT ILIKE pattern` (operator)

## ngramDistance

-Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other.
+Calculates the 4-gram distance between a `haystack` string and a `needle` string. For this, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a [Float32](../data-types/float.md/#float32-float64) between 0 and 1. The smaller the result is, the more similar the strings are to each other.

Functions [`ngramDistanceCaseInsensitive`](#ngramdistancecaseinsensitive), [`ngramDistanceUTF8`](#ngramdistanceutf8), [`ngramDistanceCaseInsensitiveUTF8`](#ngramdistancecaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.

@@ -1024,7 +1024,7 @@ ngramDistance(haystack, needle)

**Returned value**

-- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64)

**Implementation details**

@@ -1078,7 +1078,7 @@ ngramDistanceCaseInsensitive(haystack, needle)

**Returned value**

-- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64)

**Examples**

@@ -1127,7 +1127,7 @@ ngramDistanceUTF8(haystack, needle)

**Returned value**

-- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64)
+- Value between 0 and 1 representing the similarity between the two strings. 
[Float32](../data-types/float.md/#float32-float64) **Example** @@ -1160,7 +1160,7 @@ ngramDistanceCaseInsensitiveUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the similarity between the two strings. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the similarity between the two strings. [Float32](../data-types/float.md/#float32-float64) **Example** @@ -1178,7 +1178,7 @@ Result: ## ngramSearch -Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a [Float32](../../sql-reference/data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex). +Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams normalized by the number of `needle` n-grams. Returns a [Float32](../data-types/float.md/#float32-float64) between 0 and 1. The bigger the result is, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function [`soundex`](../../sql-reference/functions/string-functions#soundex). Functions [`ngramSearchCaseInsensitive`](#ngramsearchcaseinsensitive), [`ngramSearchUTF8`](#ngramsearchutf8), [`ngramSearchCaseInsensitiveUTF8`](#ngramsearchcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function. @@ -1195,7 +1195,7 @@ ngramSearch(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) **Implementation details** @@ -1234,7 +1234,7 @@ ngramSearchCaseInsensitive(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1269,7 +1269,7 @@ ngramSearchUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1304,7 +1304,7 @@ ngramSearchCaseInsensitiveUTF8(haystack, needle) **Returned value** -- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. [Float32](../../sql-reference/data-types/float.md/#float32-float64) +- Value between 0 and 1 representing the likelihood of the `needle` being in the `haystack`. 
[Float32](../data-types/float.md/#float32-float64) The bigger the result is, the more likely `needle` is in the `haystack`. @@ -1338,11 +1338,11 @@ countSubstrings(haystack, needle[, start_pos]) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. **Returned values** -- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1385,11 +1385,11 @@ countSubstringsCaseInsensitive(haystack, needle[, start_pos]) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. **Returned values** -- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1437,11 +1437,11 @@ countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. **Returned values** -- The number of occurrences. [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of occurrences. [UInt64](../data-types/int-uint.md). **Examples** @@ -1488,11 +1488,11 @@ countMatches(haystack, pattern) **Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). +- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md). **Returned value** -- The number of matches. [UInt64](../../sql-reference/data-types/int-uint.md). +- The number of matches. [UInt64](../data-types/int-uint.md). **Examples** @@ -1533,11 +1533,11 @@ countMatchesCaseInsensitive(haystack, pattern) **Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). +- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../data-types/string.md). **Returned value** -- The number of matches. 
[UInt64](../../sql-reference/data-types/int-uint.md).
+- The number of matches. [UInt64](../data-types/int-uint.md).

**Examples**

@@ -1571,7 +1571,7 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`.

- `haystack` — String, in which regexp pattern will to be matched. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../../sql-reference/data-types/int-uint.md). Optional.
+- `index` – An integer number greater than or equal to 0, with default 1. It represents which regex group to extract. [UInt or Int](../data-types/int-uint.md). Optional.

**Returned values**

diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md
index beb7a0503b9..da8ed1f51ba 100644
--- a/docs/en/sql-reference/functions/time-series-functions.md
+++ b/docs/en/sql-reference/functions/time-series-functions.md
@@ -30,7 +30,7 @@ At least four data points are required in `series` to detect outliers.

**Returned value**

-- Returns an array of the same length as the input array where each value represents score of possible anomaly of corresponding element in the series. A non-zero score indicates a possible anomaly. [Array](../../sql-reference/data-types/array.md).
+- Returns an array of the same length as the input array, where each value represents the anomaly score of the corresponding element in the series. A non-zero score indicates a possible anomaly. [Array](../data-types/array.md).

**Examples**

@@ -79,8 +79,8 @@ seriesPeriodDetectFFT(series);

**Returned value**

-- A real value equal to the period of series data. [Float64](../../sql-reference/data-types/float.md).
-- Returns NAN when number of data points are less than four. [nan](../../sql-reference/data-types/float.md/#nan-and-inf).
+- A real value equal to the period of series data. [Float64](../data-types/float.md).
+- Returns NAN when the number of data points is less than four. [nan](../data-types/float.md/#nan-and-inf).

**Examples**

@@ -130,7 +130,7 @@ The number of data points in `series` should be at least twice the value of `per
**Returned value**

- An array of four arrays where the first array include seasonal components, the second array - trend,
-the third array - residue component, and the fourth array - baseline(seasonal + trend) component. [Array](../../sql-reference/data-types/array.md).
+the third array - residue component, and the fourth array - baseline (seasonal + trend) component. [Array](../data-types/array.md).

**Examples**

diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md
index 2b5f093c149..2cec1987c20 100644
--- a/docs/en/sql-reference/functions/time-window-functions.md
+++ b/docs/en/sql-reference/functions/time-window-functions.md
@@ -17,8 +17,8 @@ tumble(time_attr, interval [, timezone])
```

**Arguments**
-- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type.
-- `interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type.
+- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type.
+- `interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type.
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** @@ -51,9 +51,9 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Arguments** -- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. -- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. -- `window_interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. +- `time_attr` - Date and time. [DateTime](../data-types/datetime.md) data type. +- `hop_interval` - Hop interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. +- `window_interval` - Window interval in [Interval](../data-types/special-data-types/interval.md) data type. Should be a positive number. - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). **Returned values** diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index b4fa442a637..0663be08240 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -35,7 +35,7 @@ tupleElement(tuple, name, [, default_value]) ## untuple -Performs syntactic substitution of [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) elements in the call location. +Performs syntactic substitution of [tuple](../data-types/tuple.md#tuplet1-t2) elements in the call location. The names of the result columns are implementation-specific and subject to change. Do not assume specific column names after `untuple`. @@ -49,7 +49,7 @@ You can use the `EXCEPT` expression to skip columns as a result of the query. **Arguments** -- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md). +- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../data-types/tuple.md). **Returned value** @@ -111,7 +111,7 @@ Result: **See Also** -- [Tuple](../../sql-reference/data-types/tuple.md) +- [Tuple](../data-types/tuple.md) ## tupleHammingDistance @@ -125,8 +125,8 @@ tupleHammingDistance(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). Tuples should have the same type of the elements. @@ -198,11 +198,11 @@ tupleToNameValuePairs(tuple) **Arguments** -- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. +- `tuple` — Named tuple. [Tuple](../data-types/tuple.md) with any types of values. **Returned value** -- An array with (name, value) pairs. [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). +- An array with (name, value) pairs. [Array](../data-types/array.md)([Tuple](../data-types/tuple.md)([String](../data-types/string.md), ...)). **Example** @@ -273,12 +273,12 @@ Alias: `vectorSum`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). 
-- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the sum. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the sum. [Tuple](../data-types/tuple.md). **Example** @@ -310,12 +310,12 @@ Alias: `vectorDifference`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of subtraction. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of subtraction. [Tuple](../data-types/tuple.md). **Example** @@ -345,12 +345,12 @@ tupleMultiply(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the multiplication. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the multiplication. [Tuple](../data-types/tuple.md). **Example** @@ -380,12 +380,12 @@ tupleDivide(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of division. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of division. [Tuple](../data-types/tuple.md). **Example** @@ -415,11 +415,11 @@ tupleNegate(tuple) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../data-types/tuple.md). **Returned value** -- Tuple with the result of negation. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with the result of negation. [Tuple](../data-types/tuple.md). **Example** @@ -449,12 +449,12 @@ tupleMultiplyByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Multiplier. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../data-types/tuple.md). +- `number` — Multiplier. [Int/UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- Tuple with multiplied values. [Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with multiplied values. [Tuple](../data-types/tuple.md). **Example** @@ -484,12 +484,12 @@ tupleDivideByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Divider. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../data-types/tuple.md). +- `number` — Divider. [Int/UInt](../data-types/int-uint.md), [Float](../data-types/float.md) or [Decimal](../data-types/decimal.md). **Returned value** -- Tuple with divided values. 
[Tuple](../../sql-reference/data-types/tuple.md). +- Tuple with divided values. [Tuple](../data-types/tuple.md). **Example** @@ -517,7 +517,7 @@ tupleConcat(tuples) **Arguments** -- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type. +- `tuples` – Arbitrary number of arguments of [Tuple](../data-types/tuple.md) type. **Example** diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index f02c8fde06c..d9c18e2a0a2 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -6,7 +6,7 @@ sidebar_label: Maps ## map -Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types/map.md) data type. +Arranges `key:value` pairs into [Map(key, value)](../data-types/map.md) data type. **Syntax** @@ -16,12 +16,12 @@ map(key1, value1[, key2, value2, ...]) **Arguments** -- `key` — The key part of the pair. Arbitrary type, except [Nullable](../../sql-reference/data-types/nullable.md) and [LowCardinality](../../sql-reference/data-types/lowcardinality.md) nested with [Nullable](../../sql-reference/data-types/nullable.md). -- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md). +- `key` — The key part of the pair. Arbitrary type, except [Nullable](../data-types/nullable.md) and [LowCardinality](../data-types/lowcardinality.md) nested with [Nullable](../data-types/nullable.md). +- `value` — The value part of the pair. Arbitrary type, including [Map](../data-types/map.md) and [Array](../data-types/array.md). **Returned value** -- Data structure as `key:value` pairs. [Map(key, value)](../../sql-reference/data-types/map.md). +- Data structure as `key:value` pairs. [Map(key, value)](../data-types/map.md). **Examples** @@ -61,11 +61,11 @@ Result: **See Also** -- [Map(key, value)](../../sql-reference/data-types/map.md) data type +- [Map(key, value)](../data-types/map.md) data type ## mapFromArrays -Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). Notice that the second argument could also be a [Map](../../sql-reference/data-types/map.md), thus it is casted to an Array when executing. +Merges an [Array](../data-types/array.md) of keys and an [Array](../data-types/array.md) of values into a [Map(key, value)](../data-types/map.md). Notice that the second argument could also be a [Map](../data-types/map.md), thus it is casted to an Array when executing. The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. @@ -81,7 +81,7 @@ Alias: `MAP_FROM_ARRAYS(keys, values)` **Arguments** -- `keys` — Given key array to create a map from. 
The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md)
+- `keys` — Given key array to create a map from. The nested type of array must be: [String](../data-types/string.md), [Integer](../data-types/int-uint.md), [LowCardinality](../data-types/lowcardinality.md), [FixedString](../data-types/fixedstring.md), [UUID](../data-types/uuid.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [Date32](../data-types/date32.md), [Enum](../data-types/enum.md)
- `values` - Given value array or map to create a map from.

**Returned value**
@@ -109,7 +109,7 @@ SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))

## extractKeyValuePairs

-Extracts key-value pairs, i.e. a [Map(String, String)](../../sql-reference/data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files).
+Extracts key-value pairs, i.e. a [Map(String, String)](../data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files).

A key-value pair consists of a key, followed by a `key_value_delimiter` and a value. Key value pairs must be separated by `pair_delimiter`. Quoted keys and values are also supported.

@@ -125,14 +125,14 @@ Alias:

**Arguments**

-- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
-- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
+- `data` - String to extract key-value pairs from. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `pair_delimiters` - Set of characters to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
+- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).

**Returned values**

-- A [Map(String, String)](../../sql-reference/data-types/map.md) of key-value pairs.
+- A [Map(String, String)](../data-types/map.md) of key-value pairs.
**Examples**

@@ -221,11 +221,11 @@ mapAdd(arg1, arg2 [, ...])

**Arguments**

-Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promoted to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
+Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which are promoted to one type ([Int64](../data-types/int-uint.md#int-ranges), [UInt64](../data-types/int-uint.md#uint-ranges) or [Float64](../data-types/float.md#float32-float64)). The common promoted type is used as the type for the result array.

**Returned value**

-- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
+- Depending on the arguments returns one [map](../data-types/map.md) or [tuple](../data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.

**Example**

@@ -269,11 +269,11 @@ mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])

**Arguments**

-Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array.
+Arguments are [maps](../data-types/map.md) or [tuples](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for each key. All key arrays should have the same type, and all value arrays should contain items which are promoted to one type ([Int64](../data-types/int-uint.md#int-ranges), [UInt64](../data-types/int-uint.md#uint-ranges) or [Float64](../data-types/float.md#float32-float64)). The common promoted type is used as the type for the result array.

**Returned value**

-- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values.
+- Depending on the arguments returns one [map](../data-types/map.md) or [tuple](../data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. **Example** @@ -322,21 +322,21 @@ For array arguments the number of elements in `keys` and `values` must be the sa **Arguments** -Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key. +Arguments are [maps](../data-types/map.md) or two [arrays](../data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key. Mapped arrays: -- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). -- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). -- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges). +- `keys` — Array of keys. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)). +- `values` — Array of values. [Array](../data-types/array.md#data-type-array)([Int](../data-types/int-uint.md#uint-ranges)). +- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../data-types/int-uint.md#int-ranges). or -- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md). +- `map` — Map with integer keys. [Map](../data-types/map.md). **Returned value** -- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. +- Depending on the arguments returns a [map](../data-types/map.md) or a [tuple](../data-types/tuple.md#tuplet1-t2) of two [arrays](../data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. **Example** @@ -380,12 +380,12 @@ mapContains(map, key) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). - `key` — Key. Type matches the type of keys of `map` parameter. **Returned value** -- `1` if `map` contains `key`, `0` if not. [UInt8](../../sql-reference/data-types/int-uint.md). +- `1` if `map` contains `key`, `0` if not. [UInt8](../data-types/int-uint.md). **Example** @@ -413,7 +413,7 @@ Result: Returns all keys from the `map` parameter. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [keys](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [keys](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. 
The query `SELECT mapKeys(m) FROM table` transforms to `SELECT m.keys FROM table`. **Syntax** @@ -423,11 +423,11 @@ mapKeys(map) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). **Returned value** -- Array containing all keys from the `map`. [Array](../../sql-reference/data-types/array.md). +- Array containing all keys from the `map`. [Array](../data-types/array.md). **Example** @@ -454,7 +454,7 @@ Result: Returns all values from the `map` parameter. -Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`. +Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [values](../data-types/map.md#map-subcolumns) subcolumn instead of reading and processing the whole column data. The query `SELECT mapValues(m) FROM table` transforms to `SELECT m.values FROM table`. **Syntax** @@ -464,11 +464,11 @@ mapValues(map) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). **Returned value** -- Array containing all the values from `map`. [Array](../../sql-reference/data-types/array.md). +- Array containing all the values from `map`. [Array](../data-types/array.md). **Example** @@ -500,7 +500,7 @@ mapContainsKeyLike(map, pattern) ``` **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). - `pattern` - String pattern to match. **Returned value** @@ -538,7 +538,7 @@ mapExtractKeyLike(map, pattern) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../data-types/map.md). - `pattern` - String pattern to match. **Returned value** @@ -577,7 +577,7 @@ mapApply(func, map) **Arguments** - `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). -- `map` — [Map](../../sql-reference/data-types/map.md). +- `map` — [Map](../data-types/map.md). **Returned value** @@ -617,7 +617,7 @@ mapFilter(func, map) **Arguments** - `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function). -- `map` — [Map](../../sql-reference/data-types/map.md). +- `map` — [Map](../data-types/map.md). **Returned value** @@ -658,8 +658,8 @@ mapUpdate(map1, map2) **Arguments** -- `map1` [Map](../../sql-reference/data-types/map.md). -- `map2` [Map](../../sql-reference/data-types/map.md). +- `map1` [Map](../data-types/map.md). +- `map2` [Map](../data-types/map.md). **Returned value** @@ -691,7 +691,7 @@ mapConcat(maps) **Arguments** -- `maps` – Arbitrary number of arguments of [Map](../../sql-reference/data-types/map.md) type. +- `maps` – Arbitrary number of arguments of [Map](../data-types/map.md) type. 
**Returned value** diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index f1c2e92f201..d123f317dc6 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -51,7 +51,7 @@ SETTINGS cast_keep_nullable = 1 ## toInt(8\|16\|32\|64\|128\|256) -Converts an input value to a value the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to a value the [Int](../data-types/int-uint.md) data type. This function family includes: - `toInt8(expr)` — Converts to a value of data type `Int8`. - `toInt16(expr)` — Converts to a value of data type `Int16`. @@ -62,7 +62,7 @@ Converts an input value to a value the [Int](/docs/en/sql-reference/data-types/i **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -70,7 +70,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -90,7 +90,7 @@ Result: ## toInt(8\|16\|32\|64\|128\|256)OrZero -Takes an argument of type [String](/docs/en/sql-reference/data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`. +Takes an argument of type [String](../data-types/string.md) and tries to parse it into an Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If unsuccessful, returns `0`. **Example** @@ -151,7 +151,7 @@ Result: ## toUInt(8\|16\|32\|64\|256) -Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: +Converts an input value to the [UInt](../data-types/int-uint.md) data type. This function family includes: - `toUInt8(expr)` — Converts to a value of data type `UInt8`. - `toUInt16(expr)` — Converts to a value of data type `UInt16`. @@ -161,7 +161,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](../syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. 
Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -169,7 +169,7 @@ Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. -The behavior of functions for negative arguments and for the [NaN and Inf](/docs/en/sql-reference/data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. +The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. **Example** @@ -203,9 +203,9 @@ Result: ## toDate -Converts the argument to [Date](/docs/en/sql-reference/data-types/date.md) data type. +Converts the argument to [Date](../data-types/date.md) data type. -If the argument is [DateTime](/docs/en/sql-reference/data-types/datetime.md) or [DateTime64](/docs/en/sql-reference/data-types/datetime64.md), it truncates it and leaves the date component of the DateTime: +If the argument is [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md), it truncates it and leaves the date component of the DateTime: ```sql SELECT @@ -219,7 +219,7 @@ SELECT └─────────────────────┴───────────────┘ ``` -If the argument is a [String](/docs/en/sql-reference/data-types/string.md), it is parsed as [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). If it was parsed as [DateTime](/docs/en/sql-reference/data-types/datetime.md), the date component is being used: +If the argument is a [String](../data-types/string.md), it is parsed as [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). If it was parsed as [DateTime](../data-types/datetime.md), the date component is being used: ```sql SELECT @@ -247,7 +247,7 @@ SELECT └────────────┴───────────────────────────────────────────┘ ``` -If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](/docs/en/sql-reference/data-types/datetime.md), then truncated to [Date](/docs/en/sql-reference/data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](/docs/en/sql-reference/data-types/date.md) depends on the timezone: +If the argument is a number and looks like a UNIX timestamp (is greater than 65535), it is interpreted as a [DateTime](../data-types/datetime.md), then truncated to [Date](../data-types/date.md) in the current timezone. The timezone argument can be specified as a second argument of the function. The truncation to [Date](../data-types/date.md) depends on the timezone: ```sql SELECT @@ -276,7 +276,7 @@ date_Samoa_2: 2022-12-31 The example above demonstrates how the same UNIX timestamp can be interpreted as different dates in different time zones. 
-If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](/docs/en/sql-reference/data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example: +If the argument is a number and it is smaller than 65536, it is interpreted as the number of days since 1970-01-01 (the first UNIX day) and converted to [Date](../data-types/date.md). It corresponds to the internal numeric representation of the `Date` data type. Example: ```sql SELECT toDate(12345) @@ -317,7 +317,7 @@ SELECT ## toDateOrZero -The same as [toDate](#todate) but returns lower boundary of [Date](/docs/en/sql-reference/data-types/date.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDate](#todate) but returns lower boundary of [Date](../data-types/date.md) if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -338,7 +338,7 @@ Result: ## toDateOrNull -The same as [toDate](#todate) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDate](#todate) but returns `NULL` if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -359,7 +359,7 @@ Result: ## toDateOrDefault -Like [toDate](#todate) but if unsuccessful, returns a default value which is either the second argument (if specified), or otherwise the lower boundary of [Date](/docs/en/sql-reference/data-types/date.md). +Like [toDate](#todate) but if unsuccessful, returns a default value which is either the second argument (if specified), or otherwise the lower boundary of [Date](../data-types/date.md). **Syntax** @@ -386,7 +386,7 @@ Result: ## toDateTime -Converts an input value to [DateTime](/docs/en/sql-reference/data-types/datetime.md). +Converts an input value to [DateTime](../data-types/datetime.md). **Syntax** @@ -396,18 +396,18 @@ toDateTime(expr[, time_zone ]) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [Int](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). -- `time_zone` — Time zone. [String](/docs/en/sql-reference/data-types/string.md). +- `expr` — The value. [String](../data-types/string.md), [Int](../data-types/int-uint.md), [Date](../data-types/date.md) or [DateTime](../data-types/datetime.md). +- `time_zone` — Time zone. [String](../data-types/string.md). :::note If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp). -If `expr` is a [String](/docs/en/sql-reference/data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time. +If `expr` is a [String](../data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time. Thus, parsing of short numbers' string representations (up to 4 digits) is explicitly disabled due to ambiguity, e.g. a string `'1999'` may be both a year (an incomplete string representation of Date / DateTime) or a unix timestamp. Longer numeric strings are allowed. ::: **Returned value** -- A date time. [DateTime](/docs/en/sql-reference/data-types/datetime.md) +- A date time. 
[DateTime](../data-types/datetime.md) **Example** @@ -428,7 +428,7 @@ Result: ## toDateTimeOrZero -The same as [toDateTime](#todatetime) but returns lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md) if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDateTime](#todatetime) but returns lower boundary of [DateTime](../data-types/datetime.md) if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -449,7 +449,7 @@ Result: ## toDateTimeOrNull -The same as [toDateTime](#todatetime) but returns `NULL` if an invalid argument is received. Only [String](/docs/en/sql-reference/data-types/string.md) argument is supported. +The same as [toDateTime](#todatetime) but returns `NULL` if an invalid argument is received. Only [String](../data-types/string.md) argument is supported. **Example** @@ -470,7 +470,7 @@ Result: ## toDateTimeOrDefault -Like [toDateTime](#todatetime) but if unsuccessful, returns a default value which is either the third argument (if specified), or otherwise the lower boundary of [DateTime](/docs/en/sql-reference/data-types/datetime.md). +Like [toDateTime](#todatetime) but if unsuccessful, returns a default value which is either the third argument (if specified), or otherwise the lower boundary of [DateTime](../data-types/datetime.md). **Syntax** @@ -497,7 +497,7 @@ Result: ## toDate32 -Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account. +Converts the argument to the [Date32](../data-types/date32.md) data type. If the value is outside the range, `toDate32` returns the border values supported by [Date32](../data-types/date32.md). If the argument has [Date](../data-types/date.md) type, it's borders are taken into account. **Syntax** @@ -507,11 +507,11 @@ toDate32(expr) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md). +- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md) or [Date](../data-types/date.md). **Returned value** -- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md). +- A calendar date. Type [Date32](../data-types/date32.md). **Example** @@ -539,7 +539,7 @@ SELECT toDate32('1899-01-01') AS value, toTypeName(value); └────────────┴────────────────────────────────────┘ ``` -3. With [Date](/docs/en/sql-reference/data-types/date.md) argument: +3. With [Date](../data-types/date.md) argument: ``` sql SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value); @@ -553,7 +553,7 @@ SELECT toDate32(toDate('1899-01-01')) AS value, toTypeName(value); ## toDate32OrZero -The same as [toDate32](#todate32) but returns the min value of [Date32](/docs/en/sql-reference/data-types/date32.md) if an invalid argument is received. +The same as [toDate32](#todate32) but returns the min value of [Date32](../data-types/date32.md) if an invalid argument is received. **Example** @@ -593,7 +593,7 @@ Result: ## toDate32OrDefault -Converts the argument to the [Date32](/docs/en/sql-reference/data-types/date32.md) data type. 
If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](/docs/en/sql-reference/data-types/date32.md). If the argument has [Date](/docs/en/sql-reference/data-types/date.md) type, it's borders are taken into account. Returns default value if an invalid argument is received. +Converts the argument to the [Date32](../data-types/date32.md) data type. If the value is outside the range, `toDate32OrDefault` returns the lower border value supported by [Date32](../data-types/date32.md). If the argument has [Date](../data-types/date.md) type, it's borders are taken into account. Returns default value if an invalid argument is received. **Example** @@ -615,7 +615,7 @@ Result: ## toDateTime64 -Converts the argument to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. +Converts the argument to the [DateTime64](../data-types/datetime64.md) data type. **Syntax** @@ -625,13 +625,13 @@ toDateTime64(expr, scale, [timezone]) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). +- `expr` — The value. [String](../data-types/string.md), [UInt32](../data-types/int-uint.md), [Float](../data-types/float.md) or [DateTime](../data-types/datetime.md). - `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. - `timezone` - Time zone of the specified datetime64 object. **Returned value** -- A calendar date and time of day, with sub-second precision. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). +- A calendar date and time of day, with sub-second precision. [DateTime64](../data-types/datetime64.md). **Example** @@ -692,7 +692,7 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN ## toDecimal(32\|64\|128\|256) -Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. +Converts `value` to the [Decimal](../data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. - `toDecimal32(value, S)` - `toDecimal64(value, S)` @@ -701,7 +701,7 @@ Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) ## toDecimal(32\|64\|128\|256)OrNull -Converts an input string to a [Nullable(Decimal(P,S))](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: +Converts an input string to a [Nullable(Decimal(P,S))](../data-types/decimal.md) data type value. This family of functions includes: - `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. - `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. @@ -712,7 +712,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. 
ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -755,7 +755,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrDefault -Converts an input string to a [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: +Converts an input string to a [Decimal(P,S)](../data-types/decimal.md) data type value. This family of functions includes: - `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. @@ -766,7 +766,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -808,7 +808,7 @@ Result: ## toDecimal(32\|64\|128\|256)OrZero -Converts an input value to the [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type. This family of functions includes: +Converts an input value to the [Decimal(P,S)](../data-types/decimal.md) data type. This family of functions includes: - `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. - `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. @@ -819,7 +819,7 @@ These functions should be used instead of `toDecimal*()` functions, if you prefe **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `expr` — [Expression](../syntax.md/#syntax-expressions), returns a value in the [String](../data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. **Returned value** @@ -919,7 +919,7 @@ Also see the `toUnixTimestamp` function. ## toFixedString(s, N) -Converts a [String](/docs/en/sql-reference/data-types/string.md) type argument to a [FixedString(N)](/docs/en/sql-reference/data-types/fixedstring.md) type (a string of fixed length N). +Converts a [String](../data-types/string.md) type argument to a [FixedString(N)](../data-types/fixedstring.md) type (a string of fixed length N). If the string has fewer bytes than N, it is padded with null bytes to the right. If the string has more bytes than N, an exception is thrown. ## toStringCutToZero(s) @@ -968,14 +968,14 @@ toDecimalString(number, scale) **Arguments** -- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), -- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
- * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), - * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60. +- `number` — Value to be represented as String, [Int, UInt](../data-types/int-uint.md), [Float](../data-types/float.md), [Decimal](../data-types/decimal.md), +- `scale` — Number of fractional digits, [UInt8](../data-types/int-uint.md). + * Maximum scale for [Decimal](../data-types/decimal.md) and [Int, UInt](../data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), + * Maximum scale for [Float](../data-types/float.md) is 60. **Returned value** -- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). +- Input value represented as [String](../data-types/string.md) with given number of fractional digits (scale). The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale. **Example** @@ -1017,7 +1017,7 @@ This function accepts a number or date or date with time and returns a FixedStri ## reinterpretAsUUID :::note -In addition to the UUID functions listed here, there is dedicated [UUID function documentation](/docs/en/sql-reference/functions/uuid-functions.md). +In addition to the UUID functions listed here, there is dedicated [UUID function documentation](../functions/uuid-functions.md). ::: Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored. @@ -1030,11 +1030,11 @@ reinterpretAsUUID(fixed_string) **Arguments** -- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring). +- `fixed_string` — Big-endian byte string. [FixedString](../data-types/fixedstring.md/#fixedstring). **Returned value** -- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type). +- The UUID type value. [UUID](../data-types/uuid.md/#uuid-data-type). **Examples** @@ -1087,7 +1087,7 @@ reinterpret(x, type) **Arguments** - `x` — Any type. -- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md). +- `type` — Destination type. [String](../data-types/string.md). **Returned value** @@ -1126,7 +1126,7 @@ x::t **Arguments** - `x` — A value to convert. May be of any type. -- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md). +- `T` — The name of the target data type. [String](../data-types/string.md). - `t` — The target data type. **Returned value** @@ -1175,9 +1175,9 @@ Result: └─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘ ``` -Conversion to [FixedString (N)](/docs/en/sql-reference/data-types/fixedstring.md) only works for arguments of type [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). +Conversion to [FixedString (N)](../data-types/fixedstring.md) only works for arguments of type [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). 
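As a small illustration of the rule above, a `String` value can be cast to `FixedString(N)` — padded with null bytes up to length N, assuming the same padding behaviour as `toFixedString` described earlier — while other argument types are not accepted. A minimal sketch:

```sql
SELECT
    CAST('abc' AS FixedString(5)) AS fs,
    toTypeName(fs) AS type,     -- FixedString(5)
    length(fs) AS byte_length;  -- 5: the value is padded with null bytes to the fixed size
```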
-Type conversion to [Nullable](/docs/en/sql-reference/data-types/nullable.md) and back is supported. +Type conversion to [Nullable](../data-types/nullable.md) and back is supported. **Example** @@ -1251,7 +1251,7 @@ Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in c ## accurateCastOrNull(x, T) -Converts input value `x` to the specified data type `T`. Always returns [Nullable](/docs/en/sql-reference/data-types/nullable.md) type and returns [NULL](/docs/en/sql-reference/syntax.md/#null-literal) if the casted value is not representable in the target type. +Converts input value `x` to the specified data type `T`. Always returns [Nullable](../data-types/nullable.md) type and returns [NULL](../syntax.md/#null-literal) if the casted value is not representable in the target type. **Syntax** @@ -1360,7 +1360,7 @@ Result: ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) -Converts a Number type argument to an [Interval](/docs/en/sql-reference/data-types/special-data-types/interval.md) data type. +Converts a Number type argument to an [Interval](../data-types/special-data-types/interval.md) data type. **Syntax** @@ -1407,9 +1407,9 @@ Result: ## parseDateTime {#type_conversion_functions-parseDateTime} -Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). +Converts a [String](../data-types/string.md) to [DateTime](../data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). -This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime). +This function is the opposite operation of function [formatDateTime](../functions/date-time-functions.md#date_time_functions-formatDateTime). **Syntax** @@ -1429,7 +1429,7 @@ Returns DateTime values parsed from input string according to a MySQL style form **Supported format specifiers** -All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except: +All format specifiers listed in [formatDateTime](../functions/date-time-functions.md#date_time_functions-formatDateTime) except: - %Q: Quarter (1-4) **Example** @@ -1458,7 +1458,7 @@ Alias: `str_to_date`. Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax. -This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax). +This function is the opposite operation of function [formatDateTimeInJodaSyntax](../functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax). 
**Syntax** @@ -1478,7 +1478,7 @@ Returns DateTime values parsed from input string according to a Joda style forma **Supported format specifiers** -All format specifiers listed in [formatDateTimeInJoda](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) are supported, except: +All format specifiers listed in [formatDateTimeInJoda](../functions/date-time-functions.md#date_time_functions-formatDateTime) are supported, except: - S: fraction of second - z: time zone - Z: time zone offset/id @@ -1504,7 +1504,7 @@ Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTime ## parseDateTimeBestEffort ## parseDateTime32BestEffort -Converts a date and time in the [String](/docs/en/sql-reference/data-types/string.md) representation to [DateTime](/docs/en/sql-reference/data-types/datetime.md/#data_type-datetime) data type. +Converts a date and time in the [String](../data-types/string.md) representation to [DateTime](../data-types/datetime.md/#data_type-datetime) data type. The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 1123 - 5.2.14 RFC-822 Date and Time Specification](https://tools.ietf.org/html/rfc1123#page-55), ClickHouse’s and some other date and time formats. @@ -1516,8 +1516,8 @@ parseDateTimeBestEffort(time_string [, time_zone]) **Arguments** -- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md). -- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md). +- `time_string` — String containing a date and time to convert. [String](../data-types/string.md). +- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../data-types/string.md). **Supported non-standard formats** @@ -1533,7 +1533,7 @@ If the year is not specified, it is considered to be equal to the current year. **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](../data-types/datetime.md) data type. **Examples** @@ -1665,7 +1665,7 @@ Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except ## parseDateTime64BestEffort -Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](/docs/en/sql-reference/functions/type-conversion-functions.md/#data_type-datetime) data type. +Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](../functions/type-conversion-functions.md/#data_type-datetime) data type. **Syntax** @@ -1675,13 +1675,13 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) **Arguments** -- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md). -- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `time_string` — String containing a date or date with time to convert. [String](../data-types/string.md). 
+- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](../data-types/datetime.md) data type. **Examples** @@ -1731,7 +1731,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that ## toLowCardinality -Converts input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of same data type. +Converts input parameter to the [LowCardinality](../data-types/lowcardinality.md) version of same data type. To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`. @@ -1743,7 +1743,7 @@ toLowCardinality(expr) **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `expr` — [Expression](../syntax.md/#syntax-expressions) resulting in one of the [supported data types](../data-types/index.md/#data_types). **Returned values** @@ -1978,7 +1978,7 @@ Result: ## snowflakeToDateTime -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](/docs/en/sql-reference/data-types/datetime.md) format. +Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format. **Syntax** @@ -1988,12 +1988,12 @@ snowflakeToDateTime(value[, time_zone]) **Arguments** -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). **Returned value** -- The timestamp component of `value` as a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value. +- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value. **Example** @@ -2014,7 +2014,7 @@ Result: ## snowflakeToDateTime64 -Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) format. +Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format. **Syntax** @@ -2024,12 +2024,12 @@ snowflakeToDateTime64(value[, time_zone]) **Arguments** -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). 
The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). **Returned value** -- The timestamp component of `value` as a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) with scale = 3, i.e. millisecond precision. +- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision. **Example** @@ -2050,7 +2050,7 @@ Result: ## dateTimeToSnowflake -Converts a [DateTime](/docs/en/sql-reference/data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. +Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. **Syntax** @@ -2060,11 +2060,11 @@ dateTimeToSnowflake(value) **Arguments** -- `value` — Date with time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). +- `value` — Date with time. [DateTime](../data-types/datetime.md). **Returned value** -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** @@ -2084,7 +2084,7 @@ Result: ## dateTime64ToSnowflake -Convert a [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. +Convert a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. **Syntax** @@ -2094,11 +2094,11 @@ dateTime64ToSnowflake(value) **Arguments** -- `value` — Date with time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). +- `value` — Date with time. [DateTime64](../data-types/datetime64.md). **Returned value** -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** diff --git a/docs/en/sql-reference/functions/ulid-functions.md b/docs/en/sql-reference/functions/ulid-functions.md index b4e3fc2d164..dc6a803d638 100644 --- a/docs/en/sql-reference/functions/ulid-functions.md +++ b/docs/en/sql-reference/functions/ulid-functions.md @@ -18,7 +18,7 @@ generateULID([x]) **Arguments** -- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. +- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../data-types/index.md#data_types). 
The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. **Returned value** @@ -60,12 +60,12 @@ ULIDStringToDateTime(ulid[, timezone]) **Arguments** -- `ulid` — Input ULID. [String](/docs/en/sql-reference/data-types/string.md) or [FixedString(26)](/docs/en/sql-reference/data-types/fixedstring.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `ulid` — Input ULID. [String](../data-types/string.md) or [FixedString(26)](../data-types/fixedstring.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Timestamp with milliseconds precision. [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). +- Timestamp with milliseconds precision. [DateTime64(3)](../data-types/datetime64.md). **Usage example** diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index cc826b0bba4..130f0147ca1 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -28,7 +28,7 @@ domain(url) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -77,7 +77,7 @@ topLevelDomain(url) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -89,8 +89,8 @@ https://clickhouse.com/time/ **Returned values** -- Domain name. If ClickHouse can parse the input string as a URL. [String](../../sql-reference/data-types/string.md). -- Empty string. If ClickHouse cannot parse the input string as a URL. [String](../../sql-reference/data-types/string.md). +- Domain name. If ClickHouse can parse the input string as a URL. [String](../data-types/string.md). +- Empty string. If ClickHouse cannot parse the input string as a URL. [String](../data-types/string.md). **Example** @@ -153,12 +153,12 @@ cutToFirstSignificantSubdomainCustom(URL, TLD) **Arguments** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../data-types/string.md). +- `TLD` — Custom TLD list name. [String](../data-types/string.md). **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain. [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain. [String](../data-types/string.md). **Example** @@ -205,12 +205,12 @@ cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) **Arguments** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../data-types/string.md). +- `TLD` — Custom TLD list name. [String](../data-types/string.md). 
**Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. [String](../../sql-reference/data-types/string.md). +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. [String](../data-types/string.md). **Example** @@ -257,12 +257,12 @@ firstSignificantSubdomainCustom(URL, TLD) **Arguments** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../data-types/string.md). +- `TLD` — Custom TLD list name. [String](../data-types/string.md). **Returned value** -- First significant subdomain. [String](../../sql-reference/data-types/string.md). +- First significant subdomain. [String](../data-types/string.md). **Example** @@ -408,7 +408,7 @@ netloc(URL) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../data-types/string.md). **Returned value** @@ -462,8 +462,8 @@ cutURLParameter(URL, name) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). -- `name` — name of URL parameter. [String](../../sql-reference/data-types/string.md) or [Array](../../sql-reference/data-types/array.md) of Strings. +- `url` — URL. [String](../data-types/string.md). +- `name` — name of URL parameter. [String](../data-types/string.md) or [Array](../data-types/array.md) of Strings. **Returned value** diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index a16663afc5b..a4e4037eedc 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -18,7 +18,7 @@ generateUUIDv4([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -90,7 +90,7 @@ generateUUIDv7([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. 
**Returned value** @@ -163,7 +163,7 @@ generateUUIDv7ThreadMonotonic([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -233,7 +233,7 @@ generateUUIDv7NonMonotonic([expr]) **Arguments** -- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. **Returned value** @@ -379,8 +379,8 @@ Result: **Arguments** -- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#string). -- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](/docs/en/sql-reference/data-types/uuid.md). +- `string` — String of 36 characters or FixedString(36). [String](../syntax.md#string). +- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](../data-types/uuid.md). **Returned value** @@ -478,7 +478,7 @@ Result: ## UUIDStringToNum -Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). +Accepts `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). **Syntax** @@ -488,7 +488,7 @@ UUIDStringToNum(string[, variant = 1]) **Arguments** -- `string` — A [String](../../sql-reference/syntax.md#syntax-string-literal) of 36 characters or [FixedString](../../sql-reference/syntax.md#syntax-string-literal) +- `string` — A [String](../syntax.md#syntax-string-literal) of 36 characters or [FixedString](../syntax.md#syntax-string-literal) - `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -537,7 +537,7 @@ UUIDNumToString(binary[, variant = 1]) **Arguments** -- `binary` — [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as a binary representation of a UUID. +- `binary` — [FixedString(16)](../data-types/fixedstring.md) as a binary representation of a UUID. 
- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -576,7 +576,7 @@ Result: ## UUIDToNum -Accepts a [UUID](../../sql-reference/data-types/uuid.md) and returns its binary representation as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md), with its format optionally specified by `variant` (`Big-endian` by default). This function replaces calls to two separate functions `UUIDStringToNum(toString(uuid))` so no intermediate conversion from UUID to string is required to extract bytes from a UUID. +Accepts a [UUID](../data-types/uuid.md) and returns its binary representation as a [FixedString(16)](../data-types/fixedstring.md), with its format optionally specified by `variant` (`Big-endian` by default). This function replaces calls to two separate functions `UUIDStringToNum(toString(uuid))` so no intermediate conversion from UUID to string is required to extract bytes from a UUID. **Syntax** @@ -636,11 +636,11 @@ UUIDv7ToDateTime(uuid[, timezone]) **Arguments** - `uuid` — [UUID](../data-types/uuid.md) of version 7. -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../data-types/string.md). **Returned value** -- Timestamp with milliseconds precision. If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). +- Timestamp with milliseconds precision. If the UUID is not a valid version 7 UUID, it returns 1970-01-01 00:00:00.000. [DateTime64(3)](../data-types/datetime64.md). **Usage examples** @@ -684,4 +684,4 @@ serverUUID() ## See also -- [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) +- [dictGetUUID](../functions/ext-dict-functions.md#ext_dict_functions-other) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index 043686889c4..03251f0b9af 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -432,13 +432,13 @@ regionIn(lhs, rhs\[, geobase\]) **Parameters** -- `lhs` — Lhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint). -- `rhs` — Rhs region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint). +- `lhs` — Lhs region ID from the geobase. [UInt32](../data-types/int-uint). +- `rhs` — Rhs region ID from the geobase. [UInt32](../data-types/int-uint). - `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../data-types/string). Optional. **Returned value** -- 1, if it belongs. [UInt8](../../sql-reference/data-types/int-uint). +- 1, if it belongs. [UInt8](../data-types/int-uint). - 0, if it doesn't belong. 
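A usage sketch is shown below; it assumes a geobase is configured on the server and uses purely illustrative region IDs (here `213` stands for a city and `225` for the country that contains it):

```sql
-- Both IDs are hypothetical geobase identifiers, chosen only for illustration.
SELECT
    regionIn(213, 225) AS city_in_country,  -- expected to return 1
    regionIn(225, 213) AS country_in_city;  -- expected to return 0
```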
**Implementation details** From e87c168bd86a0697621b5692f80b1f64e40337a5 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 06:42:13 +0200 Subject: [PATCH 319/392] Turn multi-line returns into a single line --- .../sql-reference/functions/introspection.md | 13 ++--- .../functions/splitting-merging-functions.md | 3 +- .../functions/string-search-functions.md | 58 +++++++------------ .../functions/time-series-functions.md | 3 +- .../sql-reference/functions/url-functions.md | 6 +- 5 files changed, 31 insertions(+), 52 deletions(-) diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 540e148e3f1..5dc57e70591 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -112,9 +112,11 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so ## addressToLineWithInlines -Similar to `addressToLine`, but it will return an Array with all inline functions, and will be much slower as a price. +Similar to `addressToLine`, but returns an Array with all inline functions. As a result of this, it is slower than `addressToLine`. +:::note If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package. +::: **Syntax** @@ -128,11 +130,7 @@ addressToLineWithInlines(address_of_binary_instruction) **Returned value** -- Array which first element is source code filename and the line number in this file delimited by colon. And from second element, inline functions' source code filename and line number and function name are listed. - -- Array with single element which is name of a binary, if the function couldn’t find the debug information. - -- Empty array, if the address is not valid. [Array(String)](../data-types/array.md). +- An array whose first element is the source code filename and line number in the file delimited by a colon. From the second element onwards, inline functions' source code filenames, line numbers and function names are listed. If the function couldn’t find the debug information, then an array with a single element equal to the name of the binary is returned, otherwise an empty array is returned if the address is not valid. [Array(String)](../data-types/array.md). **Example** @@ -324,8 +322,7 @@ demangle(symbol) **Returned value** -- Name of the C++ function. [String](../data-types/string.md). -- Empty string if a symbol is not valid. [String](../data-types/string.md). +- Name of the C++ function, or an empty string if the symbol is not valid. [String](../data-types/string.md). **Example** diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 9ec4ee974c4..a3c28504a29 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -328,8 +328,7 @@ extractAllGroups(text, regexp) **Returned values** -- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). [Array](../data-types/array.md). -- If there is no matching group, returns an empty array. [Array](../data-types/array.md). +- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). If there is no matching group, it returns an empty array. 
[Array](../data-types/array.md). **Example** diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 07f776906e6..d261cff3580 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -40,7 +40,7 @@ Alias: - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** - Starting position in bytes and counting from 1, if the substring was found. [UInt64](../data-types/int-uint.md). - 0, if the substring was not found. [UInt64](../data-types/int-uint.md). @@ -206,7 +206,7 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN]) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Substrings to be searched. [Array](../data-types/array.md). -**Returned values** +**Returned value** - Array of the starting position in bytes and counting from 1, if the substring was found. - 0, if the substring was not found. @@ -492,8 +492,7 @@ multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN]) **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle. Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -526,8 +525,7 @@ multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle. Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -560,8 +558,7 @@ multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN]) **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle, Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -596,8 +593,7 @@ multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needl **Returned value** -- index (starting from 1) of the leftmost found needle. -- 0, if there was no match. +- index (starting from 1) of the leftmost found needle. Otherwise 0, if there was no match. [UInt8](../data-types/int-uint.md). **Example** @@ -1340,7 +1336,7 @@ countSubstrings(haystack, needle[, start_pos]) - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** - The number of occurrences. [UInt64](../data-types/int-uint.md). @@ -1387,7 +1383,7 @@ countSubstringsCaseInsensitive(haystack, needle[, start_pos]) - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** - The number of occurrences. [UInt64](../data-types/int-uint.md). 
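For reference, a minimal sketch of the case-insensitive variant; occurrences are counted in the same non-overlapping way as in `countSubstrings`:

```sql
SELECT
    countSubstringsCaseInsensitive('AAAA', 'aa') AS total,                          -- non-overlapping matches
    countSubstringsCaseInsensitive('abc__ABC__abc', 'abc', 4) AS from_position_4;   -- matches at or after position 4
```

Both columns should return `2` here.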
@@ -1439,7 +1435,7 @@ countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) - `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** - The number of occurrences. [UInt64](../data-types/int-uint.md). @@ -1573,7 +1569,7 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`. - `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal). - `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../data-types/int-uint.md). Optional. -**Returned values** +**Returned value** `pattern` may contain multiple regexp groups, `index` indicates which regex group to extract. An index of 0 means matching the entire regular expression. [String](../data-types/string.md). @@ -1612,10 +1608,9 @@ hasSubsequence(haystack, needle) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md). -- 0, otherwise. [UInt8](../data-types/int-uint.md). +- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -1648,10 +1643,9 @@ hasSubsequenceCaseInsensitive(haystack, needle) - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md). -- 0, otherwise. [UInt8](../data-types/int-uint.md). +- 1, if needle is a subsequence of haystack, 0 otherwise [UInt8](../data-types/int-uint.md). **Examples** @@ -1684,10 +1678,9 @@ hasSubsequenceUTF8(haystack, needle) - `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md). -- 0, otherwise. [UInt8](../data-types/int-uint.md). +- 1, if needle is a subsequence of haystack, 0, otherwise. [UInt8](../data-types/int-uint.md). Query: @@ -1720,10 +1713,9 @@ hasSubsequenceCaseInsensitiveUTF8(haystack, needle) - `haystack` — String in which the search is performed. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — Subsequence to be searched. UTF-8 encoded [String](../../sql-reference/syntax.md#syntax-string-literal). -**Returned values** +**Returned value** -- 1, if needle is a subsequence of haystack. [UInt8](../data-types/int-uint.md). -- 0, otherwise. [UInt8](../data-types/int-uint.md). +- 1, if needle is a subsequence of haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Examples** @@ -1758,8 +1750,7 @@ hasToken(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, if the token is not present. 
+- 1, if the token is present in the haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Implementation details** @@ -1794,9 +1785,7 @@ hasTokenOrNull(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, if the token is not present in the haystack. -- null, if the token is ill-formed. +- 1, if the token is present in the haystack, 0 if it is not present, and null if the token is ill formed. **Implementation details** @@ -1833,8 +1822,7 @@ hasTokenCaseInsensitive(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, otherwise. +- 1, if the token is present in the haystack, 0 otherwise. [UInt8](../data-types/int-uint.md). **Implementation details** @@ -1869,9 +1857,7 @@ hasTokenCaseInsensitiveOrNull(haystack, token) **Returned value** -- 1, if the token is present in the haystack. -- 0, if token is not present. -- null, if the token is ill-formed. +- 1, if the token is present in the haystack, 0 if the token is not present, otherwise [`null`](../data-types/nullable.md) if the token is ill-formed. [UInt8](../data-types/int-uint.md). **Implementation details** diff --git a/docs/en/sql-reference/functions/time-series-functions.md b/docs/en/sql-reference/functions/time-series-functions.md index da8ed1f51ba..ce5dea14ec5 100644 --- a/docs/en/sql-reference/functions/time-series-functions.md +++ b/docs/en/sql-reference/functions/time-series-functions.md @@ -79,8 +79,7 @@ seriesPeriodDetectFFT(series); **Returned value** -- A real value equal to the period of series data. [Float64](../data-types/float.md). -- Returns NAN when number of data points are less than four. [nan](../data-types/float.md/#nan-and-inf). +- A real value equal to the period of series data. NaN when number of data points are less than four. [Float64](../data-types/float.md). **Examples** diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 130f0147ca1..47890e0b271 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -48,8 +48,7 @@ clickhouse.com **Returned values** -- Host name. If ClickHouse can parse the input string as a URL. [String](../data-types/string.md). -- Empty string. If ClickHouse can’t parse the input string as a URL. [String](../data-types/string.md). +- Host name if ClickHouse can parse the input string as a URL, otherwise an empty string. [String](../data-types/string.md). **Example** @@ -89,8 +88,7 @@ https://clickhouse.com/time/ **Returned values** -- Domain name. If ClickHouse can parse the input string as a URL. [String](../data-types/string.md). -- Empty string. If ClickHouse cannot parse the input string as a URL. [String](../data-types/string.md). +- Domain name if ClickHouse can parse the input string as a URL. Otherwise, an empty string. [String](../data-types/string.md). 
**Example** From 67ff6883fd11422231d029cf5a128dd5b87dbdfa Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 06:51:38 +0200 Subject: [PATCH 320/392] Restore original formatting for logical functions and, or, not, xor --- .../functions/logical-functions.md | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index 8448dd4ff12..7222dbeeb0d 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -30,9 +30,11 @@ Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-ope **Returned value** -- `0`, if at least one argument evaluates to `false`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). -- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`. [NULL](../../sql-reference/syntax.md/#null). -- `1`, otherwise. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). +- `0`, if at least one argument evaluates to `false`, +- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`, +- `1`, otherwise. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** @@ -86,7 +88,7 @@ Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-opera - `0`, if all arguments evaluate to `false`, - `NULL`, if all arguments evaluate to `false` and at least one argument is `NULL`. -Type: [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** @@ -134,9 +136,11 @@ Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-ne **Returned value** -- `1`, if `val` evaluates to `false`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). -- `0`, if `val` evaluates to `true`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). -- `NULL`, if `val` is `NULL`. [NULL](../../sql-reference/syntax.md/#null). +- `1`, if `val` evaluates to `false`, +- `0`, if `val` evaluates to `true`, +- `NULL`, if `val` is `NULL`. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** @@ -168,9 +172,11 @@ xor(val1, val2...) **Returned value** -- `1`, for two values: if one of the values evaluates to `false` and other does not. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). -- `0`, for two values: if both values evaluate to `false` or to both `true`. [UInt8](../data-types/int-uint.md) or [Nullable](../data-types/nullable.md)([UInt8](../data-types/int-uint.md)). -- `NULL`, if at least one of the inputs is `NULL`. [NULL](../../sql-reference/syntax.md/#null). 
+- `1`, for two values: if one of the values evaluates to `false` and other does not, +- `0`, for two values: if both values evaluate to `false` or to both `true`, +- `NULL`, if at least one of the inputs is `NULL` + +Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). **Example** From 3071909aca68d73b0e29660896f883ff759ef48e Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 07:00:47 +0200 Subject: [PATCH 321/392] Revert roundAge to original formatting --- .../sql-reference/functions/rounding-functions.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index ab344f664fd..c2998a82205 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -328,14 +328,15 @@ roundAge(num) **Returned value** -- Returns `0`, for $age \lt 1$. [UInt8](../data-types/int-uint.md). -- Returns `17`, for $1 \leq age \leq 17$. [UInt8](../data-types/int-uint.md). -- Returns `18`, for $18 \leq age \leq 24$. [UInt8](../data-types/int-uint.md). -- Returns `25`, for $25 \leq age \leq 34$. [UInt8](../data-types/int-uint.md). -- Returns `35`, for $35 \leq age \leq 44$. [UInt8](../data-types/int-uint.md). -- Returns `45`, for $45 \leq age \leq 54$. [UInt8](../data-types/int-uint.md). -- Returns `55`, for $age \geq 55$. [UInt8](../data-types/int-uint.md). +- Returns `0`, for $age \lt 1$. +- Returns `17`, for $1 \leq age \leq 17$. +- Returns `18`, for $18 \leq age \leq 24$. +- Returns `25`, for $25 \leq age \leq 34$. +- Returns `35`, for $35 \leq age \leq 44$. +- Returns `45`, for $45 \leq age \leq 54$. +- Returns `55`, for $age \geq 55$. +Type: [UInt8](../data-types/int-uint.md). **Example** Query: From b19c5ad13ac56d0e2cf6d0b5361ef7992b18e29b Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 07:01:15 +0200 Subject: [PATCH 322/392] Revert roundAge to original formatting --- docs/en/sql-reference/functions/rounding-functions.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index c2998a82205..d18185c5013 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -337,6 +337,7 @@ roundAge(num) - Returns `55`, for $age \geq 55$. Type: [UInt8](../data-types/int-uint.md). + **Example** Query: From 8783647703ec60eb936824c0265a298a33e9ae43 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 07:03:15 +0200 Subject: [PATCH 323/392] Revert addressToLine to original formatting --- docs/en/sql-reference/functions/introspection.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 5dc57e70591..bec97208843 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -40,10 +40,12 @@ addressToLine(address_of_binary_instruction) **Returned value** -- Source code filename and the line number in this file delimited by colon. [String](../data-types/string.md). - - For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. 
-- Name of a binary, if the function couldn’t find the debug information. [String](../data-types/string.md). -- Empty string, if the address is not valid. [String](../data-types/string.md). +- Source code filename and the line number in this file delimited by colon. + For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number. +- Name of a binary, if the function couldn’t find the debug information. +- Empty string, if the address is not valid. + +Type: [String](../../sql-reference/data-types/string.md). **Example** From c638de90c2d6e0a2aa48d2eadd763ad7aa47e3a7 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 10:01:06 +0200 Subject: [PATCH 324/392] Fix incorrectly placed :::note blocks --- .../sql-reference/functions/splitting-merging-functions.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index a3c28504a29..20d63d84628 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -27,13 +27,11 @@ splitByChar(separator, s[, max_substrings])) - An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). -:::note Empty substrings may be selected when: - A separator occurs at the beginning or end of the string; - There are multiple consecutive separators; - The original string `s` is empty. -::: :::note The behavior of parameter `max_substrings` changed starting with ClickHouse v22.11. In versions older than that, `max_substrings > 0` meant that `max_substring`-many splits were performed and that the remainder of the string was returned as the final element of the list. @@ -80,13 +78,13 @@ splitByString(separator, s[, max_substrings])) - An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). -:::note Empty substrings may be selected when: - A non-empty separator occurs at the beginning or end of the string; - There are multiple consecutive non-empty separators; - The original string `s` is empty while the separator is not empty. +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. ::: @@ -137,13 +135,14 @@ splitByRegexp(regexp, s[, max_substrings])) - An array of selected substrings. [Array](../data-types/array.md)([String](../data-types/string.md)). -:::note + Empty substrings may be selected when: - A non-empty regular expression match occurs at the beginning or end of the string; - There are multiple consecutive non-empty regular expression matches; - The original string `s` is empty while the regular expression is not empty. +:::note Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0. 
::: From 480f911c7664c15cccf913b0b7cc3d66645c557c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 May 2024 08:33:44 +0000 Subject: [PATCH 325/392] Fix spelling --- .../aspell-ignore/en/aspell-dict.txt | 117 +++++++++--------- 1 file changed, 59 insertions(+), 58 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 1c601bc200a..6df2e426561 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -6,6 +6,7 @@ AMPLab AMQP ANNIndex ANNIndexes +ANOVA AORM APIs ARMv @@ -29,13 +30,6 @@ Alexey AnyEvent AppleClang Approximative -arrayDotProduct -arrayEnumerateDenseRanked -arrayEnumerateUniqRanked -arrayFirstOrNull -arrayLastOrNull -arrayPartialShuffle -arrayShuffle ArrayJoin ArrowStream AsyncInsertCacheSize @@ -53,8 +47,6 @@ AutoFDO AutoML Autocompletion AvroConfluent -analysisOfVariance -ANOVA BIGINT BIGSERIAL BORO @@ -186,7 +178,6 @@ ComplexKeyCache ComplexKeyDirect ComplexKeyHashed Composable -composable Config ConnectionDetails Const @@ -396,8 +387,6 @@ InterserverThreads IsPentagon IsResClassIII IsValid -isNotDistinctFrom -isNullable JBOD JOINed JOINs @@ -466,8 +455,6 @@ KittenHouse Klickhouse Kolmogorov Konstantin -kostik -kostikConsistentHash Korzeniewski Kubernetes LDAP @@ -477,9 +464,8 @@ LLDB LLVM's LOCALTIME LOCALTIMESTAMP -LOONGARCH LONGLONG -LoongArch +LOONGARCH Levenshtein Liao LibFuzzer @@ -497,6 +483,7 @@ LocalThreadActive LogQL Logstash LookML +LoongArch LowCardinality LpDistance LpNorm @@ -571,17 +558,6 @@ MindsDB Mongodb Monotonicity MsgPack -multiSearchAllPositionsCaseInsensitive -multiSearchAllPositionsCaseInsensitiveUTF -multiSearchAnyCaseInsensitive -multiSearchAnyCaseInsensitiveUTF -multiSearchAnyUTF -multiSearchFirstIndexCaseInsensitive -multiSearchFirstIndexCaseInsensitiveUTF -multiSearchFirstIndexUTF -multiSearchFirstPositionCaseInsensitive -multiSearchFirstPositionCaseInsensitiveUTF -multiSearchFirstPositionUTF MultiPolygon Multiline Multiqueries @@ -683,8 +659,8 @@ OSUserTimeNormalized OTLP OUTFILE ObjectId -Observability Oblakov +Observability Octonica Ok OnTime @@ -765,7 +741,6 @@ Promtail Protobuf ProtobufSingle ProxySQL -proportionsZTest Punycode PyArrow PyCharm @@ -886,7 +861,6 @@ Simhash SimpleAggregateFunction SimpleState SipHash -sigmoid Smirnov's Smirnov'test Soundex @@ -932,7 +906,6 @@ TAVG TCPConnection TCPThreads TDigest -ThreadMonotonic TINYINT TLSv TMAX @@ -958,7 +931,6 @@ TablesLoaderForegroundThreads TablesLoaderForegroundThreadsActive TablesToDropQueueSize TargetSpecific -tanh Telegraf TemplateIgnoreSpaces TemporaryFilesForAggregation @@ -968,6 +940,7 @@ TemporaryFilesUnknown Testflows Tgz Theil's +ThreadMonotonic ThreadPoolFSReaderThreads ThreadPoolFSReaderThreadsActive ThreadPoolRemoteFSReaderThreads @@ -1028,7 +1001,6 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch -unshuffled Updatable Uppercased Uptime @@ -1095,6 +1067,7 @@ activerecord addDate addDays addHours +addInterval addMicroseconds addMilliseconds addMinutes @@ -1102,10 +1075,9 @@ addMonths addNanoseconds addQuarters addSeconds +addTupleOfIntervals addWeeks addYears -addInterval -addTupleOfIntervals addr addressToLine addressToLineWithInlines @@ -1120,6 +1092,7 @@ aiochclient allocator alphaTokens amplab +analysisOfVariance analytics anonymize anonymized @@ -1147,15 +1120,19 @@ arrayCumSum arrayCumSumNonNegative arrayDifference arrayDistinct +arrayDotProduct arrayElement arrayEnumerate 
arrayEnumerateDense +arrayEnumerateDenseRanked arrayEnumerateUniq +arrayEnumerateUniqRanked arrayExists arrayFill arrayFilter arrayFirst arrayFirstIndex +arrayFirstOrNull arrayFlatten arrayFold arrayIntersect @@ -1163,10 +1140,12 @@ arrayJaccardIndex arrayJoin arrayLast arrayLastIndex +arrayLastOrNull arrayMap arrayMax arrayMin arrayPartialReverseSort +arrayPartialShuffle arrayPartialSort arrayPopBack arrayPopFront @@ -1186,6 +1165,7 @@ arrayRotateRight arrayShiftLeft arrayShiftRight arrayShingles +arrayShuffle arraySlice arraySort arraySplit @@ -1367,6 +1347,7 @@ collapsingmergetree combinator combinators comparising +composable compressability concat concatAssumeInjective @@ -1728,8 +1709,8 @@ hasSubsequenceCaseInsensitive hasSubsequenceCaseInsensitiveUTF hasSubsequenceUTF hasSubstr -hasToken hasThreadFuzzer +hasToken hasTokenCaseInsensitive hasTokenCaseInsensitiveOrNull hasTokenOrNull @@ -1802,8 +1783,10 @@ isIPAddressInRange isIPv isInfinite isNaN +isNotDistinctFrom isNotNull isNull +isNullable isValidJSON isValidUTF isZeroOrNull @@ -1855,6 +1838,8 @@ kolmogorovSmirnovTest kolmogorovsmirnovtest kolya konsole +kostik +kostikConsistentHash kurtPop kurtSamp kurtosis @@ -1866,9 +1851,9 @@ laravel largestTriangleThreeBuckets latencies ldap -leftUTF leftPad leftPadUTF +leftUTF lemmatization lemmatize lemmatized @@ -1915,8 +1900,8 @@ logTrace logagent loghouse london -loongarch lookups +loongarch lowcardinality lowerUTF lowercased @@ -1987,8 +1972,8 @@ mispredictions mmap mmapped modularization -moduloOrZero moduli +moduloOrZero mongodb monotonicity monthName @@ -2005,10 +1990,21 @@ multiMatchAllIndices multiMatchAny multiMatchAnyIndex multiSearchAllPositions +multiSearchAllPositionsCaseInsensitive +multiSearchAllPositionsCaseInsensitiveUTF multiSearchAllPositionsUTF multiSearchAny +multiSearchAnyCaseInsensitive +multiSearchAnyCaseInsensitiveUTF +multiSearchAnyUTF multiSearchFirstIndex +multiSearchFirstIndexCaseInsensitive +multiSearchFirstIndexCaseInsensitiveUTF +multiSearchFirstIndexUTF multiSearchFirstPosition +multiSearchFirstPositionCaseInsensitive +multiSearchFirstPositionCaseInsensitiveUTF +multiSearchFirstPositionUTF multibyte multidirectory multiline @@ -2094,6 +2090,7 @@ ok omclickhouse onstraints ontime +onwards openSSL openSUSE openldap @@ -2205,6 +2202,7 @@ procfs profiler proleptic prometheus +proportionsZTest proto protobuf protobufsingle @@ -2343,8 +2341,8 @@ retentions rethrow retransmit retriable -rewritable reverseUTF +rewritable rightPad rightPadUTF rightUTF @@ -2404,8 +2402,9 @@ sharded sharding shortcircuit shortkeys -showCertificate shoutout +showCertificate +sigmoid simdjson simpleJSON simpleJSONExtractBool @@ -2419,8 +2418,8 @@ simpleLinearRegression simpleaggregatefunction simplelinearregression simpod -singlepart singleValueOrNull +singlepart singlevalueornull sinh sipHash @@ -2465,13 +2464,13 @@ statbox stateful stddev stddevPop -stddevSamp -stddevpop -stddevsamp -stddevpopstable stddevPopStable -stddevsampstable +stddevSamp stddevSampStable +stddevpop +stddevpopstable +stddevsamp +stddevsampstable stderr stdin stdout @@ -2532,6 +2531,7 @@ substrings subtitiles subtractDays subtractHours +subtractInterval subtractMicroseconds subtractMilliseconds subtractMinutes @@ -2539,10 +2539,9 @@ subtractMonths subtractNanoseconds subtractQuarters subtractSeconds +subtractTupleOfIntervals subtractWeeks subtractYears -subtractInterval -subtractTupleOfIntervals subtree subtrees subtype @@ -2551,13 +2550,13 @@ sumCount sumKahan sumMap sumMapFiltered +sumMapFilteredWithOverflow 
+sumMapWithOverflow sumWithOverflow sumcount sumkahan summap summapwithoverflow -sumMapWithOverflow -sumMapFilteredWithOverflow summingmergetree sumwithoverflow superaggregates @@ -2580,6 +2579,7 @@ tabseparatedrawwithnames tabseparatedrawwithnamesandtypes tabseparatedwithnames tabseparatedwithnamesandtypes +tanh tcp tcpPort tcpnodelay @@ -2714,18 +2714,18 @@ tupleDivide tupleDivideByNumber tupleElement tupleHammingDistance +tupleIntDiv +tupleIntDivByNumber +tupleIntDivOrZero +tupleIntDivOrZeroByNumber tupleMinus +tupleModulo +tupleModuloByNumber tupleMultiply tupleMultiplyByNumber tupleNegate tuplePlus tupleToNameValuePairs -tupleIntDiv -tupleIntDivByNumber -tupleIntDivOrZero -tupleIntDivOrZeroByNumber -tupleModulo -tupleModuloByNumber turbostat txt typename @@ -2769,6 +2769,7 @@ unrealiable unreplicated unresolvable unrounded +unshuffled untracked untrusted untuple @@ -2779,8 +2780,8 @@ uptime uptrace uring url -urlencoded urlCluster +urlencoded urls usearch userspace From 3e21ff92a38ece0b0ebcf72554e45d33ce612771 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 10:53:19 +0200 Subject: [PATCH 326/392] CI: master workflow with folded jobs --- .github/workflows/master.yml | 825 ++--------------------------- .github/workflows/merge_queue.yml | 6 +- .github/workflows/pull_request.yml | 22 +- 3 files changed, 62 insertions(+), 791 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index d2ea714e4e4..11ec484d208 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -53,13 +53,13 @@ jobs: - name: Re-create GH statuses for skipped jobs if any run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --infile ${{ runner.temp }}/ci_run_data.json --update-gh-statuses - BuildDockers: - needs: [RunConfig] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_docker.yml - with: - data: ${{ needs.RunConfig.outputs.data }} - # Tested in MQ +# Runs in MQ: +# BuildDockers: +# needs: [RunConfig] +# if: ${{ !failure() && !cancelled() }} +# uses: ./.github/workflows/reusable_docker.yml +# with: +# data: ${{ needs.RunConfig.outputs.data }} # StyleCheck: # needs: [RunConfig, BuildDockers] # if: ${{ !failure() && !cancelled() }} @@ -70,262 +70,73 @@ jobs: # data: ${{ needs.RunConfig.outputs.data }} # run_command: | # python3 style_check.py --no-push - CompatibilityCheckX86: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml + + ################################# Main stages ################################# + # for main CI chain + # + Builds_1: + needs: [RunConfig] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }} + # using callable wf (reusable_stage.yml) allows grouping all nested jobs under a tab + uses: ./.github/workflows/reusable_build_stage.yml with: - test_name: Compatibility check (amd64) - runner_type: style-checker + stage: Builds_1 data: ${{ needs.RunConfig.outputs.data }} - CompatibilityCheckAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml + Tests_1: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_1') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - test_name: Compatibility check (aarch64) - runner_type: style-checker + stage: Tests_1 data: ${{ 
needs.RunConfig.outputs.data }} -######################################################################################### -#################################### ORDINARY BUILDS #################################### -######################################################################################### -# TODO: never skip builds! - BuilderDebRelease: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + Builds_2: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_2') }} + uses: ./.github/workflows/reusable_build_stage.yml with: - build_name: package_release - checkout_depth: 0 + stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} - BuilderDebReleaseCoverage: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + Tests_2: + needs: [RunConfig, Builds_2] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - build_name: package_release_coverage - checkout_depth: 0 + stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - BuilderDebAarch64: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml + # stage for jobs that do not prohibit merge + Tests_3: + needs: [RunConfig, Builds_1] + if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} + uses: ./.github/workflows/reusable_test_stage.yml with: - build_name: package_aarch64 - checkout_depth: 0 + stage: Tests_3 data: ${{ needs.RunConfig.outputs.data }} - BuilderBinRelease: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_release - checkout_depth: 0 # otherwise we will have no info about contributors - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebAsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_asan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebUBsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_ubsan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebTsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_tsan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebMsan: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_msan - data: ${{ needs.RunConfig.outputs.data }} - BuilderDebDebug: - needs: [RunConfig, BuildDockers] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: package_debug - data: ${{ needs.RunConfig.outputs.data }} -########################################################################################## -##################################### SPECIAL BUILDS ##################################### -########################################################################################## - BuilderBinClangTidy: - needs: [RunConfig, 
BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_tidy - data: ${{ needs.RunConfig.outputs.data }} - BuilderBinDarwin: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_darwin - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_aarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinFreeBSD: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_freebsd - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinDarwinAarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_darwin_aarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinPPC64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_ppc64le - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAmd64Compat: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_amd64_compat - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAmd64Musl: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_amd64_musl - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinAarch64V80Compat: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_aarch64_v80compat - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinRISCV64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_riscv64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinS390X: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_s390x - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 - BuilderBinLoongarch64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_build.yml - with: - build_name: binary_loongarch64 - data: ${{ needs.RunConfig.outputs.data }} - checkout_depth: 0 -############################################################################################ -##################################### Docker images ####################################### -############################################################################################ - DockerServerImage: - needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Docker server image - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} - DockerKeeperImage: - 
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Docker keeper image - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################ -##################################### BUILD REPORTER ####################################### -############################################################################################ - BuilderReport: + + ################################# Reports ################################# + # Reports should be run even if Builds_1/2 failed - put them separately in wf (not in Tests_1/2) + Builds_1_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderDebAarch64 - - BuilderDebAsan - - BuilderDebDebug - - BuilderDebMsan - - BuilderDebRelease - - BuilderDebTsan - - BuilderDebUBsan + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse build check') }} + needs: [RunConfig, Builds_1] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse build check runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} - BuilderSpecialReport: + Builds_2_Report: # run report check for failed builds to indicate the CI error - if: ${{ !cancelled() }} - needs: - - RunConfig - - BuilderBinAarch64 - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - - BuilderBinFreeBSD - - BuilderBinPPC64 - - BuilderBinRISCV64 - - BuilderBinS390X - - BuilderBinLoongarch64 - - BuilderBinAmd64Compat - - BuilderBinAarch64V80Compat - - BuilderBinClangTidy - - BuilderBinAmd64Musl - - BuilderDebReleaseCoverage - - BuilderBinRelease + if: ${{ !cancelled() && needs.RunConfig.result == 'success' && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse special build check') }} + needs: [RunConfig, Builds_2] uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse special build check runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} + MarkReleaseReady: if: ${{ !failure() && !cancelled() }} - needs: - - BuilderBinDarwin - - BuilderBinDarwinAarch64 - - BuilderDebRelease - - BuilderDebAarch64 - runs-on: [self-hosted, style-checker] + needs: [RunConfig, Builds_1] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Debug run: | @@ -338,7 +149,7 @@ jobs: no both ${{ !(contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }} EOF - name: Not ready - # fail the job to be able restart it + # fail the job to be able to restart it if: ${{ contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure') }} run: exit 1 - name: Check out repository code @@ -349,544 +160,14 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 mark_release_ready.py -############################################################################################ -#################################### INSTALL PACKAGES ###################################### -############################################################################################ - InstallPackagesTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Install packages (amd64) - runner_type: style-checker - data: ${{ needs.RunConfig.outputs.data }} - 
run_command: | - python3 install_check.py "$CHECK_NAME" - InstallPackagesTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Install packages (arm64) - runner_type: style-checker-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 install_check.py "$CHECK_NAME" -############################################################################################## -########################### FUNCTIONAl STATELESS TESTS ####################################### -############################################################################################## - FunctionalStatelessTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestReleaseAnalyzerS3Replicated: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (release, old analyzer, s3, DatabaseReplicated) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestS3Debug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (debug, s3 storage) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestS3Tsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (tsan, s3 storage) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (tsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (msan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (ubsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (debug) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatelessTestAsanAzure: - needs: 
[RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateless tests (azure, asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -############################ FUNCTIONAl STATEFUL TESTS ####################################### -############################################################################################## - FunctionalStatefulTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (asan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (tsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (msan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (ubsan) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (debug) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - # Parallel replicas - FunctionalStatefulTestDebugParallelReplicas: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (debug, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestUBsanParallelReplicas: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (ubsan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestMsanParallelReplicas: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (msan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestTsanParallelReplicas: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful 
tests (tsan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestAsanParallelReplicas: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (asan, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - FunctionalStatefulTestReleaseParallelReplicas: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stateful tests (release, ParallelReplicas) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -########################### ClickBench ####################################################### -############################################################################################## - ClickBenchAMD64: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickBench (amd64) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 clickbench.py "$CHECK_NAME" - ClickBenchAarch64: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: ClickBench (aarch64) - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} - run_command: | - python3 clickbench.py "$CHECK_NAME" -############################################################################################## -######################################### STRESS TESTS ####################################### -############################################################################################## - StressTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (asan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestTsanAzure: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (azure, tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (msan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (ubsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - StressTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Stress test (debug) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} 
-############################################################################################# -############################# INTEGRATION TESTS ############################################# -############################################################################################# - IntegrationTestsAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (asan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsAnalyzerAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (asan, old analyzer) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (tsan) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - IntegrationTestsRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Integration tests (release) - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -##################################### AST FUZZERS ############################################ -############################################################################################## - ASTFuzzerTestAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (asan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (tsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestUBSan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (ubsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestMSan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (msan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - ASTFuzzerTestDebug: - needs: [RunConfig, BuilderDebDebug] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: AST fuzzer (debug) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################# -#################################### UNIT TESTS ############################################# -############################################################################################# - UnitTestsAsan: - needs: [RunConfig, BuilderDebAsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (asan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsReleaseClang: - needs: 
[RunConfig, BuilderBinRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (release) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsTsan: - needs: [RunConfig, BuilderDebTsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (tsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsMsan: - needs: [RunConfig, BuilderDebMsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (msan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - UnitTestsUBsan: - needs: [RunConfig, BuilderDebUBsan] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Unit tests (ubsan) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################# -#################################### PERFORMANCE TESTS ###################################### -############################################################################################# - PerformanceComparisonX86: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Performance Comparison - runner_type: stress-tester - data: ${{ needs.RunConfig.outputs.data }} - PerformanceComparisonAarch: - needs: [RunConfig, BuilderDebAarch64] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Performance Comparison Aarch64 - runner_type: func-tester-aarch64 - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -############################ SQLLOGIC TEST ################################################### -############################################################################################## - SQLLogicTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: Sqllogic test (release) - runner_type: func-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -##################################### SQL TEST ############################################### -############################################################################################## - SQLTest: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLTest - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} -############################################################################################## -###################################### SQLANCER FUZZERS ###################################### -############################################################################################## - SQLancerTestRelease: - needs: [RunConfig, BuilderDebRelease] - if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLancer (release) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} - SQLancerTestDebug: - needs: [RunConfig, BuilderDebDebug] - 
if: ${{ !failure() && !cancelled() }} - uses: ./.github/workflows/reusable_test.yml - with: - test_name: SQLancer (debug) - runner_type: fuzzer-unit-tester - data: ${{ needs.RunConfig.outputs.data }} FinishCheck: if: ${{ !failure() && !cancelled() }} - needs: - - MarkReleaseReady - - FunctionalStatelessTestDebug - - FunctionalStatelessTestRelease - - FunctionalStatelessTestReleaseAnalyzerS3Replicated - - FunctionalStatelessTestAarch64 - - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan - - FunctionalStatelessTestUBsan - - FunctionalStatelessTestS3Debug - - FunctionalStatelessTestS3Tsan - - FunctionalStatefulTestDebug - - FunctionalStatefulTestRelease - - FunctionalStatefulTestAarch64 - - FunctionalStatefulTestAsan - - FunctionalStatefulTestTsan - - FunctionalStatefulTestMsan - - FunctionalStatefulTestUBsan - - FunctionalStatefulTestDebugParallelReplicas - - FunctionalStatefulTestUBsanParallelReplicas - - FunctionalStatefulTestMsanParallelReplicas - - FunctionalStatefulTestTsanParallelReplicas - - FunctionalStatefulTestAsanParallelReplicas - - FunctionalStatefulTestReleaseParallelReplicas - - StressTestDebug - - StressTestAsan - - StressTestTsan - - StressTestMsan - - StressTestUBsan - - IntegrationTestsAsan - - IntegrationTestsAnalyzerAsan - - IntegrationTestsTsan - - IntegrationTestsRelease - - PerformanceComparisonX86 - - PerformanceComparisonAarch - - CompatibilityCheckX86 - - CompatibilityCheckAarch64 - - ASTFuzzerTestDebug - - ASTFuzzerTestAsan - - ASTFuzzerTestTsan - - ASTFuzzerTestMSan - - ASTFuzzerTestUBSan - - UnitTestsAsan - - UnitTestsTsan - - UnitTestsMsan - - UnitTestsUBsan - - UnitTestsReleaseClang - - SQLancerTestRelease - - SQLancerTestDebug - - SQLLogicTestRelease - - SQLTest - runs-on: [self-hosted, style-checker] + needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 - with: - clear-repository: true - name: Finish label run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 97aa0db4cdb..d1b03198485 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -20,7 +20,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true # to ensure correct digests - fetch-depth: 0 # to get version + fetch-depth: 0 # to get a version filter: tree:0 - name: Cancel PR workflow run: | @@ -60,7 +60,7 @@ jobs: uses: ./.github/workflows/reusable_test.yml with: test_name: Style check - runner_type: style-checker + runner_type: style-checker-aarch64 run_command: | python3 style_check.py data: ${{ needs.RunConfig.outputs.data }} @@ -85,7 +85,7 @@ jobs: FinishCheck: if: ${{ !failure() && !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest] - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 48b4a558580..aa570c3ce2f 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -31,7 +31,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true # to ensure correct digests - fetch-depth: 0 # to get version + fetch-depth: 0 # to get a version filter: tree:0 - name: Cancel Sync PR workflow run: | @@ -78,7 +78,7 @@ jobs: uses: 
./.github/workflows/reusable_test.yml with: test_name: Style check - runner_type: style-checker + runner_type: style-checker-aarch64 run_command: | python3 style_check.py data: ${{ needs.RunConfig.outputs.data }} @@ -98,13 +98,13 @@ jobs: run_command: | python3 fast_test_check.py - ################################# Main statges ################################# + ################################# Main stages ################################# # for main CI chain # Builds_1: needs: [RunConfig, StyleCheck, FastTest] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_1') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab + # using callable wf (reusable_stage.yml) allows grouping all nested jobs under a tab uses: ./.github/workflows/reusable_build_stage.yml with: stage: Builds_1 @@ -112,7 +112,6 @@ jobs: Tests_1: needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_1') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_1 @@ -120,7 +119,6 @@ jobs: Builds_2: needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Builds_2') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_build_stage.yml with: stage: Builds_2 @@ -128,7 +126,6 @@ jobs: Tests_2: needs: [RunConfig, Builds_2] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} - # using callable wf (reusable_stage.yml) allows to group all nested jobs under a tab uses: ./.github/workflows/reusable_test_stage.yml with: stage: Tests_2 @@ -182,7 +179,7 @@ jobs: FinishCheck: if: ${{ !failure() && !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Check out repository code uses: ClickHouse/checkout@v1 @@ -192,13 +189,6 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py - # FIXME: merge on approval does not work with MQ. 
Could be fixed by using defaul GH's automerge after some corrections in Mergeable Check status - # - name: Auto merge if approved - # if: ${{ github.event_name != 'merge_group' }} - # run: | - # cd "$GITHUB_WORKSPACE/tests/ci" - # python3 merge_pr.py --check-approved - ############################################################################################# ###################################### JEPSEN TESTS ######################################### @@ -216,5 +206,5 @@ jobs: uses: ./.github/workflows/reusable_test.yml with: test_name: ClickHouse Keeper Jepsen - runner_type: style-checker + runner_type: style-checker-aarch64 data: ${{ needs.RunConfig.outputs.data }} From d4fb2d50e95762838b46356a79e7ba8ecd3e4c5e Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 11:36:28 +0200 Subject: [PATCH 327/392] CI: Sync, Merge check, CI gh's statuses fixes --- .github/workflows/master.yml | 21 +++++++++++---------- .github/workflows/pull_request.yml | 7 +++++-- tests/ci/ci.py | 29 +++++++++++++++++++++++++++-- tests/ci/commit_status_helper.py | 13 +++++++------ tests/ci/finish_check.py | 2 +- tests/ci/merge_pr.py | 1 - 6 files changed, 51 insertions(+), 22 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 11ec484d208..7c55098bdfd 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -27,15 +27,16 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 sync_pr.py --merge || : - - name: Python unit tests - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - echo "Testing the main ci directory" - python3 -m unittest discover -s . -p 'test_*.py' - for dir in *_lambda/; do - echo "Testing $dir" - python3 -m unittest discover -s "$dir" -p 'test_*.py' - done +# Runs in MQ: +# - name: Python unit tests +# run: | +# cd "$GITHUB_WORKSPACE/tests/ci" +# echo "Testing the main ci directory" +# python3 -m unittest discover -s . 
-p 'test_*.py' +# for dir in *_lambda/; do +# echo "Testing $dir" +# python3 -m unittest discover -s "$dir" -p 'test_*.py' +# done - name: PrepareRunConfig id: runconfig run: | @@ -162,7 +163,7 @@ jobs: python3 mark_release_ready.py FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: [RunConfig, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index aa570c3ce2f..7d22554473e 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -33,9 +33,12 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get a version filter: tree:0 - - name: Cancel Sync PR workflow + - name: Cancel previous Sync PR workflow run: | python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --cancel-previous-run + - name: Set pending Sync status + run: | + python3 "$GITHUB_WORKSPACE/tests/ci/ci.py" --set-pending-status - name: Labels check run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -177,7 +180,7 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !cancelled() }} needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_1_Report, Builds_2_Report, Tests_1, Tests_2, Tests_3] runs-on: [self-hosted, style-checker-aarch64] steps: diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 4afd3f46f9d..fc25bee354d 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -17,7 +17,7 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union import docker_images_helper import upload_result_helper from build_check import get_release_or_pr -from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames +from ci_config import CI_CONFIG, Build, CILabels, CIStages, JobNames, StatusNames from ci_utils import GHActions, is_hex, normalize_string from clickhouse_helper import ( CiLogsCredentials, @@ -52,7 +52,7 @@ from git_helper import GIT_PREFIX, Git from git_helper import Runner as GitRunner from github_helper import GitHub from pr_info import PRInfo -from report import ERROR, SUCCESS, BuildResult, JobReport +from report import ERROR, SUCCESS, BuildResult, JobReport, PENDING from s3_helper import S3Helper from ci_metadata import CiMetadata from version_helper import get_version_from_repo @@ -996,6 +996,11 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace: action="store_true", help="Action that cancels previous running PR workflow if PR added into the Merge Queue", ) + parser.add_argument( + "--set-pending-status", + action="store_true", + help="Action to set needed pending statuses in the beginning of CI workflow, e.g. for Sync wf", + ) parser.add_argument( "--configure", action="store_true", @@ -1930,6 +1935,19 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> No ) +def _set_pending_statuses(pr_info: PRInfo) -> None: + commit = get_commit(GitHub(get_best_robot_token(), per_page=100), pr_info.sha) + try: + commit.create_status( + state=PENDING, + target_url="", + description="", + context=StatusNames.SYNC, + ) + except Exception as ex: + print(f"ERROR: failed to set GH commit status, ex: {ex}") + + def main() -> int: logging.basicConfig(level=logging.INFO) exit_code = 0 @@ -2265,6 +2283,13 @@ def main() -> int: else: assert False, "BUG! 
Not supported scenario" + ### SET PENDING STATUS + elif args.cancel_previous_run: + if pr_info.is_pr: + _set_pending_statuses(pr_info) + else: + assert False, "BUG! Not supported scenario" + ### print results _print_results(result, args.outfile, args.pretty) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index e1c47353743..22cc0085781 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -433,11 +433,8 @@ def set_mergeable_check( commit: Commit, description: str = "", state: StatusType = SUCCESS, - hide_url: bool = False, ) -> CommitStatus: - report_url = GITHUB_RUN_URL - if hide_url: - report_url = "" + report_url = "" return post_commit_status( commit, state, @@ -469,7 +466,6 @@ def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> def trigger_mergeable_check( commit: Commit, statuses: CommitStatuses, - hide_url: bool = False, set_if_green: bool = False, workflow_failed: bool = False, ) -> StatusType: @@ -484,9 +480,12 @@ def trigger_mergeable_check( success = [] fail = [] + pending = [] for status in required_checks: if status.state == SUCCESS: success.append(status.context) + elif status.state == PENDING: + pending.append(status.context) else: fail.append(status.context) @@ -503,6 +502,8 @@ def trigger_mergeable_check( elif workflow_failed: description = "check workflow failures" state = FAILURE + elif pending: + description = "pending: " + ", ".join(pending) description = format_description(description) if not set_if_green and state == SUCCESS: @@ -510,7 +511,7 @@ def trigger_mergeable_check( pass else: if mergeable_status is None or mergeable_status.description != description: - set_mergeable_check(commit, description, state, hide_url) + set_mergeable_check(commit, description, state) return state diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 1a7000f5353..130973ee8ff 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -67,7 +67,7 @@ def main(): if status.state == PENDING: post_commit_status( commit, - SUCCESS, + state, # map Mergeable Check status to CI Running status.target_url, "All checks finished", StatusNames.CI, diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 500de4eb718..e1c7bf94ff5 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -250,7 +250,6 @@ def main(): trigger_mergeable_check( commit, statuses, - hide_url=False, set_if_green=True, workflow_failed=(args.wf_status != "success"), ) From 22b441ed40034280d80506150f9f4969966a3f87 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 11:46:50 +0200 Subject: [PATCH 328/392] fix PR template --- .github/PULL_REQUEST_TEMPLATE.md | 64 +++++++++++++++----------------- tests/ci/ci.py | 3 +- 2 files changed, 31 insertions(+), 36 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 64dc9049bc2..663b464d002 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -46,42 +46,36 @@ At a minimum, the following information should be added (but add more as needed) **NOTE:** If your merge the PR with modified CI you **MUST KNOW** what you are doing **NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step - -#### Run these jobs only (required builds will be added automatically): -- [ ] Integration Tests -- [ ] Stateless tests -- [ ] Stateful tests -- [ ] Unit tests -- [ ] Performance tests -- [ ] All with aarch64 -- [ ] All with ASAN -- [ ] All with TSAN -- [ ] 
All with Analyzer -- [ ] All with Azure -- [ ] Add your option here - -#### Deny these jobs: -- [ ] Fast test -- [ ] Integration Tests -- [ ] Stateless tests -- [ ] Stateful tests -- [ ] Performance tests -- [ ] All with ASAN -- [ ] All with TSAN -- [ ] All with MSAN -- [ ] All with UBSAN -- [ ] All with Coverage -- [ ] All with Aarch64 - -#### Extra options: +--- +- [ ] Allow: Integration Tests +- [ ] Allow:: Stateless tests +- [ ] Allow: Stateful tests +- [ ] Allow: Unit tests +- [ ] Allow: Performance tests +- [ ] Allow: All with aarch64 +- [ ] Allow: All with ASAN +- [ ] Allow: All with TSAN +- [ ] Allow: All with Analyzer +- [ ] Allow: All with Azure +- [ ] Allow: Add your option here +--- +- [ ] Exclude: Fast test +- [ ] Exclude: Integration Tests +- [ ] Exclude: Stateless tests +- [ ] Exclude: Stateful tests +- [ ] Exclude: Performance tests +- [ ] Exclude: All with ASAN +- [ ] Exclude: All with TSAN +- [ ] Exclude: All with MSAN +- [ ] Exclude: All with UBSAN +- [ ] Exclude: All with Coverage +- [ ] Exclude: All with Aarch64 +--- - [ ] do not test (only style check) - [ ] disable merge-commit (no merge from master before tests) - [ ] disable CI cache (job reuse) - -#### Only specified batches in multi-batch jobs: -- [ ] 1 -- [ ] 2 -- [ ] 3 -- [ ] 4 - +- [ ] only batch 1 for multi-batch jobs +- [ ] only batch 2 for multi-batch jobs +- [ ] only batch 3 for multi-batch jobs +- [ ] only batch 4 for multi-batch jobs
diff --git a/tests/ci/ci.py b/tests/ci/ci.py index fc25bee354d..c4e06ccd79a 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1938,6 +1938,7 @@ def _cancel_pr_wf(s3: S3Helper, pr_number: int, cancel_sync: bool = False) -> No def _set_pending_statuses(pr_info: PRInfo) -> None: commit = get_commit(GitHub(get_best_robot_token(), per_page=100), pr_info.sha) try: + print("Set SYNC status to pending") commit.create_status( state=PENDING, target_url="", @@ -2284,7 +2285,7 @@ def main() -> int: assert False, "BUG! Not supported scenario" ### SET PENDING STATUS - elif args.cancel_previous_run: + elif args.set_pending_status: if pr_info.is_pr: _set_pending_statuses(pr_info) else: From a725112c4c7e33ae23e970b2c50f762ca2edea96 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 May 2024 10:10:39 +0000 Subject: [PATCH 329/392] Fix different hashes for reading/writing from/to query cache --- src/Interpreters/executeQuery.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 0b5f68f27f6..59d012a0a0e 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1093,6 +1093,15 @@ static std::tuple executeQueryImpl( && (ast->as() || ast->as()); QueryCache::Usage query_cache_usage = QueryCache::Usage::None; + /// If the query runs with "use_query_cache = 1", we first probe if the query cache already contains the query result (if yes: + /// return result from cache). If doesn't, we execute the query normally and write the result into the query cache. Both steps use a + /// hash of the AST, the current database and the settings as cache key. Unfortunately, the settings are in some places internally + /// modified between steps 1 and 2 (= during query execution) - this is silly but hard to forbid. As a result, the hashes no longer + /// match and the cache is rendered ineffective. Therefore make a copy of the settings and use it for steps 1 and 2. 
+ std::optional settings_copy; + if (can_use_query_cache) + settings_copy = settings; + if (!async_insert) { /// If it is a non-internal SELECT, and passive (read) use of the query cache is enabled, and the cache knows the query, then set @@ -1101,7 +1110,7 @@ static std::tuple executeQueryImpl( { if (can_use_query_cache && settings.enable_reads_from_query_cache) { - QueryCache::Key key(ast, context->getCurrentDatabase(), settings, context->getUserID(), context->getCurrentRoles()); + QueryCache::Key key(ast, context->getCurrentDatabase(), *settings_copy, context->getUserID(), context->getCurrentRoles()); QueryCache::Reader reader = query_cache->createReader(key); if (reader.hasCacheEntryForKey()) { @@ -1224,7 +1233,7 @@ static std::tuple executeQueryImpl( && (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save)) { QueryCache::Key key( - ast, context->getCurrentDatabase(), settings, res.pipeline.getHeader(), + ast, context->getCurrentDatabase(), *settings_copy, res.pipeline.getHeader(), context->getUserID(), context->getCurrentRoles(), settings.query_cache_share_between_users, std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl), From 0e758722c6da7044fcb2c8958f175a8321c056a5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 May 2024 10:19:01 +0000 Subject: [PATCH 330/392] Enable 02494_query_cache_nested_query_bug for Analyzer --- .../0_stateless/02494_query_cache_nested_query_bug.reference | 2 +- tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference index 389e2621455..b261da18d51 100644 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference @@ -1,2 +1,2 @@ -2 +1 0 diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh index 8712c7c84c6..15015761295 100755 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh @@ -15,7 +15,7 @@ ${CLICKHOUSE_CLIENT} --query "CREATE TABLE tab (a UInt64) ENGINE=MergeTree() ORD ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (1) (2) (3)" ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (3) (4) (5)" -SETTINGS="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" +SETTINGS="SETTINGS use_query_cache=1, max_threads=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" # Verify that the first query does two aggregations and the second query zero aggregations. Since query cache is currently not integrated # with EXPLAIN PLAN, we need to check the logs. 
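The settings-copy fix in executeQuery.cpp above is easier to see in isolation. The sketch below uses a made-up FakeSettings type and hash function rather than the real Settings or QueryCache::Key classes; it only illustrates why hashing the live settings object once when probing the cache and again when storing the result can produce two different keys if a setting is mutated during execution, while hashing a single snapshot for both steps keeps the keys identical.

/// Standalone illustration; FakeSettings and hashSettings are hypothetical stand-ins.
#include <functional>
#include <iostream>
#include <map>
#include <string>

struct FakeSettings
{
    std::map<std::string, std::string> values;
};

static size_t hashSettings(const FakeSettings & s)
{
    size_t h = 0;
    for (const auto & [name, value] : s.values)
        h ^= std::hash<std::string>{}(name + "=" + value) + 0x9e3779b9 + (h << 6) + (h >> 2);
    return h;
}

int main()
{
    FakeSettings live{{{"max_threads", "8"}}};
    FakeSettings snapshot = live;                 /// plays the role of settings_copy above

    size_t lookup_key = hashSettings(snapshot);   /// step 1: probe the cache before execution

    live.values["max_threads"] = "1";             /// settings mutated while the query runs

    std::cout << "live object reused: " << (lookup_key == hashSettings(live)) << '\n';     /// prints 0, keys diverge
    std::cout << "snapshot reused:    " << (lookup_key == hashSettings(snapshot)) << '\n'; /// prints 1, keys match
}

In the actual diff above, settings_copy is that snapshot: the same copy is passed to QueryCache::Key both when the cache is probed and when the result is written.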
From f1421c9e5c542ed529dd3b225fc06c696a054080 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 12:02:14 +0200 Subject: [PATCH 331/392] style fix --- .github/PULL_REQUEST_TEMPLATE.md | 11 +++++------ tests/ci/commit_status_helper.py | 4 +--- tests/ci/finish_check.py | 4 ++-- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 663b464d002..f9765c1d57b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -46,9 +46,8 @@ At a minimum, the following information should be added (but add more as needed) **NOTE:** If your merge the PR with modified CI you **MUST KNOW** what you are doing **NOTE:** Checked options will be applied if set before CI RunConfig/PrepareRunConfig step ---- - [ ] Allow: Integration Tests -- [ ] Allow:: Stateless tests +- [ ] Allow: Stateless tests - [ ] Allow: Stateful tests - [ ] Allow: Unit tests - [ ] Allow: Performance tests @@ -74,8 +73,8 @@ At a minimum, the following information should be added (but add more as needed) - [ ] do not test (only style check) - [ ] disable merge-commit (no merge from master before tests) - [ ] disable CI cache (job reuse) -- [ ] only batch 1 for multi-batch jobs -- [ ] only batch 2 for multi-batch jobs -- [ ] only batch 3 for multi-batch jobs -- [ ] only batch 4 for multi-batch jobs +- [ ] allow: batch 1 for multi-batch jobs +- [ ] allow: batch 2 +- [ ] allow: batch 3 +- [ ] allow: batch 4, 5 and 6 diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 22cc0085781..bdbb0e80653 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -20,7 +20,6 @@ from github.Repository import Repository from ci_config import CHECK_DESCRIPTIONS, CheckDescription, StatusNames, is_required from env_helper import ( GITHUB_REPOSITORY, - GITHUB_RUN_URL, GITHUB_UPSTREAM_REPOSITORY, TEMP_PATH, ) @@ -557,13 +556,12 @@ def update_upstream_sync_status( post_commit_status( last_synced_upstream_commit, sync_status, - "", # let's won't expose any urls from cloud + "", "", StatusNames.SYNC, ) trigger_mergeable_check( last_synced_upstream_commit, get_commit_filtered_statuses(last_synced_upstream_commit), - True, set_if_green=can_set_green_mergeable_status, ) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 130973ee8ff..269d5aa3175 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -15,7 +15,7 @@ from commit_status_helper import ( ) from get_robot_token import get_best_robot_token from pr_info import PRInfo -from report import PENDING, SUCCESS +from report import PENDING from synchronizer_utils import SYNC_BRANCH_PREFIX from env_helper import GITHUB_REPOSITORY, GITHUB_UPSTREAM_REPOSITORY @@ -67,7 +67,7 @@ def main(): if status.state == PENDING: post_commit_status( commit, - state, # map Mergeable Check status to CI Running + state, # map Mergeable Check status to CI Running status.target_url, "All checks finished", StatusNames.CI, From 1f1c2c21b19dc3d29b60f0508b79bceb425585e7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 May 2024 10:32:42 +0000 Subject: [PATCH 332/392] Fix spelling --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 6df2e426561..6eae333681d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ 
b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1617,6 +1617,8 @@ gcem generateRandom generateRandomStructure generateSeries +generateSnowflakeID +generateSnowflakeIDThreadMonotonic generateULID generateUUIDv geoDistance From 7ccb776ed93196e72485aa0219d7b281ea0f68de Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 12:39:35 +0200 Subject: [PATCH 333/392] mcheck fix --- tests/ci/commit_status_helper.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index bdbb0e80653..b17c189c405 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -490,11 +490,6 @@ def trigger_mergeable_check( state: StatusType = SUCCESS - if success: - description = ", ".join(success) - else: - description = "awaiting job statuses" - if fail: description = "failed: " + ", ".join(fail) state = FAILURE @@ -503,6 +498,11 @@ def trigger_mergeable_check( state = FAILURE elif pending: description = "pending: " + ", ".join(pending) + state = PENDING + else: + # all good + description = ", ".join(success) + description = format_description(description) if not set_if_green and state == SUCCESS: From 534f996be3ec5baa544b45180fd1ff049eb2cada Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 13:07:37 +0200 Subject: [PATCH 334/392] Change input_format_parquet_use_native_reader to 24.6 --- src/Core/SettingsChangesHistory.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 23f7810835c..9b5bf6b50a5 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.6", {{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, + }}, {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, @@ -93,7 +95,6 @@ static std::map sett {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, {"http_max_chunk_size", 0, 0, "Internal limitation"}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, From d48fba5b2b4176434242c75121066001846a1e17 Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Fri, 19 Apr 2024 00:30:55 +0800 Subject: [PATCH 335/392] Limit the array index of FixedHashTable by min/max If the type of key is 8 bits or 16 bits in aggregation, ClickHouse will use array of 256 or 65536 length to store the key and boost the mergeSingleLevel, rather than key comparison. However, if the key has occupied only small range of the total 65536 cells, most of the cycles are wasted on the `isZero()` to find the next cell which is not zero in iterator++. The solution is to use min/max and update min/max when emplace. Then we can set the upper searching limit to max in iterator++. And just set min as the value of `begin()`, rather than searching the first cell that not equals to 0. We have tested the patch on 2x80 vCPUs server, Query 7 of ClickBench has gained 2.1x performance improvement. Signed-off-by: Jiebin Sun --- src/Common/HashTable/FixedHashTable.h | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index 49675aaafbc..d40169028b5 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -114,6 +114,8 @@ template class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State, protected Size { static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8); + size_t min = NUM_CELLS - 1; + size_t max = 0; protected: friend class const_iterator; @@ -169,7 +171,7 @@ protected: ++ptr; /// Skip empty cells in the main buffer. - const auto * buf_end = container->buf + container->NUM_CELLS; + const auto * buf_end= container->buf + container->max + 1; while (ptr < buf_end && ptr->isZero(*container)) ++ptr; @@ -294,14 +296,10 @@ public: const_iterator begin() const { - if (!buf) + if (!buf && min > max) return end(); - const Cell * ptr = buf; - auto buf_end = buf + NUM_CELLS; - while (ptr < buf_end && ptr->isZero(*this)) - ++ptr; - + const Cell * ptr = buf + min; return const_iterator(this, ptr); } @@ -309,21 +307,17 @@ public: iterator begin() { - if (!buf) + if (!buf && min > max) return end(); - Cell * ptr = buf; - auto buf_end = buf + NUM_CELLS; - while (ptr < buf_end && ptr->isZero(*this)) - ++ptr; - + Cell * ptr = buf + min; return iterator(this, ptr); } const_iterator end() const { /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C. - return const_iterator(this, buf ? buf + NUM_CELLS : buf); + return const_iterator(this, buf ? 
buf + max + 1: buf); } const_iterator cend() const @@ -333,7 +327,7 @@ public: iterator end() { - return iterator(this, buf ? buf + NUM_CELLS : buf); + return iterator(this, buf ? buf + max + 1 : buf); } @@ -350,6 +344,8 @@ public: new (&buf[x]) Cell(x, *this); inserted = true; + if (x < min) min = x; + if (x > max) max = x; this->increaseSize(); } From 69960a5735fa3f08ddac258e2208d27e2d4e0a01 Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Fri, 19 Apr 2024 18:19:25 +0800 Subject: [PATCH 336/392] Fix a bug if the container is empty --- src/Common/HashTable/FixedHashTable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index d40169028b5..67605417a84 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -296,7 +296,7 @@ public: const_iterator begin() const { - if (!buf && min > max) + if (!buf || min > max) return end(); const Cell * ptr = buf + min; @@ -307,7 +307,8 @@ public: iterator begin() { - if (!buf && min > max) + /// If the container is empty, the initialization of min/max will not work as min > max. + if (!buf || min > max) return end(); Cell * ptr = buf + min; From 60420f2a8e3809640fd7a6a6b5c26b7b0d9df962 Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Thu, 25 Apr 2024 01:53:20 +0800 Subject: [PATCH 337/392] Fix a bug if data will be inserted not by emplace(). --- src/Common/HashTable/FixedHashTable.h | 42 ++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index 67605417a84..be4f82434b1 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -171,7 +171,9 @@ protected: ++ptr; /// Skip empty cells in the main buffer. - const auto * buf_end= container->buf + container->max + 1; + const auto * buf_end = container->buf + container->NUM_CELLS; + if (container->min <= container->max) + buf_end = container->buf + container->max + 1; while (ptr < buf_end && ptr->isZero(*container)) ++ptr; @@ -296,10 +298,19 @@ public: const_iterator begin() const { - if (!buf || min > max) + if (!buf) return end(); - const Cell * ptr = buf + min; + const Cell * ptr = buf; + if (min > max) + { + auto buf_end = buf + NUM_CELLS; + while (ptr < buf_end && ptr->isZero(*this)) + ++ptr; + } + else + ptr = buf + min; + return const_iterator(this, ptr); } @@ -307,18 +318,30 @@ public: iterator begin() { - /// If the container is empty, the initialization of min/max will not work as min > max. - if (!buf || min > max) + /// If min > max, it might use emplace to insert the value or the container is empty. + if (!buf) return end(); - Cell * ptr = buf + min; + Cell * ptr = buf; + if (min > max) + { + auto buf_end = buf + NUM_CELLS; + while (ptr < buf_end && ptr->isZero(*this)) + ++ptr; + } + else + ptr = buf + min; + return iterator(this, ptr); } const_iterator end() const { /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C. - return const_iterator(this, buf ? buf + max + 1: buf); + if (min > max) + return const_iterator(this, buf ? buf + NUM_CELLS: buf); + else + return const_iterator(this, buf ? buf + max + 1: buf); } const_iterator cend() const @@ -328,7 +351,10 @@ public: iterator end() { - return iterator(this, buf ? buf + max + 1 : buf); + if (min > max) + return iterator(this, buf ? buf + NUM_CELLS: buf); + else + return iterator(this, buf ? 
buf + max + 1: buf); } From 7f960e4e8ad046e4359a9803fb49b49441444bdc Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Thu, 9 May 2024 01:13:11 +0800 Subject: [PATCH 338/392] Add the use_emplace_to_insert_data flag. `emplace()` is the only interface to update min/max. If the FixedHashTable.emplace() is not used to revise the hashtable value, then we should not continue the min/max optimization. --- src/Common/HashTable/FixedHashTable.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index be4f82434b1..25860800f6e 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -114,6 +114,7 @@ template class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State, protected Size { static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8); + bool use_emplace_to_insert_data = true; size_t min = NUM_CELLS - 1; size_t max = 0; @@ -172,7 +173,7 @@ protected: /// Skip empty cells in the main buffer. const auto * buf_end = container->buf + container->NUM_CELLS; - if (container->min <= container->max) + if (container->use_min_max_optimization()) buf_end = container->buf + container->max + 1; while (ptr < buf_end && ptr->isZero(*container)) ++ptr; @@ -302,7 +303,7 @@ public: return end(); const Cell * ptr = buf; - if (min > max) + if (!use_min_max_optimization()) { auto buf_end = buf + NUM_CELLS; while (ptr < buf_end && ptr->isZero(*this)) @@ -323,7 +324,7 @@ public: return end(); Cell * ptr = buf; - if (min > max) + if (!use_min_max_optimization()) { auto buf_end = buf + NUM_CELLS; while (ptr < buf_end && ptr->isZero(*this)) @@ -338,7 +339,7 @@ public: const_iterator end() const { /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C. - if (min > max) + if (!use_min_max_optimization()) return const_iterator(this, buf ? buf + NUM_CELLS: buf); else return const_iterator(this, buf ? buf + max + 1: buf); @@ -351,7 +352,7 @@ public: iterator end() { - if (min > max) + if (!use_min_max_optimization()) return iterator(this, buf ? buf + NUM_CELLS: buf); else return iterator(this, buf ? buf + max + 1: buf); @@ -400,6 +401,10 @@ public: bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); } bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); } + /// Decide if we use the min/max optimization. `max < min` means the FixedHashtable is empty. The flag `use_emplace_to_insert_data` + /// will check if the FixedHashTable will use `emplace()` to insert the raw data. 
+ bool ALWAYS_INLINE use_min_max_optimization() const {return ((max >= min) && use_emplace_to_insert_data);} + void write(DB::WriteBuffer & wb) const { Cell::State::write(wb); @@ -456,6 +461,7 @@ public: x.read(rb); new (&buf[place_value]) Cell(x, *this); } + use_emplace_to_insert_data = false; } void readText(DB::ReadBuffer & rb) @@ -478,6 +484,7 @@ public: x.readText(rb); new (&buf[place_value]) Cell(x, *this); } + use_emplace_to_insert_data = false; } size_t size() const { return this->getSize(buf, *this, NUM_CELLS); } @@ -516,7 +523,11 @@ public: } const Cell * data() const { return buf; } - Cell * data() { return buf; } + Cell * data() + { + use_emplace_to_insert_data = false; + return buf; + } #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS size_t getCollisions() const { return 0; } From 4e6f5fba830008091fbb2e62acc7a7e60e193a37 Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Fri, 17 May 2024 10:32:41 +0800 Subject: [PATCH 339/392] Update src/Common/HashTable/FixedHashTable.h Add comment by Nikita. Co-authored-by: Nikita Taranov --- src/Common/HashTable/FixedHashTable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index 25860800f6e..3214c974003 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -114,7 +114,9 @@ template class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State, protected Size { static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8); - bool use_emplace_to_insert_data = true; + /// We maintain min and max values inserted into the hash table to then limit the amount of cells to traverse to the [min; max] range. + /// Both values could be efficiently calculated only within `emplace` calls (and not when we populate the hash table in `read` method for example), so we update them only within `emplace` and track if any other method was called. + bool only_emplace_was_used_to_insert_data = true; size_t min = NUM_CELLS - 1; size_t max = 0; From ca88da11e0e1f96d6e833349130899aa0605263a Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Fri, 17 May 2024 10:33:43 +0800 Subject: [PATCH 340/392] Update src/Common/HashTable/FixedHashTable.h Revise the method name by Nikita. Co-authored-by: Nikita Taranov --- src/Common/HashTable/FixedHashTable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index 3214c974003..b34f45f0a9a 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -405,7 +405,7 @@ public: /// Decide if we use the min/max optimization. `max < min` means the FixedHashtable is empty. The flag `use_emplace_to_insert_data` /// will check if the FixedHashTable will use `emplace()` to insert the raw data. 
- bool ALWAYS_INLINE use_min_max_optimization() const {return ((max >= min) && use_emplace_to_insert_data);} + bool ALWAYS_INLINE canUseMinMaxOptimization() const {return ((max >= min) && use_emplace_to_insert_data);} void write(DB::WriteBuffer & wb) const { From d1d57caf0a2b470f7ad9d05b910633d7c08c581e Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Fri, 17 May 2024 22:30:51 +0800 Subject: [PATCH 341/392] Generate the seperate function firstPopulatedCell() and lastPopulatedCell() --- src/Common/HashTable/FixedHashTable.h | 66 ++++++++++++--------------- 1 file changed, 28 insertions(+), 38 deletions(-) diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index b34f45f0a9a..f842a30e3d8 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -114,6 +114,7 @@ template class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State, protected Size { static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8); + /// We maintain min and max values inserted into the hash table to then limit the amount of cells to traverse to the [min; max] range. /// Both values could be efficiently calculated only within `emplace` calls (and not when we populate the hash table in `read` method for example), so we update them only within `emplace` and track if any other method was called. bool only_emplace_was_used_to_insert_data = true; @@ -175,7 +176,7 @@ protected: /// Skip empty cells in the main buffer. const auto * buf_end = container->buf + container->NUM_CELLS; - if (container->use_min_max_optimization()) + if (container->canUseMinMaxOptimization()) buf_end = container->buf + container->max + 1; while (ptr < buf_end && ptr->isZero(*container)) ++ptr; @@ -304,47 +305,23 @@ public: if (!buf) return end(); - const Cell * ptr = buf; - if (!use_min_max_optimization()) - { - auto buf_end = buf + NUM_CELLS; - while (ptr < buf_end && ptr->isZero(*this)) - ++ptr; - } - else - ptr = buf + min; - - return const_iterator(this, ptr); + return const_iterator(this, firstPopulatedCell()); } const_iterator cbegin() const { return begin(); } iterator begin() { - /// If min > max, it might use emplace to insert the value or the container is empty. if (!buf) return end(); - Cell * ptr = buf; - if (!use_min_max_optimization()) - { - auto buf_end = buf + NUM_CELLS; - while (ptr < buf_end && ptr->isZero(*this)) - ++ptr; - } - else - ptr = buf + min; - - return iterator(this, ptr); + return iterator(this, const_cast(firstPopulatedCell())); } const_iterator end() const { /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C. - if (!use_min_max_optimization()) - return const_iterator(this, buf ? buf + NUM_CELLS: buf); - else - return const_iterator(this, buf ? buf + max + 1: buf); + return const_iterator(this, lastPopulatedCell()); } const_iterator cend() const @@ -354,10 +331,7 @@ public: iterator end() { - if (!use_min_max_optimization()) - return iterator(this, buf ? buf + NUM_CELLS: buf); - else - return iterator(this, buf ? buf + max + 1: buf); + return iterator(this, lastPopulatedCell()); } @@ -403,9 +377,25 @@ public: bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); } bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); } - /// Decide if we use the min/max optimization. `max < min` means the FixedHashtable is empty. 
The flag `use_emplace_to_insert_data` - /// will check if the FixedHashTable will use `emplace()` to insert the raw data. - bool ALWAYS_INLINE canUseMinMaxOptimization() const {return ((max >= min) && use_emplace_to_insert_data);} + /// Decide if we use the min/max optimization. `max < min` means the FixedHashtable is empty. The flag `only_emplace_was_used_to_insert_data` + /// will check if the FixedHashTable will only use `emplace()` to insert the raw data. + bool ALWAYS_INLINE canUseMinMaxOptimization() const { return ((max >= min) && only_emplace_was_used_to_insert_data); } + + const Cell * ALWAYS_INLINE firstPopulatedCell() const + { + const Cell * ptr = buf; + if (!canUseMinMaxOptimization()) + { + while (ptr < buf + NUM_CELLS && ptr->isZero(*this)) + ++ptr; + } + else + ptr = buf + min; + + return ptr; + } + + Cell * ALWAYS_INLINE lastPopulatedCell() const { return canUseMinMaxOptimization() ? buf + max + 1 : buf + NUM_CELLS; } void write(DB::WriteBuffer & wb) const { @@ -463,7 +453,7 @@ public: x.read(rb); new (&buf[place_value]) Cell(x, *this); } - use_emplace_to_insert_data = false; + only_emplace_was_used_to_insert_data = false; } void readText(DB::ReadBuffer & rb) @@ -486,7 +476,7 @@ public: x.readText(rb); new (&buf[place_value]) Cell(x, *this); } - use_emplace_to_insert_data = false; + only_emplace_was_used_to_insert_data = false; } size_t size() const { return this->getSize(buf, *this, NUM_CELLS); } @@ -527,7 +517,7 @@ public: const Cell * data() const { return buf; } Cell * data() { - use_emplace_to_insert_data = false; + only_emplace_was_used_to_insert_data = false; return buf; } From d40c5a07becdbaa1652f3860f239e7e83d752f91 Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Tue, 21 May 2024 20:31:43 +0800 Subject: [PATCH 342/392] Avoid UBSan warning while buf is nullptr --- src/Common/HashTable/FixedHashTable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/HashTable/FixedHashTable.h b/src/Common/HashTable/FixedHashTable.h index f842a30e3d8..a84391b37e3 100644 --- a/src/Common/HashTable/FixedHashTable.h +++ b/src/Common/HashTable/FixedHashTable.h @@ -321,7 +321,7 @@ public: const_iterator end() const { /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C. - return const_iterator(this, lastPopulatedCell()); + return const_iterator(this, buf ? lastPopulatedCell() : buf); } const_iterator cend() const @@ -331,7 +331,7 @@ public: iterator end() { - return iterator(this, lastPopulatedCell()); + return iterator(this, buf ? lastPopulatedCell() : buf); } From a6e06b27d221cfd7f5b7987c2b642487b2a80d01 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 24 May 2024 14:17:37 +0200 Subject: [PATCH 343/392] Update description for settings cross_join_min_rows_to_compress and cross_join_min_bytes_to_compress --- src/Core/SettingsChangesHistory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 23f7810835c..0521f70a91b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -89,8 +89,8 @@ static std::map sett {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, {"output_format_parquet_use_custom_encoder", false, true, "Enable custom Parquet encoder."}, - {"cross_join_min_rows_to_compress", 0, 10000000, "A new setting."}, - {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, + {"cross_join_min_rows_to_compress", 0, 10000000, "Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, + {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, {"http_max_chunk_size", 0, 0, "Internal limitation"}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, From 3d207039584cb69d9fffe1b3ec923a31fab5f032 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 24 May 2024 12:27:19 +0000 Subject: [PATCH 344/392] Force-enable analyzer so that tests without Analyzer can no longer fail --- tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh index 15015761295..a5339a098dc 100755 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh @@ -15,7 +15,7 @@ ${CLICKHOUSE_CLIENT} --query "CREATE TABLE tab (a UInt64) ENGINE=MergeTree() ORD ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (1) (2) (3)" ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (3) (4) (5)" -SETTINGS="SETTINGS use_query_cache=1, max_threads=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" +SETTINGS="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" # Verify that the first query does two aggregations and the second query zero aggregations. Since query cache is currently not integrated # with EXPLAIN PLAN, we need to check the logs. 
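For context on the settings_changes_history edits above: each release maps to a list of records holding a setting's previous default, new default and a description, and this history is what the compatibility setting uses to restore older defaults. The sketch below is a simplified, self-contained illustration of that lookup and not the actual ClickHouse implementation; in particular it stores defaults as strings and compares version strings lexicographically, which is only adequate for the sample values used here.

#include <iostream>
#include <map>
#include <string>
#include <vector>

/// Illustrative stand-ins for the real SettingsChangesHistory structures.
struct SettingChange
{
    std::string name;
    std::string previous_default;
    std::string new_default;
};

using ChangesHistory = std::map<std::string, std::vector<SettingChange>>;  /// release -> changes

/// Default a setting had at the requested compatibility version: start from the current default
/// and undo every change introduced by releases newer than that version, newest release first.
std::string defaultFor(const ChangesHistory & history, std::string current_default,
                       const std::string & setting, const std::string & compatibility)
{
    for (auto it = history.rbegin(); it != history.rend(); ++it)
    {
        if (it->first <= compatibility)
            break;
        for (const auto & change : it->second)
            if (change.name == setting)
                current_default = change.previous_default;
    }
    return current_default;
}

int main()
{
    ChangesHistory history =
    {
        {"24.5", {{"allow_deprecated_functions", "true", "false"},
                  {"cross_join_min_rows_to_compress", "0", "10000000"}}},
        {"24.6", {{"input_format_parquet_use_native_reader", "false", "false"}}},
    };

    std::cout << defaultFor(history, "false", "allow_deprecated_functions", "24.4") << '\n';  /// true
    std::cout << defaultFor(history, "false", "allow_deprecated_functions", "24.5") << '\n';  /// false
}

Seen through this mechanism, the earlier move of input_format_parquet_use_native_reader into its own 24.6 entry keeps that default change attributed to the release that actually ships it, which is presumably why the entry was relocated rather than edited in place.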
From aada1de796144829b2a6e334764923cef6da4fff Mon Sep 17 00:00:00 2001 From: TTPO100AJIEX Date: Fri, 24 May 2024 15:36:41 +0300 Subject: [PATCH 345/392] Rename function parameters, remove unnecessary virtual --- src/Server/ServersManager/IServersManager.cpp | 8 ++-- src/Server/ServersManager/IServersManager.h | 14 +++---- .../ServersManager/InterServersManager.cpp | 20 +++++----- .../ServersManager/InterServersManager.h | 1 - .../ServersManager/ProtocolServersManager.cpp | 40 +++++++++---------- 5 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/Server/ServersManager/IServersManager.cpp b/src/Server/ServersManager/IServersManager.cpp index c903d90f766..8b1eee94303 100644 --- a/src/Server/ServersManager/IServersManager.cpp +++ b/src/Server/ServersManager/IServersManager.cpp @@ -17,8 +17,8 @@ extern const int NETWORK_ERROR; extern const int INVALID_CONFIG_PARAMETER; } -IServersManager::IServersManager(ContextMutablePtr l_global_context, Poco::Logger * l_logger) - : global_context(l_global_context), logger(l_logger) +IServersManager::IServersManager(ContextMutablePtr global_context_, Poco::Logger * logger_) + : global_context(global_context_), logger(logger_) { } @@ -107,8 +107,8 @@ void IServersManager::createServer( const Poco::Util::AbstractConfiguration & config, const std::string & listen_host, const char * port_name, - CreateServerFunc && func, - bool start_server) + bool start_server, + CreateServerFunc && func) { /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. if (config.getString(port_name, "").empty()) diff --git a/src/Server/ServersManager/IServersManager.h b/src/Server/ServersManager/IServersManager.h index 5218ab63554..7e1d9d50d82 100644 --- a/src/Server/ServersManager/IServersManager.h +++ b/src/Server/ServersManager/IServersManager.h @@ -19,7 +19,7 @@ namespace DB class IServersManager { public: - IServersManager(ContextMutablePtr global_context, Poco::Logger * logger); + IServersManager(ContextMutablePtr global_context_, Poco::Logger * logger_); virtual ~IServersManager() = default; bool empty() const; @@ -35,9 +35,9 @@ public: const ServerType & server_type) = 0; - virtual void startServers(); + void startServers(); - virtual void stopServers(const ServerType & server_type); + void stopServers(const ServerType & server_type); virtual size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) = 0; virtual void updateServers( @@ -58,14 +58,14 @@ protected: const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port) const; using CreateServerFunc = std::function; - virtual void createServer( + void createServer( const Poco::Util::AbstractConfiguration & config, const std::string & listen_host, const char * port_name, - CreateServerFunc && func, - bool start_server); + bool start_server, + CreateServerFunc && func); - virtual void stopServersForUpdate(const Poco::Util::AbstractConfiguration & config, ConfigurationPtr latest_config); + void stopServersForUpdate(const Poco::Util::AbstractConfiguration & config, ConfigurationPtr latest_config); Strings getListenHosts(const Poco::Util::AbstractConfiguration & config) const; bool getListenTry(const Poco::Util::AbstractConfiguration & config) const; diff --git a/src/Server/ServersManager/InterServersManager.cpp b/src/Server/ServersManager/InterServersManager.cpp index 28491a4f4f4..4425d468248 100644 --- a/src/Server/ServersManager/InterServersManager.cpp +++ 
b/src/Server/ServersManager/InterServersManager.cpp @@ -71,6 +71,7 @@ void InterServersManager::createServers( config, listen_host, port_name, + /* start_server = */ false, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -92,14 +93,14 @@ void InterServersManager::createServers( false), server_pool, socket)); - }, - /* start_server = */ false); + }); constexpr auto secure_port_name = "keeper_server.tcp_port_secure"; createServer( config, listen_host, secure_port_name, + /* start_server = */ false, [&](UInt16 port) -> ProtocolServerAdapter { # if USE_SSL @@ -128,14 +129,14 @@ void InterServersManager::createServers( ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); # endif - }, - /* start_server: */ false); + }); /// HTTP control endpoints createServer( config, listen_host, /* port_name = */ "keeper_server.http_control.port", + /* start_server = */ false, [&](UInt16 port) -> ProtocolServerAdapter { auto http_context = std::make_shared(global_context); @@ -159,8 +160,7 @@ void InterServersManager::createServers( server_pool, socket, http_params)); - }, - /* start_server: */ false); + }); } #else throw Exception( @@ -264,6 +264,7 @@ void InterServersManager::createInterserverServers( config, interserver_listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -282,8 +283,7 @@ void InterServersManager::createInterserverServers( http_params, ProfileEvents::InterfaceInterserverReceiveBytes, ProfileEvents::InterfaceInterserverSendBytes)); - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) @@ -293,6 +293,7 @@ void InterServersManager::createInterserverServers( config, interserver_listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { #if USE_SSL @@ -318,8 +319,7 @@ void InterServersManager::createInterserverServers( ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif - }, - start_servers); + }); } } } diff --git a/src/Server/ServersManager/InterServersManager.h b/src/Server/ServersManager/InterServersManager.h index 2a389e28c22..8780eae18e0 100644 --- a/src/Server/ServersManager/InterServersManager.h +++ b/src/Server/ServersManager/InterServersManager.h @@ -19,7 +19,6 @@ public: bool start_servers, const ServerType & server_type) override; - using IServersManager::stopServers; size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) override; void updateServers( diff --git a/src/Server/ServersManager/ProtocolServersManager.cpp b/src/Server/ServersManager/ProtocolServersManager.cpp index 17b028eddbb..af57de3ac3c 100644 --- a/src/Server/ServersManager/ProtocolServersManager.cpp +++ b/src/Server/ServersManager/ProtocolServersManager.cpp @@ -99,6 +99,7 @@ void ProtocolServersManager::createServers( config, host, port_name.c_str(), + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -110,8 +111,7 @@ void ProtocolServersManager::createServers( port_name.c_str(), description + ": " + address.toString(), std::make_unique(stack.release(), server_pool, socket, new Poco::Net::TCPServerParams)); - }, - start_servers); + }); } } @@ -125,6 +125,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { 
Poco::Net::ServerSocket socket; @@ -143,8 +144,7 @@ void ProtocolServersManager::createServers( http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::HTTPS)) @@ -155,6 +155,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { #if USE_SSL @@ -180,8 +181,7 @@ void ProtocolServersManager::createServers( ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); #endif - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::TCP)) @@ -192,6 +192,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -208,8 +209,7 @@ void ProtocolServersManager::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) @@ -220,6 +220,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -236,8 +237,7 @@ void ProtocolServersManager::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) @@ -248,6 +248,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { #if USE_SSL @@ -271,8 +272,7 @@ void ProtocolServersManager::createServers( ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::MYSQL)) @@ -282,6 +282,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -298,8 +299,7 @@ void ProtocolServersManager::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); - }, - start_servers); + }); } if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) @@ -309,6 +309,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -325,8 +326,7 @@ void ProtocolServersManager::createServers( server_pool, socket, new Poco::Net::TCPServerParams)); - }, - start_servers); + }); } #if USE_GRPC @@ -337,6 +337,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::SocketAddress server_address(listen_host, port); @@ -345,8 +346,7 @@ void ProtocolServersManager::createServers( port_name, "gRPC protocol: " + server_address.toString(), std::make_unique(server, makeSocketAddress(listen_host, port, logger))); - }, - start_servers); + }); } #endif if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) @@ -357,6 +357,7 @@ void ProtocolServersManager::createServers( config, listen_host, port_name, + start_servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; @@ -375,8 +376,7 @@ void ProtocolServersManager::createServers( http_params, ProfileEvents::InterfacePrometheusReceiveBytes, 
ProfileEvents::InterfacePrometheusSendBytes)); - }, - start_servers); + }); } } } From 2cc1b27fb5f898a8c728dda03f4dea3941c653b4 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 24 May 2024 14:41:04 +0200 Subject: [PATCH 346/392] Update docs for settings cross_join_min_rows_to_compress and cross_join_min_bytes_to_compress --- docs/en/operations/settings/settings.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 2b5cd11819a..b2efe5d2af4 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5468,3 +5468,15 @@ Defines how MySQL types are converted to corresponding ClickHouse types. A comma - `datetime64`: convert `DATETIME` and `TIMESTAMP` types to `DateTime64` instead of `DateTime` when precision is not `0`. - `date2Date32`: convert `DATE` to `Date32` instead of `Date`. Takes precedence over `date2String`. - `date2String`: convert `DATE` to `String` instead of `Date`. Overridden by `datetime64`. + +## cross_join_min_rows_to_compress + +Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached. + +Default value: `10000000`. + +## cross_join_min_bytes_to_compress + +Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached. + +Default value: `1GiB`. From 7f450cfbdd7578a0b1519f74ff7998f400793284 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 22 May 2024 17:17:43 +0000 Subject: [PATCH 347/392] Try add alias to array join. --- src/Analyzer/ArrayJoinNode.cpp | 19 +++++ src/Analyzer/ColumnNode.cpp | 7 +- src/Analyzer/Passes/QueryAnalysisPass.cpp | 69 +++++++++++++++---- src/Analyzer/QueryTreeBuilder.cpp | 4 +- src/Analyzer/createUniqueTableAliases.cpp | 34 +++++++++ src/Parsers/ASTTablesInSelectQuery.cpp | 9 +++ src/Parsers/ASTTablesInSelectQuery.h | 4 ++ src/Parsers/ParserTablesInSelectQuery.cpp | 4 ++ .../QueryPlan/DistributedCreateLocalPlan.cpp | 8 +++ 9 files changed, 144 insertions(+), 14 deletions(-) diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index 59389d4f2a8..9c1eb9dce3e 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -24,6 +24,9 @@ void ArrayJoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_stat buffer << std::string(indent, ' ') << "ARRAY_JOIN id: " << format_state.getNodeId(this); buffer << ", is_left: " << is_left; + if (hasAlias()) + buffer << ", alias: " << getAlias(); + buffer << '\n' << std::string(indent + 2, ' ') << "TABLE EXPRESSION\n"; getTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4); @@ -52,6 +55,8 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const auto array_join_ast = std::make_shared(); array_join_ast->kind = is_left ? 
ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner; + array_join_ast->setAlias(getAlias()); + auto array_join_expressions_ast = std::make_shared(); const auto & array_join_expressions = getJoinExpressions().getNodes(); @@ -65,7 +70,21 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const else array_join_expression_ast = array_join_expression->toAST(options); + // QueryTreeNodePtr column_source; + // if (column_node) + // column_source = column_node->getColumnSourceOrNull(); + + // if (column_source && column_source->hasAlias()) + // { + // const auto & column_alias = column_node->getAlias(); + // const auto & name_or_alias = column_alias.empty() ? column_node->getColumnName() : column_alias; + + // if (!name_or_alias.starts_with("__")) + // array_join_expression_ast->setAlias(fmt::format("{}.{}", column_source->getAlias(), name_or_alias)); + // } + // else array_join_expression_ast->setAlias(array_join_expression->getAlias()); + array_join_expressions_ast->children.push_back(std::move(array_join_expression_ast)); } diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp index 2b514a85121..f76c096a339 100644 --- a/src/Analyzer/ColumnNode.cpp +++ b/src/Analyzer/ColumnNode.cpp @@ -103,10 +103,15 @@ ASTPtr ColumnNode::toASTImpl(const ConvertToASTOptions & options) const if (column_source && options.fully_qualified_identifiers) { auto node_type = column_source->getNodeType(); + + // if (node_type == QueryTreeNodeType::ARRAY_JOIN && column_source->hasAlias()) + // return std::make_shared(std::string(fmt::format("{}.{}", column_source->getAlias(), column.name))); + if (node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::TABLE_FUNCTION || node_type == QueryTreeNodeType::QUERY || - node_type == QueryTreeNodeType::UNION) + node_type == QueryTreeNodeType::UNION || + node_type == QueryTreeNodeType::ARRAY_JOIN) { if (column_source->hasAlias()) { diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index b7c223303eb..f55f6d6c18f 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1068,10 +1068,25 @@ public: void visitImpl(QueryTreeNodePtr & node) { updateAliasesIfNeeded(node, false /*is_lambda_node*/); + + // if (auto * array_join_node = node->as()) + // { + // for (const auto & elem : array_join_node->getJoinExpressions()) + // { + // for (auto & child : elem->getChildren()) + // { + // // std::cerr << "<<<<<<<<<< " << child->dumpTree() << std::endl; + // visit(child); + // } + // } + // } } bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child) { + // if (parent->getNodeType() == QueryTreeNodeType::ARRAY_JOIN) + // return false; + if (auto * lambda_node = child->as()) { updateAliasesIfNeeded(child, true /*is_lambda_node*/); @@ -1114,6 +1129,8 @@ private: if (node->getNodeType() == QueryTreeNodeType::WINDOW) return; + // std::cerr << ">>>>>>>>>> " << node->dumpTree() << std::endl; + const auto & alias = node->getAlias(); if (is_lambda_node) @@ -1526,7 +1543,7 @@ private: ProjectionNames resolveFunction(QueryTreeNodePtr & function_node, IdentifierResolveScope & scope); - ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); + ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool use_alias_table = true); ProjectionNames 
resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); @@ -3794,6 +3811,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveExpressionFromArrayJoinExpressions(con const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) { + // std::cerr << "tryResolveExpressionFromArrayJoinExpressions " << scope.dump() << std::endl; + const auto & array_join_node = table_expression_node->as(); const auto & array_join_column_expressions_list = array_join_node.getJoinExpressions(); const auto & array_join_column_expressions_nodes = array_join_column_expressions_list.getNodes(); @@ -3871,9 +3890,14 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) { + // std::cerr << "tryResolveIdentifierFromArrayJoin " << identifier_lookup.identifier.getFullName() << std::endl; + const auto & from_array_join_node = table_expression_node->as(); auto resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_array_join_node.getTableExpression(), scope); + // std::cerr << "tryResolveIdentifierFromArrayJoin 2 " << scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) + // << ' ' << identifier_lookup.dump() << '\n' << table_expression_node->dumpTree() << std::endl; + if (scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) || !identifier_lookup.isExpressionLookup()) return resolved_identifier; @@ -3888,8 +3912,11 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi for (const auto & array_join_column_expression : array_join_column_expressions_nodes) { auto & array_join_column_expression_typed = array_join_column_expression->as(); + // std::cerr << "========== " << identifier_lookup.identifier.getFullName() << ' ' << from_array_join_node.getAlias() << ' ' << array_join_column_expression_typed.getAlias() << std::endl; - if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName()) + const auto & parts = identifier_lookup.identifier.getParts(); + if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName() || + (parts.size() == 2 && parts.front() == from_array_join_node.getAlias() && parts.back() == array_join_column_expression_typed.getAlias())) { auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), array_join_column_expression_typed.getColumnSource()); @@ -3911,6 +3938,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTreeNode(const Ident const QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope) { + // std::cerr << "tryResolveIdentifierFromJoinTreeNode " << identifier_lookup.identifier.getFullName() << std::endl; + auto join_tree_node_type = join_tree_node->getNodeType(); switch (join_tree_node_type) @@ -3964,6 +3993,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTree(const Identifie if (identifier_lookup.isFunctionLookup()) return {}; + // std::cerr << "tryResolveIdentifier " << identifier_lookup.identifier.getFullName() << std::endl; + /// Try to resolve identifier from table columns if (auto resolved_identifier = tryResolveIdentifierFromTableColumns(identifier_lookup, scope)) return resolved_identifier; @@ -4112,6 +4143,8 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook IdentifierResolveScope & scope, 
IdentifierResolveSettings identifier_resolve_settings) { + // std::cerr << "tryResolveIdentifier " << identifier_lookup.identifier.getFullName() << std::endl; + auto it = scope.identifier_lookup_to_resolve_state.find(identifier_lookup); if (it != scope.identifier_lookup_to_resolve_state.end()) { @@ -6284,7 +6317,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. */ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression) +ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool use_alias_table) { checkStackSize(); @@ -6334,7 +6367,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * To support both (SELECT 1) AS expression in projection and (SELECT 1) as subquery in IN, do not use * alias table because in alias table subquery could be evaluated as scalar. */ - bool use_alias_table = true; + //bool use_alias_table = true; if (is_duplicated_alias || (allow_table_expression && isSubqueryNodeType(node->getNodeType()))) use_alias_table = false; @@ -7569,22 +7602,33 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif for (auto & array_join_expression : array_join_nodes) { auto array_join_expression_alias = array_join_expression->getAlias(); - if (!array_join_expression_alias.empty() && scope.aliases.alias_name_to_expression_node->contains(array_join_expression_alias)) - throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, - "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", - array_join_expression->formatASTForErrorMessage(), - array_join_expression_alias, - scope.scope_node->formatASTForErrorMessage()); + // if (!array_join_expression_alias.empty() && scope.aliases.alias_name_to_expression_node->contains(array_join_expression_alias)) + // throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, + // "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", + // array_join_expression->formatASTForErrorMessage(), + // array_join_expression_alias, + // scope.scope_node->formatASTForErrorMessage()); /// Add array join expression into scope - expressions_visitor.visit(array_join_expression); + + for (const auto & elem : array_join_nodes) + { + for (auto & child : elem->getChildren()) + { + //std::cerr << "<<<<<<<<<< " << child->dumpTree() << std::endl; + expressions_visitor.visit(child); + //visit(child); + } + } + + //expressions_visitor.visit(array_join_expression); std::string identifier_full_name; if (auto * identifier_node = array_join_expression->as()) identifier_full_name = identifier_node->getIdentifier().getFullName(); - resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, false); auto process_array_join_expression = [&](QueryTreeNodePtr & expression) { @@ -8456,6 +8500,7 @@ QueryAnalysisPass::QueryAnalysisPass(bool only_analyze_) : only_analyze(only_ana void QueryAnalysisPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { + // std::cerr << ".... 
qap\n" << query_tree_node->dumpTree() << std::endl; QueryAnalyzer analyzer(only_analyze); analyzer.resolve(query_tree_node, table_expression, context); createUniqueTableAliases(query_tree_node, table_expression, context); diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 6a5db4bc1de..1d4810296b4 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -957,6 +957,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select auto array_join_expressions_list = buildExpressionList(array_join_expression.expression_list, context); auto array_join_node = std::make_shared(std::move(last_table_expression), std::move(array_join_expressions_list), is_left_array_join); + array_join_node->setAlias(array_join_expression.tryGetAlias()); /** Original AST is not set because it will contain only array join part and does * not include left table expression. @@ -1045,7 +1046,8 @@ ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & QueryTreeNodePtr buildQueryTree(ASTPtr query, ContextPtr context) { QueryTreeBuilder builder(std::move(query), context); - return builder.getQueryTreeNode(); + auto qt = builder.getQueryTreeNode(); + return qt; } } diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp index 8f850fe8dec..30b8c0a433b 100644 --- a/src/Analyzer/createUniqueTableAliases.cpp +++ b/src/Analyzer/createUniqueTableAliases.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include #include #include @@ -58,6 +60,38 @@ public: alias = fmt::format("__table{}", ++next_id); node->setAlias(alias); } + + if (auto * array_join = node->as()) + { + //size_t counter = 0; + for (auto & column : array_join->getJoinExpressions()) + { + if (auto * column_node = column->as()) + { + if (!column_node->hasAlias()) + column_node->setAlias(column_node->getColumnName()); + } + } + } + + // if (auto * array_join = node->as()) + // { + // for (auto & column : array_join->getJoinExpressions()) + // { + // if (auto * column_node = column->as()) + // { + // const auto & column_alias = column_node->getAlias(); + // const auto & name_or_alias = column_alias.empty() ? column_node->getColumnName() : column_alias; + + // if (!name_or_alias.starts_with("__")) + // { + + // column_node->setAlias(fmt::format("{}.{}", alias, name_or_alias)); + // } + // } + // } + // } + break; } default: diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index e782bad797e..2f3e9207f81 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -247,6 +247,12 @@ void ASTTableJoin::formatImpl(const FormatSettings & settings, FormatState & sta formatImplAfterTable(settings, state, frame); } +static void writeAlias(const String & name, const ASTWithAlias::FormatSettings & settings) +{ + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (settings.hilite ? IAST::hilite_alias : ""); + settings.writeIdentifier(name); + settings.ostr << (settings.hilite ? IAST::hilite_none : ""); +} void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { @@ -258,6 +264,9 @@ void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & sta << indent_str << (kind == Kind::Left ? "LEFT " : "") << "ARRAY JOIN" << (settings.hilite ? hilite_none : ""); + if (!alias.empty()) + writeAlias(alias, settings); + settings.one_line ? 
expression_list->formatImpl(settings, state, frame) : expression_list->as().formatImplMultiline(settings, state, frame); diff --git a/src/Parsers/ASTTablesInSelectQuery.h b/src/Parsers/ASTTablesInSelectQuery.h index f3f329ca2b6..4619b22f022 100644 --- a/src/Parsers/ASTTablesInSelectQuery.h +++ b/src/Parsers/ASTTablesInSelectQuery.h @@ -95,6 +95,10 @@ struct ASTArrayJoin : public IAST /// List of array or nested names to JOIN, possible with aliases. ASTPtr expression_list; + String alias; + + String tryGetAlias() const override { return alias; } + void setAlias(const String & to) override { alias = to; } using IAST::IAST; String getID(char) const override { return "ArrayJoin"; } diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index b4d48ae67e9..b2a801c8943 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -98,6 +98,10 @@ bool ParserArrayJoin::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!has_array_join) return false; + ASTPtr alias_node; + if (ParserAlias(false).parse(pos, alias_node, expected)) + tryGetIdentifierNameInto(alias_node, res->alias); + if (!ParserExpressionList(false).parse(pos, res->expression_list, expected)) return false; diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index d4545482477..aef3c03255e 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -2,6 +2,7 @@ #include #include +#include "Parsers/queryToString.h" #include #include #include @@ -68,12 +69,19 @@ std::unique_ptr createLocalPlan( if (context->getSettingsRef().allow_experimental_analyzer) { + // std::cerr << query_ast->dumpTree() << std::endl; + // std::cerr << queryToString(query_ast) << std::endl; + /// For Analyzer, identifier in GROUP BY/ORDER BY/LIMIT BY lists has been resolved to /// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace /// ConstantNode with ProjectionNode again(https://github.com/ClickHouse/ClickHouse/issues/62289). new_context->setSetting("enable_positional_arguments", Field(false)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); + // std::cerr << interpreter.getQueryTree()->dumpTree() << std::endl; query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); + WriteBufferFromOwnString buf; + query_plan->explainPlan(buf, {.header=true, .actions=true}); + // std::cerr << buf.str() << std::endl; } else { From b4581286f74bcdfe199c3b8967e237ae3375cd88 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 16:34:11 +0000 Subject: [PATCH 348/392] Properly resolve array join columns. 
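Roughly, after this change identifier resolution against an ARRAY JOIN matches the lookup identifier against each array-join column's alias or column name, first popping an optional leading qualifier equal to the ARRAY JOIN alias, and falls back to compound-expression resolution for any remaining identifier parts. A minimal illustration of the query shapes whose expected behaviour changes, taken from the tests updated below (test_table and its columns are defined in those tests):

    SELECT 1 AS value FROM test_table ARRAY JOIN [1, 2, 3] AS value;      -- no longer expected to fail with MULTIPLE_EXPRESSIONS_FOR_ALIAS
    SELECT id AS value FROM test_table ARRAY JOIN value_array AS value;   -- likewise now expected to resolve
    SELECT id, value_element, value FROM test_table ARRAY JOIN [[1, 2, 3]] AS value_element, value_element AS value;  -- now expected to fail with UNKNOWN_IDENTIFIER
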
--- src/Analyzer/Passes/QueryAnalysisPass.cpp | 60 ++++++++++++++++--- .../02374_analyzer_array_join.reference | 16 ++++- .../0_stateless/02374_analyzer_array_join.sql | 4 +- .../02521_analyzer_array_join_crash.reference | 9 ++- .../02521_analyzer_array_join_crash.sql | 6 +- 5 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f55f6d6c18f..6bce3dff49d 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -607,6 +607,8 @@ struct ScopeAliases std::unordered_set nodes_with_duplicated_aliases; std::vector cloned_nodes_with_duplicated_aliases; + std::unordered_set array_join_aliases; + std::unordered_map & getAliasMap(IdentifierLookupContext lookup_context) { switch (lookup_context) @@ -2875,7 +2877,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromExpressionArguments(cons bool QueryAnalyzer::tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope) { - return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr; + return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr || scope.aliases.array_join_aliases.contains(identifier_lookup.identifier.front()); } /** Resolve identifier from scope aliases. @@ -2924,6 +2926,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier IdentifierResolveSettings identifier_resolve_settings) { const auto & identifier_bind_part = identifier_lookup.identifier.front(); + // std::cerr << "tryResolveIdentifierFromAliases " << identifier_lookup.dump() << std::endl; auto * it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); if (it == nullptr) @@ -2952,6 +2955,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier } auto node_type = alias_node->getNodeType(); + // std::cerr << "tryResolveIdentifierFromAliases 1.5 \n" << alias_node->dumpTree() << std::endl; /// Resolve expression if necessary if (node_type == QueryTreeNodeType::IDENTIFIER) @@ -2960,6 +2964,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier auto & alias_identifier_node = alias_node->as(); auto identifier = alias_identifier_node.getIdentifier(); + // std::cerr << "tryResolveIdentifierFromAliases 2 " << identifier.getFullName() << std::endl; auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings); if (!lookup_result.resolved_identifier) { @@ -3136,6 +3141,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage( size_t identifier_column_qualifier_parts, bool can_be_not_found) { + // std::cerr << "tryResolveIdentifierFromStorage " << identifier.getFullName() << std::endl; auto identifier_without_column_qualifier = identifier; identifier_without_column_qualifier.popFirst(identifier_column_qualifier_parts); @@ -3278,6 +3284,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage( { auto qualified_identifier_with_removed_part = qualified_identifier; qualified_identifier_with_removed_part.popFirst(); + // std::cerr << "tryResolveIdentifierFromStorage qualified_identifier_with_removed_part" << qualified_identifier_with_removed_part.getFullName() << std::endl; if (qualified_identifier_with_removed_part.empty()) break; @@ -3896,7 +3903,7 @@ QueryTreeNodePtr 
QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi auto resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_array_join_node.getTableExpression(), scope); // std::cerr << "tryResolveIdentifierFromArrayJoin 2 " << scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) - // << ' ' << identifier_lookup.dump() << '\n' << table_expression_node->dumpTree() << std::endl; + // << ' ' << identifier_lookup.dump() << ' ' << (resolved_identifier ? resolved_identifier->dumpTree() : "not resolved ") << std::endl; if (scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) || !identifier_lookup.isExpressionLookup()) return resolved_identifier; @@ -3914,14 +3921,48 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi auto & array_join_column_expression_typed = array_join_column_expression->as(); // std::cerr << "========== " << identifier_lookup.identifier.getFullName() << ' ' << from_array_join_node.getAlias() << ' ' << array_join_column_expression_typed.getAlias() << std::endl; - const auto & parts = identifier_lookup.identifier.getParts(); - if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName() || - (parts.size() == 2 && parts.front() == from_array_join_node.getAlias() && parts.back() == array_join_column_expression_typed.getAlias())) + IdentifierView identifier_view(identifier_lookup.identifier); + + if (identifier_view.isCompound() && from_array_join_node.hasAlias() && identifier_view.front() == from_array_join_node.getAlias()) + identifier_view.popFirst(); + + const auto & alias_or_name = array_join_column_expression_typed.hasAlias() + ? array_join_column_expression_typed.getAlias() + : array_join_column_expression_typed.getColumnName(); + + if (identifier_view.front() == alias_or_name) + identifier_view.popFirst(); + else if (identifier_view.getFullName() == alias_or_name) + identifier_view.popFirst(identifier_view.getPartsSize()); /// Clear + else + continue; + + if (identifier_view.empty()) { auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), array_join_column_expression_typed.getColumnSource()); return array_join_column; } + + auto compound_expr = tryResolveIdentifierFromCompoundExpression( + identifier_lookup.identifier, + identifier_lookup.identifier.getPartsSize() - identifier_view.getPartsSize() /*identifier_bind_size*/, + array_join_column_expression, + {} /* compound_expression_source */, + scope, + true /* can_be_not_found */); + + if (compound_expr) + return compound_expr; + + // const auto & parts = identifier_lookup.identifier.getParts(); + // if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName() || + // (parts.size() == 2 && parts.front() == from_array_join_node.getAlias() && parts.back() == array_join_column_expression_typed.getAlias())) + // { + // auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), + // array_join_column_expression_typed.getColumnSource()); + // return array_join_column; + // } } if (!resolved_identifier) @@ -3993,7 +4034,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTree(const Identifie if (identifier_lookup.isFunctionLookup()) return {}; - // std::cerr << "tryResolveIdentifier " << identifier_lookup.identifier.getFullName() << std::endl; + // std::cerr << "tryResolveIdentifierFromJoinTree " << identifier_lookup.identifier.getFullName() << std::endl; /// Try to 
resolve identifier from table columns if (auto resolved_identifier = tryResolveIdentifierFromTableColumns(identifier_lookup, scope)) @@ -7613,15 +7654,18 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif for (const auto & elem : array_join_nodes) { + if (elem->hasAlias()) + scope.aliases.array_join_aliases.insert(elem->getAlias()); for (auto & child : elem->getChildren()) { //std::cerr << "<<<<<<<<<< " << child->dumpTree() << std::endl; - expressions_visitor.visit(child); + if (child) + expressions_visitor.visit(child); //visit(child); } } - //expressions_visitor.visit(array_join_expression); + // expressions_visitor.visit(array_join_expression); std::string identifier_full_name; diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.reference b/tests/queries/0_stateless/02374_analyzer_array_join.reference index 6dd384c7d9c..44f3e5a95e9 100644 --- a/tests/queries/0_stateless/02374_analyzer_array_join.reference +++ b/tests/queries/0_stateless/02374_analyzer_array_join.reference @@ -45,7 +45,13 @@ SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS val 0 Value [1,2,3] 1 0 Value [1,2,3] 2 0 Value [1,2,3] 3 -SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; -- { serverError 179 } +SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; +1 +2 +3 +1 +2 +3 SELECT 'ARRAY JOIN with column'; ARRAY JOIN with column SELECT id, value, test_table.value_array FROM test_table ARRAY JOIN value_array; @@ -84,7 +90,13 @@ SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY J 0 Value [4,5,6] SELECT '--'; -- -SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -- { serverError 179 } +SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; +1 +2 +3 +4 +5 +6 SELECT '--'; -- SELECT id, value, value_array AS value_array_array_alias, value_array_array_alias_element FROM test_table ARRAY JOIN value_array_array_alias AS value_array_array_alias_element; diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.sql b/tests/queries/0_stateless/02374_analyzer_array_join.sql index bc4bb6616c1..dfd3b755aff 100644 --- a/tests/queries/0_stateless/02374_analyzer_array_join.sql +++ b/tests/queries/0_stateless/02374_analyzer_array_join.sql @@ -33,7 +33,7 @@ SELECT '--'; SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS value_1 ARRAY JOIN value_1 AS value_2; -SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; -- { serverError 179 } +SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; SELECT 'ARRAY JOIN with column'; @@ -53,7 +53,7 @@ SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY J SELECT '--'; -SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -- { serverError 179 } +SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; SELECT '--'; diff --git a/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference b/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference index 5e7728e0590..426cfe35e73 100644 --- a/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference +++ b/tests/queries/0_stateless/02521_analyzer_array_join_crash.reference @@ -1,11 +1,10 @@ -- { echoOn } -SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; -0 [1,2,3] [1,2,3] +SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; -- { serverError UNKNOWN_IDENTIFIER 
} SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element ARRAY JOIN value_element AS value; 0 [1,2,3] 1 0 [1,2,3] 2 0 [1,2,3] 3 -SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element, arrayMap(x -> value_element, ['']) AS value; -1048577 [1048577] -SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem, arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError 44 } +SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element ARRAY JOIN arrayMap(x -> value_element, ['']) AS value; +1048577 1048577 +SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem ARRAY JOIN arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql b/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql index 53606e01ab7..7842d47d757 100644 --- a/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql +++ b/tests/queries/0_stateless/02521_analyzer_array_join_crash.sql @@ -11,13 +11,13 @@ INSERT INTO test_table VALUES (0, 'Value'); -- { echoOn } -SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; +SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value; -- { serverError UNKNOWN_IDENTIFIER } SELECT id, value_element, value FROM test_table ARRAY JOIN [[1,2,3]] AS value_element ARRAY JOIN value_element AS value; -SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element, arrayMap(x -> value_element, ['']) AS value; +SELECT value_element, value FROM test_table ARRAY JOIN [1048577] AS value_element ARRAY JOIN arrayMap(x -> value_element, ['']) AS value; -SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem, arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError 44 } +SELECT arrayFilter(x -> notEmpty(concat(x)), [NULL, NULL]) FROM system.one ARRAY JOIN [1048577] AS elem ARRAY JOIN arrayMap(x -> splitByChar(x, elem), ['']) AS unused; -- { serverError ILLEGAL_COLUMN } -- { echoOff } From 317941f06af836d719e1360b04616970271ecc12 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 17:01:46 +0000 Subject: [PATCH 349/392] Add a test. 
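The new test exercises ARRAY JOIN with an alias over remote()/distributed tables under the analyzer. A representative query shape, taken from the added 03156_analyzer_array_join_distributed.sql (the arrays_test table is created by the test itself):

    SELECT s, arr, a FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr AS a WHERE a < 3 ORDER BY a;
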
--- .../03156_analyzer_array_join_distributed.reference | 12 ++++++++++++ .../03156_analyzer_array_join_distributed.sql | 10 ++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference create mode 100644 tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference new file mode 100644 index 00000000000..b5b2aec9c12 --- /dev/null +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.reference @@ -0,0 +1,12 @@ +Hello [1,2] 1 +Hello [1,2] 2 +Hello [1,2] 1 +Hello [1,2] 1 +Hello [1,2] 2 +Hello [1,2] 2 +Hello 1 +Hello 2 +Hello 1 +Hello 1 +Hello 2 +Hello 2 diff --git a/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql new file mode 100644 index 00000000000..f605a369822 --- /dev/null +++ b/tests/queries/0_stateless/03156_analyzer_array_join_distributed.sql @@ -0,0 +1,10 @@ +CREATE TABLE arrays_test (s String, arr Array(UInt8)) ENGINE = MergeTree() ORDER BY (s); + +INSERT INTO arrays_test VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []); + +SELECT s, arr, a FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr AS a WHERE a < 3 ORDER BY a; +SELECT s, arr, a FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) ARRAY JOIN arr AS a WHERE a < 3 ORDER BY a; + + +SELECT s, arr FROM remote('127.0.0.2', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; +SELECT s, arr FROM remote('127.0.0.{1,2}', currentDatabase(), arrays_test) ARRAY JOIN arr WHERE arr < 3 ORDER BY arr; From bee3c50ecd4a41e64d29812b5607927c12dba111 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 17:23:02 +0000 Subject: [PATCH 350/392] Try not to add alias to array join. --- src/Analyzer/ArrayJoinNode.cpp | 2 +- src/Analyzer/ColumnNode.cpp | 4 ++-- src/Analyzer/QueryTreeBuilder.cpp | 2 +- src/Parsers/ASTTablesInSelectQuery.cpp | 16 ++++++++-------- src/Parsers/ASTTablesInSelectQuery.h | 6 +++--- src/Parsers/ParserTablesInSelectQuery.cpp | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index 9c1eb9dce3e..37c198f8472 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -55,7 +55,7 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const auto array_join_ast = std::make_shared(); array_join_ast->kind = is_left ? 
ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner; - array_join_ast->setAlias(getAlias()); + // array_join_ast->setAlias(getAlias()); auto array_join_expressions_ast = std::make_shared(); const auto & array_join_expressions = getJoinExpressions().getNodes(); diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp index f76c096a339..d12eac68ab4 100644 --- a/src/Analyzer/ColumnNode.cpp +++ b/src/Analyzer/ColumnNode.cpp @@ -110,8 +110,8 @@ ASTPtr ColumnNode::toASTImpl(const ConvertToASTOptions & options) const if (node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::TABLE_FUNCTION || node_type == QueryTreeNodeType::QUERY || - node_type == QueryTreeNodeType::UNION || - node_type == QueryTreeNodeType::ARRAY_JOIN) + node_type == QueryTreeNodeType::UNION)// || + //node_type == QueryTreeNodeType::ARRAY_JOIN) { if (column_source->hasAlias()) { diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 1d4810296b4..02d742f5e49 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -957,7 +957,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select auto array_join_expressions_list = buildExpressionList(array_join_expression.expression_list, context); auto array_join_node = std::make_shared(std::move(last_table_expression), std::move(array_join_expressions_list), is_left_array_join); - array_join_node->setAlias(array_join_expression.tryGetAlias()); + // array_join_node->setAlias(array_join_expression.tryGetAlias()); /** Original AST is not set because it will contain only array join part and does * not include left table expression. diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 2f3e9207f81..b4058a0950d 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -247,12 +247,12 @@ void ASTTableJoin::formatImpl(const FormatSettings & settings, FormatState & sta formatImplAfterTable(settings, state, frame); } -static void writeAlias(const String & name, const ASTWithAlias::FormatSettings & settings) -{ - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (settings.hilite ? IAST::hilite_alias : ""); - settings.writeIdentifier(name); - settings.ostr << (settings.hilite ? IAST::hilite_none : ""); -} +// static void writeAlias(const String & name, const ASTWithAlias::FormatSettings & settings) +// { +// settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (settings.hilite ? IAST::hilite_alias : ""); +// settings.writeIdentifier(name); +// settings.ostr << (settings.hilite ? IAST::hilite_none : ""); +// } void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { @@ -264,8 +264,8 @@ void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & sta << indent_str << (kind == Kind::Left ? "LEFT " : "") << "ARRAY JOIN" << (settings.hilite ? hilite_none : ""); - if (!alias.empty()) - writeAlias(alias, settings); + // if (!alias.empty()) + // writeAlias(alias, settings); settings.one_line ? expression_list->formatImpl(settings, state, frame) diff --git a/src/Parsers/ASTTablesInSelectQuery.h b/src/Parsers/ASTTablesInSelectQuery.h index 4619b22f022..212436b0d9e 100644 --- a/src/Parsers/ASTTablesInSelectQuery.h +++ b/src/Parsers/ASTTablesInSelectQuery.h @@ -95,10 +95,10 @@ struct ASTArrayJoin : public IAST /// List of array or nested names to JOIN, possible with aliases. 
ASTPtr expression_list; - String alias; + // String alias; - String tryGetAlias() const override { return alias; } - void setAlias(const String & to) override { alias = to; } + // String tryGetAlias() const override { return alias; } + // void setAlias(const String & to) override { alias = to; } using IAST::IAST; String getID(char) const override { return "ArrayJoin"; } diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index b2a801c8943..c96b6c1584d 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -98,9 +98,9 @@ bool ParserArrayJoin::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!has_array_join) return false; - ASTPtr alias_node; - if (ParserAlias(false).parse(pos, alias_node, expected)) - tryGetIdentifierNameInto(alias_node, res->alias); + // ASTPtr alias_node; + // if (ParserAlias(false).parse(pos, alias_node, expected)) + // tryGetIdentifierNameInto(alias_node, res->alias); if (!ParserExpressionList(false).parse(pos, res->expression_list, expected)) return false; From a19472ddd58d121c8cda910dd7690fa37fb66065 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 23 May 2024 17:53:17 +0000 Subject: [PATCH 351/392] Connect code. --- src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index aef3c03255e..ad94dd2c173 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -78,9 +78,9 @@ std::unique_ptr createLocalPlan( new_context->setSetting("enable_positional_arguments", Field(false)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); // std::cerr << interpreter.getQueryTree()->dumpTree() << std::endl; - query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); - WriteBufferFromOwnString buf; - query_plan->explainPlan(buf, {.header=true, .actions=true}); + // query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); + // WriteBufferFromOwnString buf; + // query_plan->explainPlan(buf, {.header=true, .actions=true}); // std::cerr << buf.str() << std::endl; } else From 1e5872cb4ea8237d24528d2595a6708a36204a00 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 24 May 2024 11:02:31 +0200 Subject: [PATCH 352/392] Update DistributedCreateLocalPlan.cpp --- src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index ad94dd2c173..e4d908e2af0 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -78,7 +78,7 @@ std::unique_ptr createLocalPlan( new_context->setSetting("enable_positional_arguments", Field(false)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); // std::cerr << interpreter.getQueryTree()->dumpTree() << std::endl; - // query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); + query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); // WriteBufferFromOwnString buf; // query_plan->explainPlan(buf, {.header=true, .actions=true}); // std::cerr << buf.str() << std::endl; From 
634f7c35e8348cbf0c77de729bde131d34ca6336 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 24 May 2024 12:43:40 +0000 Subject: [PATCH 353/392] Better. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 40 +++++++++++-------- .../02374_analyzer_array_join.reference | 24 ++++++----- .../0_stateless/02374_analyzer_array_join.sql | 3 ++ 3 files changed, 41 insertions(+), 26 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 6bce3dff49d..871c3842de0 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1545,7 +1545,7 @@ private: ProjectionNames resolveFunction(QueryTreeNodePtr & function_node, IdentifierResolveScope & scope); - ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool use_alias_table = true); + ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias = false); ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression); @@ -3919,6 +3919,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi for (const auto & array_join_column_expression : array_join_column_expressions_nodes) { auto & array_join_column_expression_typed = array_join_column_expression->as(); + // std::cerr << "========== " << array_join_column_expression->dumpTree() << std::endl; // std::cerr << "========== " << identifier_lookup.identifier.getFullName() << ' ' << from_array_join_node.getAlias() << ' ' << array_join_column_expression_typed.getAlias() << std::endl; IdentifierView identifier_view(identifier_lookup.identifier); @@ -6358,10 +6359,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * * 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process. */ -ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool use_alias_table) +ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias) { checkStackSize(); + // std::cerr << "resolveExpressionNode " << ignore_alias << "\n" << node->dumpTree() << std::endl; + auto resolved_expression_it = resolved_expressions.find(node); if (resolved_expression_it != resolved_expressions.end()) { @@ -6378,6 +6381,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id evaluateScalarSubqueryIfNeeded(node, subquery_scope); } + // std::cerr << "resolveExpressionNode taken from cache \n" << node->dumpTree() << "\n PN " << (resolved_expression_it->second.empty() ? 
"" : resolved_expression_it->second.front()) << std::endl; return resolved_expression_it->second; } @@ -6388,7 +6392,10 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id { auto projection_name_it = node_to_projection_name.find(node); if (projection_name_it != node_to_projection_name.end()) + { + // std::cerr << "resolveExpressionNode taken projection name from map : " << projection_name_it->second << " for \n" << node->dumpTree() << std::endl; result_projection_names.push_back(projection_name_it->second); + } } else { @@ -6408,7 +6415,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id * To support both (SELECT 1) AS expression in projection and (SELECT 1) as subquery in IN, do not use * alias table because in alias table subquery could be evaluated as scalar. */ - //bool use_alias_table = true; + bool use_alias_table = !ignore_alias; if (is_duplicated_alias || (allow_table_expression && isSubqueryNodeType(node->getNodeType()))) use_alias_table = false; @@ -6708,7 +6715,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id if (is_duplicated_alias) scope.non_cached_identifier_lookups_during_expression_resolve.erase({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION}); - resolved_expressions.emplace(node, result_projection_names); + if (!ignore_alias) + resolved_expressions.emplace(node, result_projection_names); scope.popExpressionNode(); bool expression_was_root = scope.expressions_in_resolve_process_stack.empty(); @@ -7672,7 +7680,7 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif if (auto * identifier_node = array_join_expression->as()) identifier_full_name = identifier_node->getIdentifier().getFullName(); - resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, false); + resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, true); auto process_array_join_expression = [&](QueryTreeNodePtr & expression) { @@ -7749,17 +7757,17 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif * with type after ARRAY JOIN. 
*/ array_join_nodes = std::move(array_join_column_expressions); - for (auto & array_join_column_expression : array_join_nodes) - { - auto it = scope.aliases.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); - if (it != scope.aliases.alias_name_to_expression_node->end()) - { - auto & array_join_column_expression_typed = array_join_column_expression->as(); - auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), - array_join_column_expression_typed.getColumnSource()); - it->second = std::move(array_join_column); - } - } + // for (auto & array_join_column_expression : array_join_nodes) + // { + // auto it = scope.aliases.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); + // if (it != scope.aliases.alias_name_to_expression_node->end()) + // { + // auto & array_join_column_expression_typed = array_join_column_expression->as(); + // auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), + // array_join_column_expression_typed.getColumnSource()); + // it->second = std::move(array_join_column); + // } + // } } void QueryAnalyzer::checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope) diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.reference b/tests/queries/0_stateless/02374_analyzer_array_join.reference index 44f3e5a95e9..ad7750228d6 100644 --- a/tests/queries/0_stateless/02374_analyzer_array_join.reference +++ b/tests/queries/0_stateless/02374_analyzer_array_join.reference @@ -47,11 +47,11 @@ SELECT id, value, value_1, value_2 FROM test_table ARRAY JOIN [[1, 2, 3]] AS val 0 Value [1,2,3] 3 SELECT 1 AS value FROM test_table ARRAY JOIN [1,2,3] AS value; 1 -2 -3 1 -2 -3 +1 +1 +1 +1 SELECT 'ARRAY JOIN with column'; ARRAY JOIN with column SELECT id, value, test_table.value_array FROM test_table ARRAY JOIN value_array; @@ -91,12 +91,12 @@ SELECT id, value, value_array AS value_array_array_alias FROM test_table ARRAY J SELECT '--'; -- SELECT id AS value FROM test_table ARRAY JOIN value_array AS value; -1 -2 -3 -4 -5 -6 +0 +0 +0 +0 +0 +0 SELECT '--'; -- SELECT id, value, value_array AS value_array_array_alias, value_array_array_alias_element FROM test_table ARRAY JOIN value_array_array_alias AS value_array_array_alias_element; @@ -132,3 +132,7 @@ WHERE NOT ignore(elem) GROUP BY sum(ignore(ignore(ignore(1., 1, 36, 8, 8), ignore(52, 37, 37, '03147_parquet_memory_tracking.parquet', 37, 37, toUInt256(37), 37, 37, toNullable(37), 37, 37), 1., 1, 36, 8, 8), emptyArrayToSingle(arrayMap(x -> toString(x), arrayMap(x -> nullIf(x, 2), arrayJoin([[1]])))))) IGNORE NULLS, modulo(toLowCardinality('03147_parquet_memory_tracking.parquet'), number, toLowCardinality(3)); -- { serverError UNKNOWN_IDENTIFIER } +[1,2] 1 +[1,2] 2 +1 +2 diff --git a/tests/queries/0_stateless/02374_analyzer_array_join.sql b/tests/queries/0_stateless/02374_analyzer_array_join.sql index dfd3b755aff..8c26df1806e 100644 --- a/tests/queries/0_stateless/02374_analyzer_array_join.sql +++ b/tests/queries/0_stateless/02374_analyzer_array_join.sql @@ -80,3 +80,6 @@ GROUP BY -- { echoOff } DROP TABLE test_table; + +select [1, 2] as arr, x from system.one array join arr as x; +select x + 1 as x from (select [number] as arr from numbers(2)) as s array join arr as x; From 9794a193cfb88d7a49b12b9a60986884bf3ebfda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 
15:05:49 +0200 Subject: [PATCH 354/392] Rename aggregate_function_group_array_has_limit_size --- .../AggregateFunctionGroupArray.cpp | 11 ++++++----- src/Core/ServerSettings.h | 3 ++- src/Core/SettingsEnums.cpp | 5 +++++ src/Core/SettingsEnums.h | 8 ++++++++ .../configs/group_array_max_element_size.xml | 2 +- .../integration/test_group_array_element_size/test.py | 8 ++++---- 6 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index d4fb7afcb78..c21b1d376d9 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -753,10 +753,11 @@ size_t getMaxArraySize() return 0xFFFFFF; } -bool hasLimitArraySize() +bool discardOnLimitReached() { if (auto context = Context::getGlobalContextInstance()) - return context->getServerSettings().aggregate_function_group_array_has_limit_size; + return context->getServerSettings().aggregate_function_group_array_action_when_limit_is_reached + == GroupArrayActionWhenLimitReached::DISCARD; return false; } @@ -767,7 +768,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( { assertUnary(name, argument_types); - bool limit_size = hasLimitArraySize(); + bool has_limit = discardOnLimitReached(); UInt64 max_elems = getMaxArraySize(); if (parameters.empty()) @@ -784,14 +785,14 @@ AggregateFunctionPtr createAggregateFunctionGroupArray( (type == Field::Types::UInt64 && parameters[0].get() == 0)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name); - limit_size = true; + has_limit = true; max_elems = parameters[0].get(); } else throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Incorrect number of parameters for aggregate function {}, should be 0 or 1", name); - if (!limit_size) + if (!has_limit) { if (Tlast) throw Exception(ErrorCodes::BAD_ARGUMENTS, "groupArrayLast make sense only with max_elems (groupArrayLast(max_elems)())"); diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index ea0b155b22d..45f235116ab 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -3,6 +3,7 @@ #include #include +#include namespace Poco::Util @@ -51,7 +52,7 @@ namespace DB M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting., ", 0) \ M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \ M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \ - M(Bool, aggregate_function_group_array_has_limit_size, false, "When the max array element size is exceeded, a `Too large array size` exception will be thrown by default. When set to true, no exception will be thrown, and the excess elements will be discarded.", 0) \ + M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \ M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \ M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. 
Allows to lower max memory on low-memory systems.", 0) \ M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 0caf6e8d609..05985316566 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -229,4 +229,9 @@ IMPLEMENT_SETTING_ENUM(SQLSecurityType, ErrorCodes::BAD_ARGUMENTS, {{"DEFINER", SQLSecurityType::DEFINER}, {"INVOKER", SQLSecurityType::INVOKER}, {"NONE", SQLSecurityType::NONE}}) + +IMPLEMENT_SETTING_ENUM( + GroupArrayActionWhenLimitReached, + ErrorCodes::BAD_ARGUMENTS, + {{"throw", GroupArrayActionWhenLimitReached::THROW}, {"discard", GroupArrayActionWhenLimitReached::DISCARD}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index ab163ba96a3..575cd8700c8 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -370,4 +370,12 @@ DECLARE_SETTING_ENUM(SchemaInferenceMode) DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateTimeOverflowBehavior) DECLARE_SETTING_ENUM(SQLSecurityType) + +enum class GroupArrayActionWhenLimitReached : uint8_t +{ + THROW, + DISCARD +}; +DECLARE_SETTING_ENUM(GroupArrayActionWhenLimitReached) + } diff --git a/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml index 80409d3e18b..32d5d131a44 100644 --- a/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml +++ b/tests/integration/test_group_array_element_size/configs/group_array_max_element_size.xml @@ -1,4 +1,4 @@ 10 - false + throw diff --git a/tests/integration/test_group_array_element_size/test.py b/tests/integration/test_group_array_element_size/test.py index 1eb7647d734..90b2712ffbf 100644 --- a/tests/integration/test_group_array_element_size/test.py +++ b/tests/integration/test_group_array_element_size/test.py @@ -80,8 +80,8 @@ def test_limit_size(started_cluster): node2.replace_in_config( "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", - "false", - "true", + "throw", + "discard", ) node2.restart_clickhouse() @@ -91,8 +91,8 @@ def test_limit_size(started_cluster): node2.replace_in_config( "/etc/clickhouse-server/config.d/group_array_max_element_size.xml", - "true", - "false", + "discard", + "throw", ) node2.restart_clickhouse() From 16fb2fc5616ae462c1f658f9765c82d935b456e4 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 24 May 2024 13:13:19 +0000 Subject: [PATCH 355/392] Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts --- ...9_dynamic_aggregating_merge_tree.reference | 32 +++++++++++++++ .../03039_dynamic_aggregating_merge_tree.sh | 40 +++++++++++++++++++ ...39_dynamic_collapsing_merge_tree.reference | 20 ++++++++++ .../03039_dynamic_collapsing_merge_tree.sh | 38 ++++++++++++++++++ ...039_dynamic_replacing_merge_tree.reference | 20 ++++++++++ .../03039_dynamic_replacing_merge_tree.sh | 39 ++++++++++++++++++ ...03039_dynamic_summing_merge_tree.reference | 32 +++++++++++++++ .../03039_dynamic_summing_merge_tree.sh | 40 +++++++++++++++++++ ..._versioned_collapsing_merge_tree.reference | 20 ++++++++++ ...dynamic_versioned_collapsing_merge_tree.sh | 38 ++++++++++++++++++ 10 files changed, 319 insertions(+) create mode 100644 tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference create mode 100755 
tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh create mode 100644 tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference create mode 100755 tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh create mode 100644 tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference create mode 100755 tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh create mode 100644 tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference create mode 100755 tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh create mode 100644 tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference create mode 100755 tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference new file mode 100644 index 00000000000..3c186fcc935 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.reference @@ -0,0 +1,32 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh new file mode 100755 index 00000000000..c433d409c7c --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sum AggregateFunction(sum, UInt64), d Dynamic) engine=AggregatingMergeTree() order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), number from numbers(100000) group by number" + $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference new file mode 100644 index 00000000000..fc293cc2ec8 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +50000 String +50000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh new file mode 100755 index 00000000000..881c9ec64cc --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_collapsing_merge_tree.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sign Int8, d Dynamic) engine=CollapsingMergeTree(sign) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference new file mode 100644 index 00000000000..132b9df6b26 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree compact + vertical merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String +MergeTree wide + vertical merge +100000 String +100000 UInt64 +50000 UInt64 +100000 String diff --git a/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh new file mode 100755 index 00000000000..fc9039ac98c --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_replacing_merge_tree.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=ReplacingMergeTree order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference new file mode 100644 index 00000000000..3c186fcc935 --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.reference @@ -0,0 +1,32 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +200000 1 +50000 String +100000 UInt64 +100000 1 +50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh new file mode 100755 index 00000000000..f9da70e95ca --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_summing_merge_tree.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sum UInt64, d Dynamic) engine=SummingMergeTree(sum) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference new file mode 100644 index 00000000000..cabb0fdefab --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.reference @@ -0,0 +1,20 @@ +MergeTree compact + horizontal merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree wide + horizontal merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree compact + vertical merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 +MergeTree wide + vertical merge +100000 String +100000 UInt64 +75000 String +75000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh new file mode 100755 index 00000000000..ca313307a6d --- /dev/null +++ b/tests/queries/0_stateless/03039_dynamic_versioned_collapsing_merge_tree.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Fix some settings to avoid timeouts because of some settings randomization +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" + +function test() +{ + $CH_CLIENT -q "create table test (id UInt64, sign Int8, version UInt8, d Dynamic) engine=VersionedCollapsingMergeTree(sign, version) order by id settings $1;" + $CH_CLIENT -q "system stop merges test" + $CH_CLIENT -q "insert into test select number, 1, 1, number from numbers(100000)" + $CH_CLIENT -q "insert into test select number, -1, number >= 75000 ? 2 : 1, 'str_' || toString(number) from numbers(50000, 100000)" + + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -nm -q "system start merges test; optimize table test final" + $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" + $CH_CLIENT -q "drop table test" +} + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact + horizontal merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" + +echo "MergeTree wide + horizontal merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" + +echo "MergeTree compact + vertical merge" +test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" + +echo "MergeTree wide + vertical merge" +test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" From 09750cb83b0ed72c5527aaf6ab9211203aa6b7f8 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 24 May 2024 13:14:02 +0000 Subject: [PATCH 356/392] Delete old tests --- ...9_dynamic_all_merge_algorithms_1.reference | 88 ------------------- .../03039_dynamic_all_merge_algorithms_1.sh | 65 -------------- ...9_dynamic_all_merge_algorithms_2.reference | 44 ---------- .../03039_dynamic_all_merge_algorithms_2.sh | 50 ----------- 4 files changed, 247 deletions(-) delete mode 100644 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference delete mode 100755 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh delete mode 100644 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference delete mode 100755 tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference deleted file mode 100644 index 6c69b81c183..00000000000 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.reference +++ /dev/null @@ -1,88 +0,0 @@ -MergeTree compact + horizontal merge -ReplacingMergeTree -100000 String -100000 UInt64 -50000 UInt64 -100000 String -SummingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -AggregatingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -MergeTree wide + horizontal merge -ReplacingMergeTree -100000 String -100000 UInt64 -50000 UInt64 -100000 String -SummingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -AggregatingMergeTree -100000 String -100000 UInt64 -200000 1 
-50000 String -100000 UInt64 -100000 1 -50000 2 -MergeTree compact + vertical merge -ReplacingMergeTree -100000 String -100000 UInt64 -50000 UInt64 -100000 String -SummingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -AggregatingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -MergeTree wide + vertical merge -ReplacingMergeTree -100000 String -100000 UInt64 -50000 UInt64 -100000 String -SummingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 -AggregatingMergeTree -100000 String -100000 UInt64 -200000 1 -50000 String -100000 UInt64 -100000 1 -50000 2 diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh deleted file mode 100755 index 9cfd2294c8d..00000000000 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_1.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --optimize_aggregation_in_order 0 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" - - -function test() -{ - echo "ReplacingMergeTree" - $CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=ReplacingMergeTree order by id settings $1;" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(50000, 100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "drop table test" - - echo "SummingMergeTree" - $CH_CLIENT -q "create table test (id UInt64, sum UInt64, d Dynamic) engine=SummingMergeTree(sum) order by id settings $1;" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, 1, 'str_' || toString(number) from numbers(50000, 100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" - $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from test group by sum order by sum, count()" - $CH_CLIENT -q "drop table test" - - echo "AggregatingMergeTree" - $CH_CLIENT -q "create table test (id UInt64, sum AggregateFunction(sum, UInt64), d Dynamic) engine=AggregatingMergeTree() order by id settings $1;" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), number from numbers(100000) group by number" - $CH_CLIENT -q "insert into test select number, sumState(1::UInt64), 'str_' || toString(number) from numbers(50000, 100000) group by number" - - 
$CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" - $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select count(), sum from (select sumMerge(sum) as sum from test group by id, _part) group by sum order by sum, count()" - $CH_CLIENT -q "drop table test" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact + horizontal merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=10000000000, vertical_merge_algorithm_min_columns_to_activate=100000000000" - -echo "MergeTree wide + horizontal merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1,vertical_merge_algorithm_min_rows_to_activate=1000000000, vertical_merge_algorithm_min_columns_to_activate=1000000000000" - -echo "MergeTree compact + vertical merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" - -echo "MergeTree wide + vertical merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference deleted file mode 100644 index af6c7d8d567..00000000000 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.reference +++ /dev/null @@ -1,44 +0,0 @@ -MergeTree compact + horizontal merge -CollapsingMergeTree -100000 String -100000 UInt64 -50000 String -50000 UInt64 -VersionedCollapsingMergeTree -100000 String -100000 UInt64 -75000 String -75000 UInt64 -MergeTree wide + horizontal merge -CollapsingMergeTree -100000 String -100000 UInt64 -50000 String -50000 UInt64 -VersionedCollapsingMergeTree -100000 String -100000 UInt64 -75000 String -75000 UInt64 -MergeTree compact + vertical merge -CollapsingMergeTree -100000 String -100000 UInt64 -50000 String -50000 UInt64 -VersionedCollapsingMergeTree -100000 String -100000 UInt64 -75000 String -75000 UInt64 -MergeTree wide + vertical merge -CollapsingMergeTree -100000 String -100000 UInt64 -50000 String -50000 UInt64 -VersionedCollapsingMergeTree -100000 String -100000 UInt64 -75000 String -75000 UInt64 diff --git a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh b/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh deleted file mode 100755 index 02362012960..00000000000 --- a/tests/queries/0_stateless/03039_dynamic_all_merge_algorithms_2.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" - - -function test() -{ - echo "CollapsingMergeTree" - $CH_CLIENT -q "create table test (id UInt64, sign Int8, d Dynamic) engine=CollapsingMergeTree(sign) order by id settings $1;" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, 1, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, -1, 'str_' || toString(number) from numbers(50000, 100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "drop table test" - - echo "VersionedCollapsingMergeTree" - $CH_CLIENT -q "create table test (id UInt64, sign Int8, version UInt8, d Dynamic) engine=VersionedCollapsingMergeTree(sign, version) order by id settings $1;" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, 1, 1, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, -1, number >= 75000 ? 2 : 1, 'str_' || toString(number) from numbers(50000, 100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "drop table test" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact + horizontal merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000" - -echo "MergeTree wide + horizontal merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1" - -echo "MergeTree compact + vertical merge" -test "min_rows_for_wide_part=100000000000, min_bytes_for_wide_part=1000000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" - -echo "MergeTree wide + vertical merge" -test "min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1" From cb37b098ef23b0575b987edf35db2276bdb02a69 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 16:17:25 +0200 Subject: [PATCH 357/392] CI: add secrets to reusable stage wf yml --- .github/workflows/reusable_test_stage.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/reusable_test_stage.yml b/.github/workflows/reusable_test_stage.yml index d7bd55fab43..8926b43d372 100644 --- a/.github/workflows/reusable_test_stage.yml +++ b/.github/workflows/reusable_test_stage.yml @@ -10,6 +10,10 @@ name: StageWF description: ci data type: string required: true + secrets: + secret_envs: + description: if given, it's passed to the environments + required: false jobs: s: @@ -23,3 +27,5 @@ jobs: test_name: ${{ matrix.job_name_and_runner_type.job_name }} runner_type: ${{ matrix.job_name_and_runner_type.runner_type }} data: ${{ inputs.data }} + secrets: + secret_envs: ${{ secrets.secret_envs }} From 4fba9a5c3c3e79bc4b0174410057206b266eb052 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 24 May 2024 14:35:45 +0000 Subject: [PATCH 358/392] Cleanup. 
--- src/Analyzer/ArrayJoinNode.cpp | 16 ---- src/Analyzer/ColumnNode.cpp | 7 +- src/Analyzer/Passes/QueryAnalysisPass.cpp | 88 +------------------ src/Analyzer/QueryTreeBuilder.cpp | 4 +- src/Analyzer/createUniqueTableAliases.cpp | 31 ------- src/Parsers/ASTTablesInSelectQuery.cpp | 9 -- src/Parsers/ASTTablesInSelectQuery.h | 4 - src/Parsers/ParserTablesInSelectQuery.cpp | 4 - .../QueryPlan/DistributedCreateLocalPlan.cpp | 8 -- 9 files changed, 6 insertions(+), 165 deletions(-) diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index 37c198f8472..27d7229d46a 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -55,8 +55,6 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const auto array_join_ast = std::make_shared(); array_join_ast->kind = is_left ? ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner; - // array_join_ast->setAlias(getAlias()); - auto array_join_expressions_ast = std::make_shared(); const auto & array_join_expressions = getJoinExpressions().getNodes(); @@ -70,21 +68,7 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const else array_join_expression_ast = array_join_expression->toAST(options); - // QueryTreeNodePtr column_source; - // if (column_node) - // column_source = column_node->getColumnSourceOrNull(); - - // if (column_source && column_source->hasAlias()) - // { - // const auto & column_alias = column_node->getAlias(); - // const auto & name_or_alias = column_alias.empty() ? column_node->getColumnName() : column_alias; - - // if (!name_or_alias.starts_with("__")) - // array_join_expression_ast->setAlias(fmt::format("{}.{}", column_source->getAlias(), name_or_alias)); - // } - // else array_join_expression_ast->setAlias(array_join_expression->getAlias()); - array_join_expressions_ast->children.push_back(std::move(array_join_expression_ast)); } diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp index d12eac68ab4..2b514a85121 100644 --- a/src/Analyzer/ColumnNode.cpp +++ b/src/Analyzer/ColumnNode.cpp @@ -103,15 +103,10 @@ ASTPtr ColumnNode::toASTImpl(const ConvertToASTOptions & options) const if (column_source && options.fully_qualified_identifiers) { auto node_type = column_source->getNodeType(); - - // if (node_type == QueryTreeNodeType::ARRAY_JOIN && column_source->hasAlias()) - // return std::make_shared(std::string(fmt::format("{}.{}", column_source->getAlias(), column.name))); - if (node_type == QueryTreeNodeType::TABLE || node_type == QueryTreeNodeType::TABLE_FUNCTION || node_type == QueryTreeNodeType::QUERY || - node_type == QueryTreeNodeType::UNION)// || - //node_type == QueryTreeNodeType::ARRAY_JOIN) + node_type == QueryTreeNodeType::UNION) { if (column_source->hasAlias()) { diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 871c3842de0..a5992148b14 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -607,6 +607,8 @@ struct ScopeAliases std::unordered_set nodes_with_duplicated_aliases; std::vector cloned_nodes_with_duplicated_aliases; + /// Names which are aliases from ARRAY JOIN. + /// This is needed to properly qualify columns from matchers and avoid name collision. 
std::unordered_set array_join_aliases; std::unordered_map & getAliasMap(IdentifierLookupContext lookup_context) @@ -1070,25 +1072,10 @@ public: void visitImpl(QueryTreeNodePtr & node) { updateAliasesIfNeeded(node, false /*is_lambda_node*/); - - // if (auto * array_join_node = node->as()) - // { - // for (const auto & elem : array_join_node->getJoinExpressions()) - // { - // for (auto & child : elem->getChildren()) - // { - // // std::cerr << "<<<<<<<<<< " << child->dumpTree() << std::endl; - // visit(child); - // } - // } - // } } bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child) { - // if (parent->getNodeType() == QueryTreeNodeType::ARRAY_JOIN) - // return false; - if (auto * lambda_node = child->as()) { updateAliasesIfNeeded(child, true /*is_lambda_node*/); @@ -1131,8 +1118,6 @@ private: if (node->getNodeType() == QueryTreeNodeType::WINDOW) return; - // std::cerr << ">>>>>>>>>> " << node->dumpTree() << std::endl; - const auto & alias = node->getAlias(); if (is_lambda_node) @@ -2926,7 +2911,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier IdentifierResolveSettings identifier_resolve_settings) { const auto & identifier_bind_part = identifier_lookup.identifier.front(); - // std::cerr << "tryResolveIdentifierFromAliases " << identifier_lookup.dump() << std::endl; auto * it = scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME); if (it == nullptr) @@ -2955,7 +2939,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier } auto node_type = alias_node->getNodeType(); - // std::cerr << "tryResolveIdentifierFromAliases 1.5 \n" << alias_node->dumpTree() << std::endl; /// Resolve expression if necessary if (node_type == QueryTreeNodeType::IDENTIFIER) @@ -2964,7 +2947,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier auto & alias_identifier_node = alias_node->as(); auto identifier = alias_identifier_node.getIdentifier(); - // std::cerr << "tryResolveIdentifierFromAliases 2 " << identifier.getFullName() << std::endl; auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings); if (!lookup_result.resolved_identifier) { @@ -3141,7 +3123,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage( size_t identifier_column_qualifier_parts, bool can_be_not_found) { - // std::cerr << "tryResolveIdentifierFromStorage " << identifier.getFullName() << std::endl; auto identifier_without_column_qualifier = identifier; identifier_without_column_qualifier.popFirst(identifier_column_qualifier_parts); @@ -3284,7 +3265,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromStorage( { auto qualified_identifier_with_removed_part = qualified_identifier; qualified_identifier_with_removed_part.popFirst(); - // std::cerr << "tryResolveIdentifierFromStorage qualified_identifier_with_removed_part" << qualified_identifier_with_removed_part.getFullName() << std::endl; if (qualified_identifier_with_removed_part.empty()) break; @@ -3818,8 +3798,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveExpressionFromArrayJoinExpressions(con const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) { - // std::cerr << "tryResolveExpressionFromArrayJoinExpressions " << scope.dump() << std::endl; - const auto & array_join_node = table_expression_node->as(); const auto & array_join_column_expressions_list = array_join_node.getJoinExpressions(); const auto & 
array_join_column_expressions_nodes = array_join_column_expressions_list.getNodes(); @@ -3897,14 +3875,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope) { - // std::cerr << "tryResolveIdentifierFromArrayJoin " << identifier_lookup.identifier.getFullName() << std::endl; - const auto & from_array_join_node = table_expression_node->as(); auto resolved_identifier = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, from_array_join_node.getTableExpression(), scope); - // std::cerr << "tryResolveIdentifierFromArrayJoin 2 " << scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) - // << ' ' << identifier_lookup.dump() << ' ' << (resolved_identifier ? resolved_identifier->dumpTree() : "not resolved ") << std::endl; - if (scope.table_expressions_in_resolve_process.contains(table_expression_node.get()) || !identifier_lookup.isExpressionLookup()) return resolved_identifier; @@ -3919,8 +3892,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi for (const auto & array_join_column_expression : array_join_column_expressions_nodes) { auto & array_join_column_expression_typed = array_join_column_expression->as(); - // std::cerr << "========== " << array_join_column_expression->dumpTree() << std::endl; - // std::cerr << "========== " << identifier_lookup.identifier.getFullName() << ' ' << from_array_join_node.getAlias() << ' ' << array_join_column_expression_typed.getAlias() << std::endl; IdentifierView identifier_view(identifier_lookup.identifier); @@ -3955,15 +3926,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi if (compound_expr) return compound_expr; - - // const auto & parts = identifier_lookup.identifier.getParts(); - // if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName() || - // (parts.size() == 2 && parts.front() == from_array_join_node.getAlias() && parts.back() == array_join_column_expression_typed.getAlias())) - // { - // auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), - // array_join_column_expression_typed.getColumnSource()); - // return array_join_column; - // } } if (!resolved_identifier) @@ -3980,8 +3942,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTreeNode(const Ident const QueryTreeNodePtr & join_tree_node, IdentifierResolveScope & scope) { - // std::cerr << "tryResolveIdentifierFromJoinTreeNode " << identifier_lookup.identifier.getFullName() << std::endl; - auto join_tree_node_type = join_tree_node->getNodeType(); switch (join_tree_node_type) @@ -4185,8 +4145,6 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifier(const IdentifierLook IdentifierResolveScope & scope, IdentifierResolveSettings identifier_resolve_settings) { - // std::cerr << "tryResolveIdentifier " << identifier_lookup.identifier.getFullName() << std::endl; - auto it = scope.identifier_lookup_to_resolve_state.find(identifier_lookup); if (it != scope.identifier_lookup_to_resolve_state.end()) { @@ -6363,8 +6321,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id { checkStackSize(); - // std::cerr << "resolveExpressionNode " << ignore_alias << "\n" << node->dumpTree() << std::endl; - auto resolved_expression_it = resolved_expressions.find(node); if (resolved_expression_it != resolved_expressions.end()) { @@ -6381,7 +6337,6 @@ ProjectionNames 
QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id evaluateScalarSubqueryIfNeeded(node, subquery_scope); } - // std::cerr << "resolveExpressionNode taken from cache \n" << node->dumpTree() << "\n PN " << (resolved_expression_it->second.empty() ? "" : resolved_expression_it->second.front()) << std::endl; return resolved_expression_it->second; } @@ -6392,10 +6347,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id { auto projection_name_it = node_to_projection_name.find(node); if (projection_name_it != node_to_projection_name.end()) - { - // std::cerr << "resolveExpressionNode taken projection name from map : " << projection_name_it->second << " for \n" << node->dumpTree() << std::endl; result_projection_names.push_back(projection_name_it->second); - } } else { @@ -7651,36 +7603,25 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif for (auto & array_join_expression : array_join_nodes) { auto array_join_expression_alias = array_join_expression->getAlias(); - // if (!array_join_expression_alias.empty() && scope.aliases.alias_name_to_expression_node->contains(array_join_expression_alias)) - // throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, - // "ARRAY JOIN expression {} with duplicate alias {}. In scope {}", - // array_join_expression->formatASTForErrorMessage(), - // array_join_expression_alias, - // scope.scope_node->formatASTForErrorMessage()); - - /// Add array join expression into scope for (const auto & elem : array_join_nodes) { if (elem->hasAlias()) scope.aliases.array_join_aliases.insert(elem->getAlias()); + for (auto & child : elem->getChildren()) { - //std::cerr << "<<<<<<<<<< " << child->dumpTree() << std::endl; if (child) expressions_visitor.visit(child); - //visit(child); } } - // expressions_visitor.visit(array_join_expression); - std::string identifier_full_name; if (auto * identifier_node = array_join_expression->as()) identifier_full_name = identifier_node->getIdentifier().getFullName(); - resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, true); + resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, true /*ignore_alias*/); auto process_array_join_expression = [&](QueryTreeNodePtr & expression) { @@ -7747,27 +7688,7 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif } } - /** Allow to resolve ARRAY JOIN columns from aliases with types after ARRAY JOIN only after ARRAY JOIN expression list is resolved, because - * during resolution of ARRAY JOIN expression list we must use column type before ARRAY JOIN. - * - * Example: SELECT id, value_element FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value - * It is expected that `value_element AS value` expression inside ARRAY JOIN expression list will be - * resolved as `value_element` expression with type before ARRAY JOIN. - * And it is expected that `value_element` inside projection expression list will be resolved as `value_element` expression - * with type after ARRAY JOIN. 
- */ array_join_nodes = std::move(array_join_column_expressions); - // for (auto & array_join_column_expression : array_join_nodes) - // { - // auto it = scope.aliases.alias_name_to_expression_node->find(array_join_column_expression->getAlias()); - // if (it != scope.aliases.alias_name_to_expression_node->end()) - // { - // auto & array_join_column_expression_typed = array_join_column_expression->as(); - // auto array_join_column = std::make_shared(array_join_column_expression_typed.getColumn(), - // array_join_column_expression_typed.getColumnSource()); - // it->second = std::move(array_join_column); - // } - // } } void QueryAnalyzer::checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope) @@ -8552,7 +8473,6 @@ QueryAnalysisPass::QueryAnalysisPass(bool only_analyze_) : only_analyze(only_ana void QueryAnalysisPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { - // std::cerr << ".... qap\n" << query_tree_node->dumpTree() << std::endl; QueryAnalyzer analyzer(only_analyze); analyzer.resolve(query_tree_node, table_expression, context); createUniqueTableAliases(query_tree_node, table_expression, context); diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 02d742f5e49..6a5db4bc1de 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -957,7 +957,6 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select auto array_join_expressions_list = buildExpressionList(array_join_expression.expression_list, context); auto array_join_node = std::make_shared(std::move(last_table_expression), std::move(array_join_expressions_list), is_left_array_join); - // array_join_node->setAlias(array_join_expression.tryGetAlias()); /** Original AST is not set because it will contain only array join part and does * not include left table expression. @@ -1046,8 +1045,7 @@ ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & QueryTreeNodePtr buildQueryTree(ASTPtr query, ContextPtr context) { QueryTreeBuilder builder(std::move(query), context); - auto qt = builder.getQueryTreeNode(); - return qt; + return builder.getQueryTreeNode(); } } diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp index 30b8c0a433b..b36ba1cafaa 100644 --- a/src/Analyzer/createUniqueTableAliases.cpp +++ b/src/Analyzer/createUniqueTableAliases.cpp @@ -61,37 +61,6 @@ public: node->setAlias(alias); } - if (auto * array_join = node->as()) - { - //size_t counter = 0; - for (auto & column : array_join->getJoinExpressions()) - { - if (auto * column_node = column->as()) - { - if (!column_node->hasAlias()) - column_node->setAlias(column_node->getColumnName()); - } - } - } - - // if (auto * array_join = node->as()) - // { - // for (auto & column : array_join->getJoinExpressions()) - // { - // if (auto * column_node = column->as()) - // { - // const auto & column_alias = column_node->getAlias(); - // const auto & name_or_alias = column_alias.empty() ? 
column_node->getColumnName() : column_alias; - - // if (!name_or_alias.starts_with("__")) - // { - - // column_node->setAlias(fmt::format("{}.{}", alias, name_or_alias)); - // } - // } - // } - // } - break; } default: diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index b4058a0950d..e782bad797e 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -247,12 +247,6 @@ void ASTTableJoin::formatImpl(const FormatSettings & settings, FormatState & sta formatImplAfterTable(settings, state, frame); } -// static void writeAlias(const String & name, const ASTWithAlias::FormatSettings & settings) -// { -// settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (settings.hilite ? IAST::hilite_alias : ""); -// settings.writeIdentifier(name); -// settings.ostr << (settings.hilite ? IAST::hilite_none : ""); -// } void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { @@ -264,9 +258,6 @@ void ASTArrayJoin::formatImpl(const FormatSettings & settings, FormatState & sta << indent_str << (kind == Kind::Left ? "LEFT " : "") << "ARRAY JOIN" << (settings.hilite ? hilite_none : ""); - // if (!alias.empty()) - // writeAlias(alias, settings); - settings.one_line ? expression_list->formatImpl(settings, state, frame) : expression_list->as().formatImplMultiline(settings, state, frame); diff --git a/src/Parsers/ASTTablesInSelectQuery.h b/src/Parsers/ASTTablesInSelectQuery.h index 212436b0d9e..f3f329ca2b6 100644 --- a/src/Parsers/ASTTablesInSelectQuery.h +++ b/src/Parsers/ASTTablesInSelectQuery.h @@ -95,10 +95,6 @@ struct ASTArrayJoin : public IAST /// List of array or nested names to JOIN, possible with aliases. ASTPtr expression_list; - // String alias; - - // String tryGetAlias() const override { return alias; } - // void setAlias(const String & to) override { alias = to; } using IAST::IAST; String getID(char) const override { return "ArrayJoin"; } diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index c96b6c1584d..b4d48ae67e9 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -98,10 +98,6 @@ bool ParserArrayJoin::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!has_array_join) return false; - // ASTPtr alias_node; - // if (ParserAlias(false).parse(pos, alias_node, expected)) - // tryGetIdentifierNameInto(alias_node, res->alias); - if (!ParserExpressionList(false).parse(pos, res->expression_list, expected)) return false; diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index e4d908e2af0..d4545482477 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -2,7 +2,6 @@ #include #include -#include "Parsers/queryToString.h" #include #include #include @@ -69,19 +68,12 @@ std::unique_ptr createLocalPlan( if (context->getSettingsRef().allow_experimental_analyzer) { - // std::cerr << query_ast->dumpTree() << std::endl; - // std::cerr << queryToString(query_ast) << std::endl; - /// For Analyzer, identifier in GROUP BY/ORDER BY/LIMIT BY lists has been resolved to /// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace /// ConstantNode with ProjectionNode again(https://github.com/ClickHouse/ClickHouse/issues/62289). 
new_context->setSetting("enable_positional_arguments", Field(false)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); - // std::cerr << interpreter.getQueryTree()->dumpTree() << std::endl; query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); - // WriteBufferFromOwnString buf; - // query_plan->explainPlan(buf, {.header=true, .actions=true}); - // std::cerr << buf.str() << std::endl; } else { From dff7a2f1f6bab1a49669a06f95990d34e71c2cf6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 24 May 2024 14:37:33 +0000 Subject: [PATCH 359/392] Cleanup. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 -- src/Analyzer/createUniqueTableAliases.cpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index a5992148b14..3fca66e6eb8 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -3995,8 +3995,6 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoinTree(const Identifie if (identifier_lookup.isFunctionLookup()) return {}; - // std::cerr << "tryResolveIdentifierFromJoinTree " << identifier_lookup.identifier.getFullName() << std::endl; - /// Try to resolve identifier from table columns if (auto resolved_identifier = tryResolveIdentifierFromTableColumns(identifier_lookup, scope)) return resolved_identifier; diff --git a/src/Analyzer/createUniqueTableAliases.cpp b/src/Analyzer/createUniqueTableAliases.cpp index b36ba1cafaa..8f850fe8dec 100644 --- a/src/Analyzer/createUniqueTableAliases.cpp +++ b/src/Analyzer/createUniqueTableAliases.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include #include #include #include @@ -60,7 +58,6 @@ public: alias = fmt::format("__table{}", ++next_id); node->setAlias(alias); } - break; } default: From b254be618087e8f949f420406e791b24d11c960a Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 16:57:08 +0200 Subject: [PATCH 360/392] CI: add secrets to reusable build stage wf yml --- .github/workflows/reusable_build_stage.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/reusable_build_stage.yml b/.github/workflows/reusable_build_stage.yml index 4463645880b..a8e84819c95 100644 --- a/.github/workflows/reusable_build_stage.yml +++ b/.github/workflows/reusable_build_stage.yml @@ -13,6 +13,10 @@ name: BuildStageWF description: ci data type: string required: true + secrets: + secret_envs: + description: if given, it's passed to the environments + required: false jobs: s: @@ -30,3 +34,5 @@ jobs: # for now let's do I deep checkout for builds checkout_depth: 0 data: ${{ inputs.data }} + secrets: + secret_envs: ${{ secrets.secret_envs }} From 4982d7c85cc7a71ddef773cd57df540e7b8cd33a Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 16:59:47 +0200 Subject: [PATCH 361/392] fix for mark release ready --- .github/workflows/master.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 7c55098bdfd..c2a893a8e99 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -136,7 +136,7 @@ jobs: MarkReleaseReady: if: ${{ !failure() && !cancelled() }} - needs: [RunConfig, Builds_1] + needs: [RunConfig, Builds_1, Builds_2] runs-on: [self-hosted, style-checker-aarch64] steps: - name: Debug From b3f836fbb1b451c08d57f4956c0a9c5137fe5ede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 
2024 17:08:30 +0200 Subject: [PATCH 362/392] Run 03147_system_columns_access_checks only on release --- tests/queries/0_stateless/03147_system_columns_access_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03147_system_columns_access_checks.sh b/tests/queries/0_stateless/03147_system_columns_access_checks.sh index 2bd7fb083ea..b027ea28504 100755 --- a/tests/queries/0_stateless/03147_system_columns_access_checks.sh +++ b/tests/queries/0_stateless/03147_system_columns_access_checks.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-ordinary-database, long +# Tags: no-fasttest, no-parallel, no-ordinary-database, long, no-debug, no-asan, no-tsan, no-msan CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From d5b763d03d581b70b1243ab589223d85d231fe89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 17:21:50 +0200 Subject: [PATCH 363/392] Limit max time for 01442_merge_detach_attach_long --- .../01442_merge_detach_attach_long.sh | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index acb2550d48c..e7c20158b5d 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel, no-debug +# Tags: long, no-parallel set -e @@ -11,14 +11,24 @@ CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS t" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE t (x Int8) ENGINE = MergeTree ORDER BY tuple()" -for _ in {1..100}; do - ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" - ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" - ${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE t FINAL" 2>/dev/null & - ${CLICKHOUSE_CLIENT} --query="ALTER TABLE t DETACH PARTITION tuple()" - ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM t HAVING count() > 0" -done +function thread_ops() +{ + local TIMELIMIT=$((SECONDS+$1)) + local it=0 + while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 100 ]; + do + it=$((it+1)) + ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" + ${CLICKHOUSE_CLIENT} --query="INSERT INTO t VALUES (0)" + ${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE t FINAL" 2>/dev/null & + ${CLICKHOUSE_CLIENT} --query="ALTER TABLE t DETACH PARTITION tuple()" + ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM t HAVING count() > 0" + done +} +export -f thread_ops +TIMEOUT=60 +thread_ops $TIMEOUT & wait $CLICKHOUSE_CLIENT -q "DROP TABLE t" From bd415cc83192a734dccb00bd004775e46bd74b7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 17:27:47 +0200 Subject: [PATCH 364/392] Reduce 02228_merge_tree_insert_memory_usage partitions --- .../02228_merge_tree_insert_memory_usage.sql | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql index 8924627a717..26a201ec89f 100644 --- a/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql +++ b/tests/queries/0_stateless/02228_merge_tree_insert_memory_usage.sql @@ -1,16 +1,16 @@ -- Tags: long, no-parallel -SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require 
too many retries due to 1024 partitions, so disable fault injections +SET insert_keeper_fault_injection_probability=0; -- to succeed this test can require too many retries due to 100 partitions, so disable fault injections -- regression for MEMORY_LIMIT_EXCEEDED error because of deferred final part flush drop table if exists data_02228; -create table data_02228 (key1 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1) partition by key1 % 1024; -insert into data_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; -insert into data_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=10000000; -- { serverError MEMORY_LIMIT_EXCEEDED } +create table data_02228 (key1 UInt32, sign Int8, s UInt64) engine = CollapsingMergeTree(sign) order by (key1) partition by key1 % 100; +insert into data_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; +insert into data_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=1000000; -- { serverError MEMORY_LIMIT_EXCEEDED } drop table data_02228; drop table if exists data_rep_02228 SYNC; -create table data_rep_02228 (key1 UInt32, sign Int8, s UInt64) engine = ReplicatedCollapsingMergeTree('/clickhouse/{database}', 'r1', sign) order by (key1) partition by key1 % 1024; -insert into data_rep_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; -insert into data_rep_02228 select number, 1, number from numbers_mt(100e3) settings max_memory_usage='300Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=10000000; -- { serverError MEMORY_LIMIT_EXCEEDED } +create table data_rep_02228 (key1 UInt32, sign Int8, s UInt64) engine = ReplicatedCollapsingMergeTree('/clickhouse/{database}', 'r1', sign) order by (key1) partition by key1 % 100; +insert into data_rep_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=0; +insert into data_rep_02228 select number, 1, number from numbers_mt(10_000) settings max_memory_usage='30Mi', max_partitions_per_insert_block=1024, max_insert_delayed_streams_for_parallel_write=1000000; -- { serverError MEMORY_LIMIT_EXCEEDED } drop table data_rep_02228 SYNC; From b396e63ea5721f72e0a1efb15e1c108c93dfad2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 17:30:26 +0200 Subject: [PATCH 365/392] Reduce sizes in 02735_parquet_encoder --- tests/queries/0_stateless/02735_parquet_encoder.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql index fe45a2a317d..9320d0e57c3 100644 --- a/tests/queries/0_stateless/02735_parquet_encoder.sql +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -41,7 +41,7 @@ create temporary table basic_types_02735 as select * from generateRandom(' decimal128 Decimal128(20), 
decimal256 Decimal256(40), ipv4 IPv4, - ipv6 IPv6') limit 10101; + ipv6 IPv6') limit 1011; insert into function file(basic_types_02735.parquet) select * from basic_types_02735; desc file(basic_types_02735.parquet); select (select sum(cityHash64(*)) from basic_types_02735) - (select sum(cityHash64(*)) from file(basic_types_02735.parquet)); @@ -59,7 +59,7 @@ create temporary table nullables_02735 as select * from generateRandom(' fstr Nullable(FixedString(12)), i256 Nullable(Int256), decimal256 Nullable(Decimal256(40)), - ipv6 Nullable(IPv6)') limit 10000; + ipv6 Nullable(IPv6)') limit 1000; insert into function file(nullables_02735.parquet) select * from nullables_02735; select (select sum(cityHash64(*)) from nullables_02735) - (select sum(cityHash64(*)) from file(nullables_02735.parquet)); drop table nullables_02735; @@ -83,7 +83,7 @@ create table arrays_02735 engine = Memory as select * from generateRandom(' decimal64 Array(Decimal64(10)), ipv4 Array(IPv4), msi Map(String, Int16), - tup Tuple(FixedString(3), Array(String), Map(Int8, Date))') limit 10000; + tup Tuple(FixedString(3), Array(String), Map(Int8, Date))') limit 1000; insert into function file(arrays_02735.parquet) select * from arrays_02735; create temporary table arrays_out_02735 as arrays_02735; insert into arrays_out_02735 select * from file(arrays_02735.parquet); @@ -107,7 +107,7 @@ create temporary table madness_02735 as select * from generateRandom(' mln Map(LowCardinality(String), Nullable(Int8)), t Tuple(Map(FixedString(5), Tuple(Array(UInt16), Nullable(UInt16), Array(Tuple(Int8, Decimal64(10))))), Tuple(kitchen UInt64, sink String)), n Nested(hello UInt64, world Tuple(first String, second FixedString(1))) - ') limit 10000; + ') limit 1000; insert into function file(madness_02735.parquet) select * from madness_02735; insert into function file(a.csv) select * from madness_02735 order by tuple(*); insert into function file(b.csv) select aa, aaa, an, aan, l, ln, arrayMap(x->reinterpret(x, 'UInt128'), al) as al_, aaln, mln, t, n.hello, n.world from file(madness_02735.parquet) order by tuple(aa, aaa, an, aan, l, ln, al_, aaln, mln, t, n.hello, n.world); From 24797a093a216479d70b2b0e065d9f3850d484bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 17:31:39 +0200 Subject: [PATCH 366/392] Remove 02344_insert_profile_events_stress from sanitizer run as it's too slow --- .../queries/0_stateless/02344_insert_profile_events_stress.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql index f9fdd3b943f..e9a790bea5d 100644 --- a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql +++ b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, long, no-debug, no-tsan +-- Tags: no-parallel, long, no-debug, no-tsan, no-msan, no-asan create table data_02344 (key Int) engine=Null; -- 3e9 rows is enough to fill the socket buffer and cause INSERT hung. From 049ca7c71e5c3543e4a63d22f075de2ff96373c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 24 May 2024 17:34:48 +0200 Subject: [PATCH 367/392] Reduce 01396_inactive_replica_cleanup_nodes_zookeeper! 
--- .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 67a2a70b509..11102b128b2 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: replica, no-debug, no-parallel +# Tags: replica, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -11,7 +11,7 @@ REPLICA=$($CLICKHOUSE_CLIENT --query "Select getMacro('replica')") # Check that if we have one inactive replica and a huge number of INSERTs to active replicas, # the number of nodes in ZooKeeper does not grow unbounded. -SCALE=5000 +SCALE=1000 $CLICKHOUSE_CLIENT -n --query " DROP TABLE IF EXISTS r1; From 7f9734d0cc9dc270ea129b75881234ace3cdf1fa Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 24 May 2024 15:38:21 +0000 Subject: [PATCH 368/392] Fix Logical error: Bad cast for Buffer table with prewhere. --- src/Storages/StorageBuffer.cpp | 2 ++ .../0_stateless/00910_buffer_prewhere_different_types.sql | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d9a0b2b4d59..a3f6b6afc5d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -302,6 +302,8 @@ void StorageBuffer::read( auto src_table_query_info = query_info; if (src_table_query_info.prewhere_info) { + src_table_query_info.prewhere_info = src_table_query_info.prewhere_info->clone(); + auto actions_dag = ActionsDAG::makeConvertingActions( header_after_adding_defaults.getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), diff --git a/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql b/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql index 8f305914cb8..702d9bb3e6c 100644 --- a/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql +++ b/tests/queries/0_stateless/00910_buffer_prewhere_different_types.sql @@ -2,8 +2,14 @@ DROP TABLE IF EXISTS buffer_table1__fuzz_28; DROP TABLE IF EXISTS merge_tree_table1; CREATE TABLE merge_tree_table1 (`x` UInt32) ENGINE = MergeTree ORDER BY x; + +CREATE TABLE buffer_table1__fuzz_24 (`s` Nullable(Int128), `x` Nullable(FixedString(17))) ENGINE = Buffer(currentDatabase(), 'merge_tree_table1', 16, 10, 60, 10, 1000, 1048576, 2097152); +SELECT s FROM buffer_table1__fuzz_24 PREWHERE factorial(toNullable(10)); -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } + INSERT INTO merge_tree_table1 VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); +SELECT s FROM buffer_table1__fuzz_24 PREWHERE factorial(toNullable(10)); -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER } + SET send_logs_level='error'; CREATE TABLE buffer_table1__fuzz_28 (`x` Nullable(UInt32)) ENGINE = Buffer(currentDatabase(), 'merge_tree_table1', 16, 10, 60, 10, 1000, 1048576, 2097152); From 2669df7296a1b362807693d0cc41833ecf80a148 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 24 May 2024 17:30:36 +0200 Subject: [PATCH 369/392] add secrets to reusable build yml --- .github/workflows/reusable_build.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index 80d78d93e1b..5e254d785ec 100644 --- 
a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -33,6 +33,10 @@ name: Build ClickHouse additional_envs: description: additional ENV variables to setup the job type: string + secrets: + secret_envs: + description: if given, it's passed to the environments + required: false jobs: Build: @@ -54,6 +58,7 @@ jobs: run: | cat >> "$GITHUB_ENV" << 'EOF' ${{inputs.additional_envs}} + ${{secrets.secret_envs}} DOCKER_TAG< Date: Fri, 24 May 2024 17:44:14 +0200 Subject: [PATCH 370/392] Restore tags --- .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 2 +- tests/queries/0_stateless/01442_merge_detach_attach_long.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index 11102b128b2..1c1eb4489ee 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: replica, no-parallel +# Tags: replica, no-debug, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh index e7c20158b5d..85fdf7ed764 100755 --- a/tests/queries/0_stateless/01442_merge_detach_attach_long.sh +++ b/tests/queries/0_stateless/01442_merge_detach_attach_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long, no-parallel, no-debug set -e From 772d38a0c139ca5ee76bd7886d70db874db503c0 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 24 May 2024 18:11:21 +0200 Subject: [PATCH 371/392] Update s3queue.md --- docs/en/engines/table-engines/integrations/s3queue.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md index 8ebab80423f..aa7fa512480 100644 --- a/docs/en/engines/table-engines/integrations/s3queue.md +++ b/docs/en/engines/table-engines/integrations/s3queue.md @@ -202,8 +202,7 @@ Example: CREATE TABLE s3queue_engine_table (name String, value UInt32) ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip') SETTINGS - mode = 'unordered', - keeper_path = '/clickhouse/s3queue/'; + mode = 'unordered'; CREATE TABLE stats (name String, value UInt32) ENGINE = MergeTree() ORDER BY name; From e59097274a72216e99dbec83cbbe4f5142463799 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Fri, 24 May 2024 13:56:16 -0300 Subject: [PATCH 372/392] test for #64211 --- ...uted_merge_global_in_primary_key.reference | 19 +++++ ...istributed_merge_global_in_primary_key.sql | 83 +++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference create mode 100644 tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference new file mode 100644 index 00000000000..f572a3570f4 --- /dev/null +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.reference @@ -0,0 +1,19 
@@ +------------------- Distributed ------------------ +1 +---------- merge() over distributed -------------- +2 +---------- merge() over local -------------------- +1 +1 +1 +---------- remote() over Merge ------------------- +2 +---------- Distributed over Merge ---------------- +1 +---------- remote() over Merge ------------------- +2 +---------- Merge over Distributed ----------------- +1 +1 +1 +2 diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql new file mode 100644 index 00000000000..78176e346f4 --- /dev/null +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql @@ -0,0 +1,83 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/64211 + +create database test; +use test; + +CREATE TABLE test_local (name String) +ENGINE = MergeTree +ORDER BY name as select 'x'; + +CREATE TABLE test_distributed as test_local +ENGINE = Distributed(default, currentDatabase(), test_local); + +CREATE TABLE test_merge as test_local +ENGINE = Merge(currentDatabase(), 'test_local'); + +CREATE TABLE test_merge_distributed as test_local +ENGINE = Distributed(default, currentDatabase(), test_merge); + +CREATE TABLE test_distributed_merge as test_local +ENGINE = Merge(currentDatabase(), 'test_distributed'); + +SELECT '------------------- Distributed ------------------'; +SELECT count() +FROM test_distributed +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT '---------- merge() over distributed --------------'; +SELECT count() +FROM merge(currentDatabase(), 'test_distributed') +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT '---------- merge() over local --------------------'; +SELECT count() +FROM merge(currentDatabase(), 'test_local') +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT count() +FROM merge(currentDatabase(), 'test_local') +WHERE name GLOBAL IN (SELECT name FROM merge(currentDatabase(), 'test_local')); + +SELECT count() +FROM merge(currentDatabase(), 'test_local') +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + +SELECT '---------- remote() over Merge -------------------'; +SELECT count() +FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge) +WHERE name GLOBAL IN (SELECT name FROM test_distributed); + +SELECT '---------- Distributed over Merge ----------------'; +SELECT count() +FROM test_merge_distributed +WHERE name GLOBAL IN (SELECT name FROM test_merge_distributed); + +SELECT '---------- remote() over Merge -------------------'; +SELECT count() +FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge) +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + +SELECT '---------- Merge over Distributed -----------------'; +SELECT count() +FROM test_distributed_merge +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + +SELECT count() +FROM test_distributed_merge +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_distributed_merge)); + +SELECT count() +FROM test_distributed_merge +WHERE name GLOBAL IN (SELECT name FROM test_distributed_merge); + +SELECT count() +FROM remote('127.0.0.{1,2}', currentDatabase(), test_distributed_merge) +WHERE name GLOBAL IN (SELECT name FROM remote('127.0.0.{1,2}', currentDatabase(), test_merge)); + + +DROP TABLE test_merge; +DROP TABLE test_merge_distributed; +DROP TABLE 
test_distributed_merge; +DROP TABLE test_distributed; +DROP TABLE test_local; +drop database test; From 9a917db4b3eade94941225b4a792f4d2331459ba Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Fri, 24 May 2024 14:27:26 -0300 Subject: [PATCH 373/392] Update 01227_distributed_merge_global_in_primary_key.sql --- .../01227_distributed_merge_global_in_primary_key.sql | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql index 78176e346f4..e73d07c193f 100644 --- a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql @@ -1,8 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/64211 -create database test; -use test; - CREATE TABLE test_local (name String) ENGINE = MergeTree ORDER BY name as select 'x'; @@ -80,4 +77,3 @@ DROP TABLE test_merge_distributed; DROP TABLE test_distributed_merge; DROP TABLE test_distributed; DROP TABLE test_local; -drop database test; From 91a84f8e17192a70b48d3152ad8b48107d60c117 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Fri, 24 May 2024 15:03:45 -0300 Subject: [PATCH 374/392] Update 01227_distributed_merge_global_in_primary_key.sql --- .../01227_distributed_merge_global_in_primary_key.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql index e73d07c193f..5cd4aaab1e6 100644 --- a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql @@ -5,13 +5,13 @@ ENGINE = MergeTree ORDER BY name as select 'x'; CREATE TABLE test_distributed as test_local -ENGINE = Distributed(default, currentDatabase(), test_local); +ENGINE = Distributed(test_shard_localhost, currentDatabase(), test_local); CREATE TABLE test_merge as test_local ENGINE = Merge(currentDatabase(), 'test_local'); CREATE TABLE test_merge_distributed as test_local -ENGINE = Distributed(default, currentDatabase(), test_merge); +ENGINE = Distributed(test_shard_localhost, currentDatabase(), test_merge); CREATE TABLE test_distributed_merge as test_local ENGINE = Merge(currentDatabase(), 'test_distributed'); From 3ed1ec2f63582819f005d591459f30cdbff0daff Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Fri, 24 May 2024 23:54:56 -0300 Subject: [PATCH 375/392] Update tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql Co-authored-by: Nikita Mikhaylov --- .../01227_distributed_merge_global_in_primary_key.sql | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql index 5cd4aaab1e6..6b0dd4c8747 100644 --- a/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql +++ b/tests/queries/0_stateless/01227_distributed_merge_global_in_primary_key.sql @@ -1,5 +1,9 @@ -- https://github.com/ClickHouse/ClickHouse/issues/64211 - +DROP TABLE IF EXISTS test_merge; +DROP TABLE IF EXISTS test_merge_distributed; +DROP TABLE IF EXISTS test_distributed_merge; +DROP TABLE IF EXISTS test_distributed; +DROP TABLE IF EXISTS test_local; CREATE TABLE test_local (name String) ENGINE = MergeTree ORDER BY name 
as select 'x'; From 031591f3dd5ae155e3a8d8cf061e2956a29e6a4a Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 25 May 2024 15:48:45 +0200 Subject: [PATCH 376/392] Fix settings changes history --- src/Core/SettingsChangesHistory.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index a89516436e8..16f28d94640 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,14 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.6", {{"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, + {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, + {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, + {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, + {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, + {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, + {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, + }}, {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."}, {"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"}, @@ -93,13 +101,6 @@ static std::map sett {"cross_join_min_bytes_to_compress", 0, 1_GiB, "Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. 
This block is compressed when any of the two thresholds (by rows or by bytes) are reached."}, {"http_max_chunk_size", 0, 0, "Internal limitation"}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, - {"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"}, - {"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"}, - {"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"}, - {"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"}, - {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, - {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, - {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, From 142d67d1b298478a0df46b2585d4719a9ef55f4e Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 26 May 2024 11:16:48 +0200 Subject: [PATCH 377/392] Fix S3ObjectStorage::applyNewSettings --- .../ObjectStorages/S3/S3ObjectStorage.cpp | 21 ++++++++----------- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 5 +---- .../ObjectStorage/S3/Configuration.cpp | 2 +- 3 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index c07313b52db..69485bd4d01 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -575,24 +575,21 @@ void S3ObjectStorage::applyNewSettings( ContextPtr context, const ApplyNewSettingsOptions & options) { - auto new_s3_settings = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings); - if (!static_headers.empty()) - { - new_s3_settings->auth_settings.headers.insert( - new_s3_settings->auth_settings.headers.end(), - static_headers.begin(), static_headers.end()); - } + auto settings_from_config = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings); + auto modified_settings = std::make_unique(*s3_settings.get()); + modified_settings->auth_settings.updateFrom(settings_from_config->auth_settings); if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName())) - new_s3_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); + modified_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); - auto current_s3_settings = s3_settings.get(); - if (options.allow_client_change && 
(current_s3_settings->auth_settings.hasUpdates(new_s3_settings->auth_settings) || for_disk_s3)) + auto current_settings = s3_settings.get(); + if (options.allow_client_change + && (current_settings->auth_settings.hasUpdates(modified_settings->auth_settings) || for_disk_s3)) { - auto new_client = getClient(config, config_prefix, context, *new_s3_settings, for_disk_s3, &uri); + auto new_client = getClient(config, config_prefix, context, *modified_settings, for_disk_s3, &uri); client.set(std::move(new_client)); } - s3_settings.set(std::move(new_s3_settings)); + s3_settings.set(std::move(modified_settings)); } std::unique_ptr S3ObjectStorage::cloneObjectStorage( diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 1fff6d67e23..062ddd4e2a2 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -54,8 +54,7 @@ private: const S3Capabilities & s3_capabilities_, ObjectStorageKeysGeneratorPtr key_generator_, const String & disk_name_, - bool for_disk_s3_ = true, - const HTTPHeaderEntries & static_headers_ = {}) + bool for_disk_s3_ = true) : uri(uri_) , disk_name(disk_name_) , client(std::move(client_)) @@ -64,7 +63,6 @@ private: , key_generator(std::move(key_generator_)) , log(getLogger(logger_name)) , for_disk_s3(for_disk_s3_) - , static_headers(static_headers_) { } @@ -189,7 +187,6 @@ private: LoggerPtr log; const bool for_disk_s3; - const HTTPHeaderEntries static_headers; }; } diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 6b6cde0c431..4b217b94730 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -136,7 +136,7 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, return std::make_shared( std::move(client), std::move(s3_settings), url, s3_capabilities, - key_generator, "StorageS3", false, headers_from_ast); + key_generator, "StorageS3", false); } void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection) From 14f259d9d7a9d53ed8d1c64be36be20a622bf7ce Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 26 May 2024 13:54:35 +0000 Subject: [PATCH 378/392] Fix flaky test --- tests/queries/0_stateless/03130_generateSnowflakeId.reference | 2 -- tests/queries/0_stateless/03130_generateSnowflakeId.sql | 2 -- 2 files changed, 4 deletions(-) diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference index 6ec0cafab16..f5b7872f81e 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.reference +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -1,11 +1,9 @@ -- generateSnowflakeID 1 -1 0 0 1 100 -- generateSnowflakeIDThreadMonotonic 1 -1 100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql index 903be5b786c..57cdd21a9fe 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.sql +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -1,6 +1,5 @@ SELECT '-- generateSnowflakeID'; -SELECT bitShiftLeft(toUInt64(generateSnowflakeID()), 52) = 0; -- check machine sequence number is zero SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs @@ 
-18,7 +17,6 @@ FROM SELECT '-- generateSnowflakeIDThreadMonotonic'; -SELECT bitShiftLeft(toUInt64(generateSnowflakeIDThreadMonotonic()), 52) = 0; -- check machine sequence number is zero SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeIDThreadMonotonic()), 63), 1) = 0; -- check first bit is zero SELECT generateSnowflakeIDThreadMonotonic(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } From 8f4422d72917c1885a892200e267268f6b2e3b98 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 26 May 2024 14:07:50 +0000 Subject: [PATCH 379/392] Test analyzer and non-analyzer execution --- .../02494_query_cache_nested_query_bug.reference | 2 ++ .../02494_query_cache_nested_query_bug.sh | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference index b261da18d51..9ec033cefb1 100644 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.reference @@ -1,2 +1,4 @@ +2 +0 1 0 diff --git a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh index a5339a098dc..6bc3d03ac66 100755 --- a/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh +++ b/tests/queries/0_stateless/02494_query_cache_nested_query_bug.sh @@ -15,11 +15,17 @@ ${CLICKHOUSE_CLIENT} --query "CREATE TABLE tab (a UInt64) ENGINE=MergeTree() ORD ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (1) (2) (3)" ${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (3) (4) (5)" -SETTINGS="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" +SETTINGS_NO_ANALYZER="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" +SETTINGS_ANALYZER="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0" # Verify that the first query does two aggregations and the second query zero aggregations. Since query cache is currently not integrated # with EXPLAIN PLAN, we need to check the logs. -${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS" 2>&1 | grep "Aggregated. " | wc -l -${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_NO_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_NO_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l + +${CLICKHOUSE_CLIENT} --query "SYSTEM DROP QUERY CACHE" + +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_ANALYZER" 2>&1 | grep "Aggregated. " | wc -l +${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS_ANALYZER" 2>&1 | grep "Aggregated. 
" | wc -l ${CLICKHOUSE_CLIENT} --query "SYSTEM DROP QUERY CACHE" From 3ee2307024c9a7b2c54247335f0fb0f0f54380e4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 27 May 2024 10:04:19 +0200 Subject: [PATCH 380/392] Revert "Refactoring of Server.h: Isolate server management from other logic" --- programs/server/Server.cpp | 987 +++++++++++++++++- programs/server/Server.h | 95 +- src/CMakeLists.txt | 1 - src/Server/ServersManager/IServersManager.cpp | 268 ----- src/Server/ServersManager/IServersManager.h | 74 -- .../ServersManager/InterServersManager.cpp | 327 ------ .../ServersManager/InterServersManager.h | 44 - .../ServersManager/ProtocolServersManager.cpp | 523 ---------- .../ServersManager/ProtocolServersManager.h | 37 - 9 files changed, 1032 insertions(+), 1324 deletions(-) delete mode 100644 src/Server/ServersManager/IServersManager.cpp delete mode 100644 src/Server/ServersManager/IServersManager.h delete mode 100644 src/Server/ServersManager/InterServersManager.cpp delete mode 100644 src/Server/ServersManager/InterServersManager.h delete mode 100644 src/Server/ServersManager/ProtocolServersManager.cpp delete mode 100644 src/Server/ServersManager/ProtocolServersManager.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b62ae40924c..223bc1f77e7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -42,9 +44,11 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -79,19 +83,29 @@ #include #include #include +#include #include "MetricsTransmitter.h" #include +#include +#include #include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include #include #include #include +#include #include "config.h" #include @@ -105,9 +119,19 @@ #endif #if USE_SSL +# include # include #endif +#if USE_GRPC +# include +#endif + +#if USE_NURAFT +# include +# include +#endif + #if USE_JEMALLOC # include #endif @@ -135,6 +159,18 @@ namespace ProfileEvents { extern const Event MainConfigLoads; extern const Event ServerStartupMilliseconds; + extern const Event InterfaceNativeSendBytes; + extern const Event InterfaceNativeReceiveBytes; + extern const Event InterfaceHTTPSendBytes; + extern const Event InterfaceHTTPReceiveBytes; + extern const Event InterfacePrometheusSendBytes; + extern const Event InterfacePrometheusReceiveBytes; + extern const Event InterfaceInterserverSendBytes; + extern const Event InterfaceInterserverReceiveBytes; + extern const Event InterfaceMySQLSendBytes; + extern const Event InterfaceMySQLReceiveBytes; + extern const Event InterfacePostgreSQLSendBytes; + extern const Event InterfacePostgreSQLReceiveBytes; } namespace fs = std::filesystem; @@ -202,9 +238,11 @@ namespace DB namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; + extern const int SUPPORT_IS_DISABLED; extern const int ARGUMENT_OUT_OF_BOUND; extern const int EXCESSIVE_ELEMENT_IN_CONFIG; extern const int INVALID_CONFIG_PARAMETER; + extern const int NETWORK_ERROR; extern const int CORRUPTED_DATA; } @@ -219,6 +257,115 @@ static std::string getCanonicalPath(std::string && path) return std::move(path); } +Poco::Net::SocketAddress Server::socketBindListen( + const Poco::Util::AbstractConfiguration & config, + Poco::Net::ServerSocket & socket, + const std::string & host, + UInt16 port, + [[maybe_unused]] bool secure) const +{ + auto address 
= makeSocketAddress(host, port, &logger()); + socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config.getBool("listen_reuse_port", false)); + /// If caller requests any available port from the OS, discover it after binding. + if (port == 0) + { + address = socket.address(); + LOG_DEBUG(&logger(), "Requested any available port (port == 0), actual port is {:d}", address.port()); + } + + socket.listen(/* backlog = */ config.getUInt("listen_backlog", 4096)); + + return address; +} + +Strings getListenHosts(const Poco::Util::AbstractConfiguration & config) +{ + auto listen_hosts = DB::getMultipleValuesFromConfig(config, "", "listen_host"); + if (listen_hosts.empty()) + { + listen_hosts.emplace_back("::1"); + listen_hosts.emplace_back("127.0.0.1"); + } + return listen_hosts; +} + +Strings getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) +{ + auto interserver_listen_hosts = DB::getMultipleValuesFromConfig(config, "", "interserver_listen_host"); + if (!interserver_listen_hosts.empty()) + return interserver_listen_hosts; + + /// Use more general restriction in case of emptiness + return getListenHosts(config); +} + +bool getListenTry(const Poco::Util::AbstractConfiguration & config) +{ + bool listen_try = config.getBool("listen_try", false); + if (!listen_try) + { + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + listen_try = + DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() && + std::none_of(protocols.begin(), protocols.end(), [&](const auto & protocol) + { + return config.has("protocols." + protocol + ".host") && config.has("protocols." + protocol + ".port"); + }); + } + return listen_try; +} + + +void Server::createServer( + Poco::Util::AbstractConfiguration & config, + const std::string & listen_host, + const char * port_name, + bool listen_try, + bool start_server, + std::vector & servers, + CreateServerFunc && func) const +{ + /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. + if (config.getString(port_name, "").empty()) + return; + + /// If we already have an active server for this listen_host/port_name, don't create it again + for (const auto & server : servers) + { + if (!server.isStopping() && server.getListenHost() == listen_host && server.getPortName() == port_name) + return; + } + + auto port = config.getInt(port_name); + try + { + servers.push_back(func(port)); + if (start_server) + { + servers.back().start(); + LOG_INFO(&logger(), "Listening for {}", servers.back().getDescription()); + } + global_context->registerServerPort(port_name, port); + } + catch (const Poco::Exception &) + { + if (listen_try) + { + LOG_WARNING(&logger(), "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " + "then consider to " + "specify not disabled IPv4 or IPv6 address to listen in element of configuration " + "file. Example for disabled IPv6: 0.0.0.0 ." 
+ " Example for disabled IPv4: ::", + listen_host, port, getCurrentExceptionMessage(false)); + } + else + { + throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); + } + } +} + #if defined(OS_LINUX) namespace @@ -518,7 +665,6 @@ try ServerSettings server_settings; server_settings.loadSettingsFromConfig(config()); - Poco::ThreadPool server_pool(3, server_settings.max_connections); ASTAlterCommand::setFormatAlterCommandsWithParentheses(server_settings.format_alter_operations_with_parentheses); @@ -575,6 +721,11 @@ try CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); + Poco::ThreadPool server_pool(3, server_settings.max_connections); + std::mutex servers_lock; + std::vector servers; + std::vector servers_to_start_before_tables; + /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases, ... */ @@ -624,10 +775,6 @@ try bool will_have_trace_collector = hasPHDRCache() && config().has("trace_log"); - std::mutex servers_lock; - ProtocolServersManager servers(context(), &logger()); - InterServersManager servers_to_start_before_tables(context(), &logger()); - // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. @@ -659,7 +806,32 @@ try LOG_DEBUG(log, "Shut down storages."); - servers_to_start_before_tables.stopServers(server_settings, servers_lock); + if (!servers_to_start_before_tables.empty()) + { + LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); + size_t current_connections = 0; + { + std::lock_guard lock(servers_lock); + for (auto & server : servers_to_start_before_tables) + { + server.stop(); + current_connections += server.currentConnections(); + } + } + + if (current_connections) + LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(log, "Closed all listening sockets."); + + if (current_connections > 0) + current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, server_settings.shutdown_wait_unfinished); + + if (current_connections) + LOG_INFO(log, "Closed connections to servers for tables. But {} remain. 
Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); + else + LOG_INFO(log, "Closed connections to servers for tables."); + } global_context->shutdownKeeperDispatcher(); @@ -756,13 +928,19 @@ try server_settings.asynchronous_heavy_metrics_update_period_s, [&]() -> std::vector { + std::vector metrics; + std::lock_guard lock(servers_lock); - std::vector metrics1 = servers_to_start_before_tables.getMetrics(); - std::vector metrics2 = servers.getMetrics(); - metrics1.reserve(metrics1.size() + metrics2.size()); - metrics1.insert(metrics1.end(), std::make_move_iterator(metrics2.begin()), std::make_move_iterator(metrics2.end())); - return metrics1; - }); + metrics.reserve(servers_to_start_before_tables.size() + servers.size()); + + for (const auto & server : servers_to_start_before_tables) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + + for (const auto & server : servers) + metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); + return metrics; + } + ); zkutil::validateZooKeeperConfig(config()); bool has_zookeeper = zkutil::hasZooKeeperConfig(config()); @@ -1410,8 +1588,7 @@ try if (global_context->isServerCompletelyStarted()) { std::lock_guard lock(servers_lock); - servers.updateServers(*config, *this, servers_lock, server_pool, async_metrics, latest_config); - servers_to_start_before_tables.updateServers(*config, *this, servers_lock, server_pool, async_metrics, latest_config); + updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables); } } @@ -1458,17 +1635,141 @@ try /// Must be the last. latest_config = config; }, - /* already_loaded = */ false); /// Reload it right now (initial loading) + /* already_loaded = */ false); /// Reload it right now (initial loading) - servers_to_start_before_tables.createServers( - config(), - *this, - servers_lock, - server_pool, - async_metrics, - /* start_servers= */ false, - ServerType(ServerType::Type::QUERIES_ALL) - ); + const auto listen_hosts = getListenHosts(config()); + const auto interserver_listen_hosts = getInterserverListenHosts(config()); + const auto listen_try = getListenTry(config()); + + if (config().has("keeper_server.server_id")) + { +#if USE_NURAFT + //// If we don't have configured connection probably someone trying to use clickhouse-server instead + //// of clickhouse-keeper, so start synchronously. + bool can_initialize_keeper_async = false; + + if (has_zookeeper) /// We have configured connection to some zookeeper cluster + { + /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start + /// synchronously. + can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); + } + /// Initialize keeper RAFT. 
+ global_context->initializeKeeperDispatcher(can_initialize_keeper_async); + FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); + + auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & + { + return global_context->getConfigRef(); + }; + + for (const auto & listen_host : listen_hosts) + { + /// TCP Keeper + const char * port_name = "keeper_server.tcp_port"; + createServer( + config(), listen_host, port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config(), socket, listen_host, port); + socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); + return ProtocolServerAdapter( + listen_host, + port_name, + "Keeper (tcp): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory( + config_getter, global_context->getKeeperDispatcher(), + global_context->getSettingsRef().receive_timeout.totalSeconds(), + global_context->getSettingsRef().send_timeout.totalSeconds(), + false), server_pool, socket)); + }); + + const char * secure_port_name = "keeper_server.tcp_port_secure"; + createServer( + config(), listen_host, secure_port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config(), socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); + socket.setSendTimeout(Poco::Timespan(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); + return ProtocolServerAdapter( + listen_host, + secure_port_name, + "Keeper with secure protocol (tcp_secure): " + address.toString(), + std::make_unique( + new KeeperTCPHandlerFactory( + config_getter, global_context->getKeeperDispatcher(), + global_context->getSettingsRef().receive_timeout.totalSeconds(), + global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket)); +#else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + }); + + /// HTTP control endpoints + port_name = "keeper_server.http_control.port"; + createServer(config(), listen_host, port_name, listen_try, /* start_server: */ false, + servers_to_start_before_tables, + [&](UInt16 port) -> ProtocolServerAdapter + { + auto http_context = httpContext(); + Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(http_context->getReceiveTimeout()); + http_params->setKeepAliveTimeout(keep_alive_timeout); + + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config(), socket, listen_host, port); + socket.setReceiveTimeout(http_context->getReceiveTimeout()); + socket.setSendTimeout(http_context->getSendTimeout()); + return ProtocolServerAdapter( + listen_host, + port_name, + "HTTP Control: http://" + address.toString(), + std::make_unique( + 
std::move(http_context), + createKeeperHTTPControlMainHandlerFactory( + config_getter(), + global_context->getKeeperDispatcher(), + "KeeperHTTPControlHandler-factory"), server_pool, socket, http_params)); + }); + } +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); +#endif + + } + + { + std::lock_guard lock(servers_lock); + /// We should start interserver communications before (and more important shutdown after) tables. + /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. + /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can + /// communicate with zookeeper, execute merges, etc. + createInterserverServers( + config(), + interserver_listen_hosts, + listen_try, + server_pool, + async_metrics, + servers_to_start_before_tables, + /* start_servers= */ false); + + + for (auto & server : servers_to_start_before_tables) + { + server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } + } /// Initialize access storages. auto & access_control = global_context->getAccessControl(); @@ -1498,18 +1799,19 @@ try global_context->setStopServersCallback([&](const ServerType & server_type) { std::lock_guard lock(servers_lock); - servers.stopServers(server_type); + stopServers(servers, server_type); }); global_context->setStartServersCallback([&](const ServerType & server_type) { std::lock_guard lock(servers_lock); - servers.createServers( + createServers( config(), - *this, - servers_lock, + listen_hosts, + listen_try, server_pool, async_metrics, + servers, /* start_servers= */ true, server_type); }); @@ -1722,21 +2024,18 @@ try { std::lock_guard lock(servers_lock); - servers.createServers( - config(), - *this, - servers_lock, - server_pool, - async_metrics, - false, - ServerType(ServerType::Type::QUERIES_ALL)); + createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers); if (servers.empty()) - throw Exception( - ErrorCodes::NO_ELEMENTS_IN_CONFIG, - "No servers started (add valid listen_host and 'tcp_port' " - "or 'http_port' to configuration file.)"); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " + "to configuration file.)"); } + if (servers.empty()) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " + "to configuration file.)"); + #if USE_SSL CertificateReloader::instance().tryLoad(config()); #endif @@ -1808,7 +2107,12 @@ try { std::lock_guard lock(servers_lock); - servers.startServers(); + for (auto & server : servers) + { + server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } + global_context->setServerCompletelyStarted(); LOG_INFO(log, "Ready for connections."); } @@ -1844,10 +2148,46 @@ try access_control.stopPeriodicReloading(); is_cancelled = true; - const auto remaining_connections = servers.stopServers(server_settings, servers_lock); + + LOG_DEBUG(log, "Waiting for current connections to close."); + + size_t current_connections = 0; + { + std::lock_guard lock(servers_lock); + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } + } + + if (current_connections) + LOG_WARNING(log, "Closed all listening sockets. 
Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(log, "Closed all listening sockets."); + + /// Wait for unfinished backups and restores. + /// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries + /// (because killAllQueries() will cancel all running backups/restores). + if (server_settings.shutdown_wait_backups_and_restores) + global_context->waitAllBackupsAndRestores(); + + /// Killing remaining queries. + if (!server_settings.shutdown_wait_unfinished_queries) + global_context->getProcessList().killAllQueries(); + + if (current_connections) + current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); + + if (current_connections) + LOG_WARNING(log, "Closed connections. But {} remain." + " Tip: To increase wait time add to config: 60", current_connections); + else + LOG_INFO(log, "Closed connections."); + dns_cache_updater.reset(); - if (remaining_connections) + if (current_connections) { /// There is no better way to force connections to close in Poco. /// Otherwise connection handlers will continue to live @@ -1881,4 +2221,561 @@ catch (...) return code ? code : -1; } +std::unique_ptr Server::buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure) +{ + auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr + { + if (type == "tcp") + return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes)); + + if (type == "tls") +#if USE_SSL + return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + + if (type == "proxy1") + return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name)); + if (type == "mysql") + return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes)); + if (type == "postgres") + return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes)); + if (type == "http") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes) + ); + if (type == "prometheus") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes) + ); + if (type == "interserver") + return TCPServerConnectionFactory::Ptr( + new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), ProfileEvents::InterfaceInterserverReceiveBytes, ProfileEvents::InterfaceInterserverSendBytes) + ); + + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, 
"Protocol configuration error, unknown protocol name '{}'", type); + }; + + std::string conf_name = "protocols." + protocol; + std::string prefix = conf_name + "."; + std::unordered_set pset {conf_name}; + + auto stack = std::make_unique(*this, conf_name); + + while (true) + { + // if there is no "type" - it's a reference to another protocol and this is just an endpoint + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "tls") + { + if (is_secure) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); + is_secure = true; + } + + stack->append(create_factory(type, conf_name)); + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + + return stack; +} + +HTTPContextPtr Server::httpContext() const +{ + return std::make_shared(context()); +} + +void Server::createServers( + Poco::Util::AbstractConfiguration & config, + const Strings & listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers, + const ServerType & server_type) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); + + Poco::Util::AbstractConfiguration::Keys protocols; + config.keys("protocols", protocols); + + for (const auto & protocol : protocols) + { + if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) + continue; + + std::string prefix = "protocols." 
+ protocol + "."; + std::string port_name = prefix + "port"; + std::string description {" protocol"}; + if (config.has(prefix + "description")) + description = config.getString(prefix + "description"); + + if (!config.has(prefix + "port")) + continue; + + std::vector hosts; + if (config.has(prefix + "host")) + hosts.push_back(config.getString(prefix + "host")); + else + hosts = listen_hosts; + + for (const auto & host : hosts) + { + bool is_secure = false; + auto stack = buildProtocolStackFromConfig(config, protocol, http_params, async_metrics, is_secure); + + if (stack->empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); + + createServer(config, host, port_name.c_str(), listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, host, port, is_secure); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + + return ProtocolServerAdapter( + host, + port_name.c_str(), + description + ": " + address.toString(), + std::make_unique( + stack.release(), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } + } + + for (const auto & listen_host : listen_hosts) + { + const char * port_name; + + if (server_type.shouldStart(ServerType::Type::HTTP)) + { + /// HTTP + port_name = "http_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + + return ProtocolServerAdapter( + listen_host, + port_name, + "http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); + }); + } + + if (server_type.shouldStart(ServerType::Type::HTTPS)) + { + /// HTTPS + port_name = "https_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "https://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)); +#else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); +#endif + }); + } + + if (server_type.shouldStart(ServerType::Type::TCP)) + { + /// TCP + port_name = "tcp_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return 
ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } + + if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) + { + /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt + port_name = "tcp_with_proxy_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp) with PROXY: " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } + + if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) + { + /// TCP with SSL + port_name = "tcp_port_secure"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + #if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + #else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); + #endif + }); + } + + if (server_type.shouldStart(ServerType::Type::MYSQL)) + { + port_name = "mysql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "MySQL compatibility protocol: " + address.toString(), + std::make_unique(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } + + if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) + { + port_name = "postgresql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return 
ProtocolServerAdapter( + listen_host, + port_name, + "PostgreSQL compatibility protocol: " + address.toString(), + std::make_unique(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } + +#if USE_GRPC + if (server_type.shouldStart(ServerType::Type::GRPC)) + { + port_name = "grpc_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::SocketAddress server_address(listen_host, port); + return ProtocolServerAdapter( + listen_host, + port_name, + "gRPC protocol: " + server_address.toString(), + std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); + }); + } +#endif + if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) + { + /// Prometheus (if defined and not setup yet with http_port) + port_name = "prometheus.port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "Prometheus: http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes)); + }); + } + } +} + +void Server::createInterserverServers( + Poco::Util::AbstractConfiguration & config, + const Strings & interserver_listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers, + const ServerType & server_type) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); + + /// Now iterate over interserver_listen_hosts + for (const auto & interserver_listen_host : interserver_listen_hosts) + { + const char * port_name; + + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) + { + /// Interserver IO HTTP + port_name = "interserver_http_port"; + createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "replica communication (interserver): http://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); + }); + } + + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) + { + port_name = "interserver_https_port"; + createServer(config, interserver_listen_host, 
port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "secure replica communication (interserver): https://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params, + ProfileEvents::InterfaceInterserverReceiveBytes, + ProfileEvents::InterfaceInterserverSendBytes)); +#else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); +#endif + }); + } + } +} + +void Server::stopServers( + std::vector & servers, + const ServerType & server_type +) const +{ + LoggerRawPtr log = &logger(); + + /// Remove servers once all their connections are closed + auto check_server = [&log](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG(log, "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? "finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); + + for (auto & server : servers) + { + if (!server.isStopping()) + { + const std::string server_port_name = server.getPortName(); + + if (server_type.shouldStop(server_port_name)) + server.stop(); + } + } + + std::erase_if(servers, std::bind_front(check_server, "")); +} + +void Server::updateServers( + Poco::Util::AbstractConfiguration & config, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + std::vector & servers_to_start_before_tables) +{ + LoggerRawPtr log = &logger(); + + const auto listen_hosts = getListenHosts(config); + const auto interserver_listen_hosts = getInterserverListenHosts(config); + const auto listen_try = getListenTry(config); + + /// Remove servers once all their connections are closed + auto check_server = [&log](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG(log, "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? "finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous reload)")); + + Poco::Util::AbstractConfiguration & previous_config = latest_config ? 
*latest_config : this->config(); + + std::vector all_servers; + all_servers.reserve(servers.size() + servers_to_start_before_tables.size()); + for (auto & server : servers) + all_servers.push_back(&server); + + for (auto & server : servers_to_start_before_tables) + all_servers.push_back(&server); + + for (auto * server : all_servers) + { + if (!server->isStopping()) + { + std::string port_name = server->getPortName(); + bool has_host = false; + bool is_http = false; + if (port_name.starts_with("protocols.")) + { + std::string protocol = port_name.substr(0, port_name.find_last_of('.')); + has_host = config.has(protocol + ".host"); + + std::string conf_name = protocol; + std::string prefix = protocol + "."; + std::unordered_set pset {conf_name}; + while (true) + { + if (config.has(prefix + "type")) + { + std::string type = config.getString(prefix + "type"); + if (type == "http") + { + is_http = true; + break; + } + } + + if (!config.has(prefix + "impl")) + break; + + conf_name = "protocols." + config.getString(prefix + "impl"); + prefix = conf_name + "."; + + if (!pset.insert(conf_name).second) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); + } + } + else + { + /// NOTE: better to compare using getPortName() over using + /// dynamic_cast<> since HTTPServer is also used for prometheus and + /// internal replication communications. + is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port"; + } + + if (!has_host) + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end(); + bool has_port = !config.getString(port_name, "").empty(); + bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); + if (force_restart) + LOG_TRACE(log, " had been changed, will reload {}", server->getDescription()); + + if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart) + { + server->stop(); + LOG_INFO(log, "Stopped listening for {}", server->getDescription()); + } + } + } + + createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true); + + std::erase_if(servers, std::bind_front(check_server, "")); + std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, "")); +} + } diff --git a/programs/server/Server.h b/programs/server/Server.h index b4931ce53d1..3f03dd137ef 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -1,10 +1,15 @@ #pragma once #include + #include +#include +#include +#include +#include /** Server provides three interfaces: - * 1. HTTP, GRPC - simple interfaces for any applications. + * 1. HTTP - simple interface for any applications. * 2. TCP - interface for native clickhouse-client and for server to server internal communications. * More rich and efficient, but less compatible * - data is transferred by columns; @@ -13,21 +18,43 @@ * 3. Interserver HTTP - for replication. 
*/ +namespace Poco +{ + namespace Net + { + class ServerSocket; + } +} + namespace DB { +class AsynchronousMetrics; +class ProtocolServerAdapter; class Server : public BaseDaemon, public IServer { public: using ServerApplication::run; - Poco::Util::LayeredConfiguration & config() const override { return BaseDaemon::config(); } + Poco::Util::LayeredConfiguration & config() const override + { + return BaseDaemon::config(); + } - Poco::Logger & logger() const override { return BaseDaemon::logger(); } + Poco::Logger & logger() const override + { + return BaseDaemon::logger(); + } - ContextMutablePtr context() const override { return global_context; } + ContextMutablePtr context() const override + { + return global_context; + } - bool isCancelled() const override { return BaseDaemon::isCancelled(); } + bool isCancelled() const override + { + return BaseDaemon::isCancelled(); + } void defineOptions(Poco::Util::OptionSet & _options) override; @@ -46,6 +73,64 @@ private: ContextMutablePtr global_context; /// Updated/recent config, to compare http_handlers ConfigurationPtr latest_config; + + HTTPContextPtr httpContext() const; + + Poco::Net::SocketAddress socketBindListen( + const Poco::Util::AbstractConfiguration & config, + Poco::Net::ServerSocket & socket, + const std::string & host, + UInt16 port, + [[maybe_unused]] bool secure = false) const; + + std::unique_ptr buildProtocolStackFromConfig( + const Poco::Util::AbstractConfiguration & config, + const std::string & protocol, + Poco::Net::HTTPServerParams::Ptr http_params, + AsynchronousMetrics & async_metrics, + bool & is_secure); + + using CreateServerFunc = std::function; + void createServer( + Poco::Util::AbstractConfiguration & config, + const std::string & listen_host, + const char * port_name, + bool listen_try, + bool start_server, + std::vector & servers, + CreateServerFunc && func) const; + + void createServers( + Poco::Util::AbstractConfiguration & config, + const Strings & listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); + + void createInterserverServers( + Poco::Util::AbstractConfiguration & config, + const Strings & interserver_listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); + + void updateServers( + Poco::Util::AbstractConfiguration & config, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + std::vector & servers_to_start_before_tables); + + void stopServers( + std::vector & servers, + const ServerType & server_type + ) const; }; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 33042fbc7fc..f2e10a27b75 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -236,7 +236,6 @@ add_object_library(clickhouse_client Client) add_object_library(clickhouse_bridge BridgeHelper) add_object_library(clickhouse_server Server) add_object_library(clickhouse_server_http Server/HTTP) -add_object_library(clickhouse_server_manager Server/ServersManager) add_object_library(clickhouse_formats Formats) add_object_library(clickhouse_processors Processors) add_object_library(clickhouse_processors_executors Processors/Executors) diff --git a/src/Server/ServersManager/IServersManager.cpp 
b/src/Server/ServersManager/IServersManager.cpp deleted file mode 100644 index 8b1eee94303..00000000000 --- a/src/Server/ServersManager/IServersManager.cpp +++ /dev/null @@ -1,268 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int NETWORK_ERROR; -extern const int INVALID_CONFIG_PARAMETER; -} - -IServersManager::IServersManager(ContextMutablePtr global_context_, Poco::Logger * logger_) - : global_context(global_context_), logger(logger_) -{ -} - - -bool IServersManager::empty() const -{ - return servers.empty(); -} - -std::vector IServersManager::getMetrics() const -{ - std::vector metrics; - metrics.reserve(servers.size()); - for (const auto & server : servers) - metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - return metrics; -} - -void IServersManager::startServers() -{ - for (auto & server : servers) - { - server.start(); - LOG_INFO(logger, "Listening for {}", server.getDescription()); - } -} - -void IServersManager::stopServers(const ServerType & server_type) -{ - /// Remove servers once all their connections are closed - auto check_server = [&](const char prefix[], auto & server) - { - if (!server.isStopping()) - return false; - size_t current_connections = server.currentConnections(); - LOG_DEBUG( - logger, - "Server {}{}: {} ({} connections)", - server.getDescription(), - prefix, - !current_connections ? "finished" : "waiting", - current_connections); - return !current_connections; - }; - - std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); - - for (auto & server : servers) - { - if (!server.isStopping() && server_type.shouldStop(server.getPortName())) - server.stop(); - } - - std::erase_if(servers, std::bind_front(check_server, "")); -} - -void IServersManager::updateServers( - const Poco::Util::AbstractConfiguration & config, - IServer & iserver, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - ConfigurationPtr latest_config) -{ - stopServersForUpdate(config, latest_config); - createServers(config, iserver, servers_lock, server_pool, async_metrics, true, ServerType(ServerType::Type::QUERIES_ALL)); -} - -Poco::Net::SocketAddress IServersManager::socketBindListen( - const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port) const -{ - auto address = makeSocketAddress(host, port, logger); - socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config.getBool("listen_reuse_port", false)); - /// If caller requests any available port from the OS, discover it after binding. - if (port == 0) - { - address = socket.address(); - LOG_DEBUG(logger, "Requested any available port (port == 0), actual port is {:d}", address.port()); - } - - socket.listen(/* backlog = */ config.getUInt("listen_backlog", 4096)); - return address; -} - -void IServersManager::createServer( - const Poco::Util::AbstractConfiguration & config, - const std::string & listen_host, - const char * port_name, - bool start_server, - CreateServerFunc && func) -{ - /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. 
- if (config.getString(port_name, "").empty()) - return; - - /// If we already have an active server for this listen_host/port_name, don't create it again - for (const auto & server : servers) - { - if (!server.isStopping() && server.getListenHost() == listen_host && server.getPortName() == port_name) - return; - } - - auto port = config.getInt(port_name); - try - { - servers.push_back(func(port)); - if (start_server) - { - servers.back().start(); - LOG_INFO(logger, "Listening for {}", servers.back().getDescription()); - } - global_context->registerServerPort(port_name, port); - } - catch (const Poco::Exception &) - { - if (!getListenTry(config)) - { - throw Exception(ErrorCodes::NETWORK_ERROR, "Listen [{}]:{} failed: {}", listen_host, port, getCurrentExceptionMessage(false)); - } - LOG_WARNING( - logger, - "Listen [{}]:{} failed: {}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, " - "then consider to " - "specify not disabled IPv4 or IPv6 address to listen in element of configuration " - "file. Example for disabled IPv6: 0.0.0.0 ." - " Example for disabled IPv4: ::", - listen_host, - port, - getCurrentExceptionMessage(false)); - } -} - -void IServersManager::stopServersForUpdate(const Poco::Util::AbstractConfiguration & config, ConfigurationPtr latest_config) -{ - /// Remove servers once all their connections are closed - auto check_server = [&](const char prefix[], auto & server) - { - if (!server.isStopping()) - return false; - size_t current_connections = server.currentConnections(); - LOG_DEBUG( - logger, - "Server {}{}: {} ({} connections)", - server.getDescription(), - prefix, - !current_connections ? "finished" : "waiting", - current_connections); - return !current_connections; - }; - - std::erase_if(servers, std::bind_front(check_server, " (from one of previous reload)")); - - const auto listen_hosts = getListenHosts(config); - const Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : config; - - for (auto & server : servers) - { - if (server.isStopping()) - return; - std::string port_name = server.getPortName(); - bool has_host = false; - bool is_http = false; - if (port_name.starts_with("protocols.")) - { - std::string protocol = port_name.substr(0, port_name.find_last_of('.')); - has_host = config.has(protocol + ".host"); - - std::string conf_name = protocol; - std::string prefix = protocol + "."; - std::unordered_set pset{conf_name}; - while (true) - { - if (config.has(prefix + "type")) - { - std::string type = config.getString(prefix + "type"); - if (type == "http") - { - is_http = true; - break; - } - } - - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." + config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception( - ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } - } - else - { - /// NOTE: better to compare using getPortName() over using - /// dynamic_cast<> since HTTPServer is also used for prometheus and - /// internal replication communications. 
- is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port"; - } - - if (!has_host) - has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); - bool has_port = !config.getString(port_name, "").empty(); - bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); - if (force_restart) - LOG_TRACE(logger, " had been changed, will reload {}", server.getDescription()); - - if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart) - { - server.stop(); - LOG_INFO(logger, "Stopped listening for {}", server.getDescription()); - } - } - - std::erase_if(servers, std::bind_front(check_server, "")); -} - -Strings IServersManager::getListenHosts(const Poco::Util::AbstractConfiguration & config) const -{ - auto listen_hosts = DB::getMultipleValuesFromConfig(config, "", "listen_host"); - if (listen_hosts.empty()) - { - listen_hosts.emplace_back("::1"); - listen_hosts.emplace_back("127.0.0.1"); - } - return listen_hosts; -} - -bool IServersManager::getListenTry(const Poco::Util::AbstractConfiguration & config) const -{ - bool listen_try = config.getBool("listen_try", false); - if (!listen_try) - { - Poco::Util::AbstractConfiguration::Keys protocols; - config.keys("protocols", protocols); - listen_try = DB::getMultipleValuesFromConfig(config, "", "listen_host").empty() - && std::none_of( - protocols.begin(), - protocols.end(), - [&](const auto & protocol) - { return config.has("protocols." + protocol + ".host") && config.has("protocols." + protocol + ".port"); }); - } - return listen_try; -} - -} diff --git a/src/Server/ServersManager/IServersManager.h b/src/Server/ServersManager/IServersManager.h deleted file mode 100644 index 7e1d9d50d82..00000000000 --- a/src/Server/ServersManager/IServersManager.h +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -class IServersManager -{ -public: - IServersManager(ContextMutablePtr global_context_, Poco::Logger * logger_); - virtual ~IServersManager() = default; - - bool empty() const; - std::vector getMetrics() const; - - virtual void createServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) - = 0; - - void startServers(); - - void stopServers(const ServerType & server_type); - virtual size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) = 0; - - virtual void updateServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - ConfigurationPtr latest_config); - -protected: - ContextMutablePtr global_context; - Poco::Logger * logger; - - std::vector servers; - - Poco::Net::SocketAddress socketBindListen( - const Poco::Util::AbstractConfiguration & config, Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port) const; - - using CreateServerFunc = std::function; - void createServer( - const Poco::Util::AbstractConfiguration & config, - const std::string & listen_host, - const char * port_name, - bool start_server, - CreateServerFunc && func); - - void stopServersForUpdate(const Poco::Util::AbstractConfiguration & 
config, ConfigurationPtr latest_config); - - Strings getListenHosts(const Poco::Util::AbstractConfiguration & config) const; - bool getListenTry(const Poco::Util::AbstractConfiguration & config) const; -}; - -} diff --git a/src/Server/ServersManager/InterServersManager.cpp b/src/Server/ServersManager/InterServersManager.cpp deleted file mode 100644 index 4425d468248..00000000000 --- a/src/Server/ServersManager/InterServersManager.cpp +++ /dev/null @@ -1,327 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if USE_SSL -# include -#endif - -#if USE_NURAFT -# include -# include -#endif - -namespace ProfileEvents -{ -extern const Event InterfaceInterserverSendBytes; -extern const Event InterfaceInterserverReceiveBytes; -} - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int SUPPORT_IS_DISABLED; -} - -void InterServersManager::createServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) -{ - if (config.has("keeper_server.server_id")) - { -#if USE_NURAFT - //// If we don't have configured connection probably someone trying to use clickhouse-server instead - //// of clickhouse-keeper, so start synchronously. - bool can_initialize_keeper_async = false; - - if (zkutil::hasZooKeeperConfig(config)) /// We have configured connection to some zookeeper cluster - { - /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start - /// synchronously. - can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); - } - /// Initialize keeper RAFT. 
- global_context->initializeKeeperDispatcher(can_initialize_keeper_async); - FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); - - auto config_getter = [this]() -> const Poco::Util::AbstractConfiguration & { return global_context->getConfigRef(); }; - - for (const auto & listen_host : getListenHosts(config)) - { - /// TCP Keeper - constexpr auto port_name = "keeper_server.tcp_port"; - createServer( - config, - listen_host, - port_name, - /* start_server = */ false, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout( - Poco::Timespan(config.getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); - socket.setSendTimeout( - Poco::Timespan(config.getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); - return ProtocolServerAdapter( - listen_host, - port_name, - "Keeper (tcp): " + address.toString(), - std::make_unique( - new KeeperTCPHandlerFactory( - config_getter, - global_context->getKeeperDispatcher(), - global_context->getSettingsRef().receive_timeout.totalSeconds(), - global_context->getSettingsRef().send_timeout.totalSeconds(), - false), - server_pool, - socket)); - }); - - constexpr auto secure_port_name = "keeper_server.tcp_port_secure"; - createServer( - config, - listen_host, - secure_port_name, - /* start_server = */ false, - [&](UInt16 port) -> ProtocolServerAdapter - { -# if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout( - Poco::Timespan(config.getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0)); - socket.setSendTimeout( - Poco::Timespan(config.getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0)); - return ProtocolServerAdapter( - listen_host, - secure_port_name, - "Keeper with secure protocol (tcp_secure): " + address.toString(), - std::make_unique( - new KeeperTCPHandlerFactory( - config_getter, - global_context->getKeeperDispatcher(), - global_context->getSettingsRef().receive_timeout.totalSeconds(), - global_context->getSettingsRef().send_timeout.totalSeconds(), - true), - server_pool, - socket)); -# else - UNUSED(port); - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, - "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -# endif - }); - - /// HTTP control endpoints - createServer( - config, - listen_host, - /* port_name = */ "keeper_server.http_control.port", - /* start_server = */ false, - [&](UInt16 port) -> ProtocolServerAdapter - { - auto http_context = std::make_shared(global_context); - Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(http_context->getReceiveTimeout()); - http_params->setKeepAliveTimeout(keep_alive_timeout); - - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(http_context->getReceiveTimeout()); - socket.setSendTimeout(http_context->getSendTimeout()); - return ProtocolServerAdapter( - listen_host, - port_name, - "HTTP Control: http://" + address.toString(), - std::make_unique( - std::move(http_context), - createKeeperHTTPControlMainHandlerFactory( - config_getter(), 
global_context->getKeeperDispatcher(), "KeeperHTTPControlHandler-factory"), - server_pool, - socket, - http_params)); - }); - } -#else - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); -#endif - } - - { - std::lock_guard lock(servers_lock); - /// We should start interserver communications before (and more important shutdown after) tables. - /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. - /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can - /// communicate with zookeeper, execute merges, etc. - createInterserverServers(config, server, server_pool, async_metrics, start_servers, server_type); - startServers(); - } -} - -size_t InterServersManager::stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) -{ - if (servers.empty()) - { - return 0; - } - - LOG_DEBUG(logger, "Waiting for current connections to servers for tables to finish."); - - size_t current_connections = 0; - { - std::lock_guard lock(servers_lock); - for (auto & server : servers) - { - server.stop(); - current_connections += server.currentConnections(); - } - } - - if (current_connections) - LOG_INFO(logger, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); - else - LOG_INFO(logger, "Closed all listening sockets."); - - if (current_connections > 0) - current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); - - if (current_connections) - LOG_INFO( - logger, - "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections " - "after context shutdown.", - current_connections); - else - LOG_INFO(logger, "Closed connections to servers for tables."); - return current_connections; -} - -void InterServersManager::updateServers( - const Poco::Util::AbstractConfiguration & config, - IServer & iserver, - std::mutex & /*servers_lock*/, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - ConfigurationPtr latest_config) -{ - stopServersForUpdate(config, latest_config); - createInterserverServers(config, iserver, server_pool, async_metrics, true, ServerType(ServerType::Type::QUERIES_ALL)); -} - -Strings InterServersManager::getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) const -{ - auto interserver_listen_hosts = DB::getMultipleValuesFromConfig(config, "", "interserver_listen_host"); - if (!interserver_listen_hosts.empty()) - return interserver_listen_hosts; - - /// Use more general restriction in case of emptiness - return getListenHosts(config); -} - -void InterServersManager::createInterserverServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) -{ - const Settings & settings = global_context->getSettingsRef(); - - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); - - /// Now iterate over interserver_listen_hosts - for (const auto & interserver_listen_host : getInterserverListenHosts(config)) - { - if 
(server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) - { - /// Interserver IO HTTP - constexpr auto port_name = "interserver_http_port"; - createServer( - config, - interserver_listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "replica communication (interserver): http://" + address.toString(), - std::make_unique( - std::make_shared(global_context), - createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceInterserverReceiveBytes, - ProfileEvents::InterfaceInterserverSendBytes)); - }); - } - - if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) - { - constexpr auto port_name = "interserver_https_port"; - createServer( - config, - interserver_listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "secure replica communication (interserver): https://" + address.toString(), - std::make_unique( - std::make_shared(global_context), - createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPSHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceInterserverReceiveBytes, - ProfileEvents::InterfaceInterserverSendBytes)); -#else - UNUSED(port); - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, - "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - } - } -} - -} diff --git a/src/Server/ServersManager/InterServersManager.h b/src/Server/ServersManager/InterServersManager.h deleted file mode 100644 index 8780eae18e0..00000000000 --- a/src/Server/ServersManager/InterServersManager.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -class InterServersManager : public IServersManager -{ -public: - using IServersManager::IServersManager; - - void createServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) override; - - size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) override; - - void updateServers( - const Poco::Util::AbstractConfiguration & config, - IServer & iserver, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - ConfigurationPtr latest_config) override; - -private: - Strings getInterserverListenHosts(const Poco::Util::AbstractConfiguration & config) const; - - void createInterserverServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type); -}; - -} diff --git a/src/Server/ServersManager/ProtocolServersManager.cpp 
b/src/Server/ServersManager/ProtocolServersManager.cpp deleted file mode 100644 index af57de3ac3c..00000000000 --- a/src/Server/ServersManager/ProtocolServersManager.cpp +++ /dev/null @@ -1,523 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if USE_SSL -# include -#endif - -#if USE_GRPC -# include -#endif - -namespace ProfileEvents -{ -extern const Event InterfaceNativeSendBytes; -extern const Event InterfaceNativeReceiveBytes; -extern const Event InterfaceHTTPSendBytes; -extern const Event InterfaceHTTPReceiveBytes; -extern const Event InterfacePrometheusSendBytes; -extern const Event InterfacePrometheusReceiveBytes; -extern const Event InterfaceMySQLSendBytes; -extern const Event InterfaceMySQLReceiveBytes; -extern const Event InterfacePostgreSQLSendBytes; -extern const Event InterfacePostgreSQLReceiveBytes; -extern const Event InterfaceInterserverSendBytes; -extern const Event InterfaceInterserverReceiveBytes; -} - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int SUPPORT_IS_DISABLED; -extern const int INVALID_CONFIG_PARAMETER; -} - -void ProtocolServersManager::createServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & /*servers_lock*/, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) -{ - auto listen_hosts = getListenHosts(config); - const Settings & settings = global_context->getSettingsRef(); - - Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; - http_params->setTimeout(settings.http_receive_timeout); - http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout); - - Poco::Util::AbstractConfiguration::Keys protocols; - config.keys("protocols", protocols); - - for (const auto & protocol : protocols) - { - if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) - continue; - - std::string prefix = "protocols." 
+ protocol + "."; - std::string port_name = prefix + "port"; - std::string description{" protocol"}; - if (config.has(prefix + "description")) - description = config.getString(prefix + "description"); - - if (!config.has(prefix + "port")) - continue; - - std::vector hosts; - if (config.has(prefix + "host")) - hosts.push_back(config.getString(prefix + "host")); - else - hosts = listen_hosts; - - for (const auto & host : hosts) - { - bool is_secure = false; - auto stack = buildProtocolStackFromConfig(config, server, protocol, http_params, async_metrics, is_secure); - - if (stack->empty()) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' stack empty", protocol); - - createServer( - config, - host, - port_name.c_str(), - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - host, - port_name.c_str(), - description + ": " + address.toString(), - std::make_unique(stack.release(), server_pool, socket, new Poco::Net::TCPServerParams)); - }); - } - } - - for (const auto & listen_host : listen_hosts) - { - if (server_type.shouldStart(ServerType::Type::HTTP)) - { - /// HTTP - constexpr auto port_name = "http_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "http://" + address.toString(), - std::make_unique( - std::make_shared(global_context), - createHandlerFactory(server, config, async_metrics, "HTTPHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceHTTPReceiveBytes, - ProfileEvents::InterfaceHTTPSendBytes)); - }); - } - - if (server_type.shouldStart(ServerType::Type::HTTPS)) - { - /// HTTPS - constexpr auto port_name = "https_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "https://" + address.toString(), - std::make_unique( - std::make_shared(global_context), - createHandlerFactory(server, config, async_metrics, "HTTPSHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfaceHTTPReceiveBytes, - ProfileEvents::InterfaceHTTPSendBytes)); -#else - UNUSED(port); - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, - "HTTPS protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP)) - { - /// TCP - constexpr auto port_name = "tcp_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return 
ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp): " + address.toString(), - std::make_unique( - new TCPHandlerFactory( - server, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) - { - /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt - constexpr auto port_name = "tcp_with_proxy_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp) with PROXY: " + address.toString(), - std::make_unique( - new TCPHandlerFactory( - server, false, true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) - { - /// TCP with SSL - constexpr auto port_name = "tcp_port_secure"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - new TCPHandlerFactory( - server, true, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); -#else - UNUSED(port); - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, - "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - }); - } - - if (server_type.shouldStart(ServerType::Type::MYSQL)) - { - constexpr auto port_name = "mysql_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "MySQL compatibility protocol: " + address.toString(), - std::make_unique( - new MySQLHandlerFactory( - server, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - - if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) - { - constexpr auto port_name = "postgresql_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "PostgreSQL compatibility protocol: " + address.toString(), - 
std::make_unique( - new PostgreSQLHandlerFactory( - server, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); - } - -#if USE_GRPC - if (server_type.shouldStart(ServerType::Type::GRPC)) - { - constexpr auto port_name = "grpc_port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::SocketAddress server_address(listen_host, port); - return ProtocolServerAdapter( - listen_host, - port_name, - "gRPC protocol: " + server_address.toString(), - std::make_unique(server, makeSocketAddress(listen_host, port, logger))); - }); - } -#endif - if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) - { - /// Prometheus (if defined and not setup yet with http_port) - constexpr auto port_name = "prometheus.port"; - createServer( - config, - listen_host, - port_name, - start_servers, - [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "Prometheus: http://" + address.toString(), - std::make_unique( - std::make_shared(global_context), - createHandlerFactory(server, config, async_metrics, "PrometheusHandler-factory"), - server_pool, - socket, - http_params, - ProfileEvents::InterfacePrometheusReceiveBytes, - ProfileEvents::InterfacePrometheusSendBytes)); - }); - } - } -} - -size_t ProtocolServersManager::stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) -{ - if (servers.empty()) - { - return 0; - } - - LOG_DEBUG(logger, "Waiting for current connections to close."); - - size_t current_connections = 0; - { - std::lock_guard lock(servers_lock); - for (auto & server : servers) - { - server.stop(); - current_connections += server.currentConnections(); - } - } - - if (current_connections) - LOG_WARNING(logger, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); - else - LOG_INFO(logger, "Closed all listening sockets."); - - /// Wait for unfinished backups and restores. - /// This must be done after closing listening sockets (no more backups/restores) but before ProcessList::killAllQueries - /// (because killAllQueries() will cancel all running backups/restores). - if (server_settings.shutdown_wait_backups_and_restores) - global_context->waitAllBackupsAndRestores(); - /// Killing remaining queries. - if (!server_settings.shutdown_wait_unfinished_queries) - global_context->getProcessList().killAllQueries(); - - if (current_connections) - current_connections = waitServersToFinish(servers, servers_lock, server_settings.shutdown_wait_unfinished); - - if (current_connections) - LOG_WARNING( - logger, - "Closed connections. But {} remain." 
- " Tip: To increase wait time add to config: 60", - current_connections); - else - LOG_INFO(logger, "Closed connections."); - return current_connections; -} - -std::unique_ptr ProtocolServersManager::buildProtocolStackFromConfig( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - const std::string & protocol, - Poco::Net::HTTPServerParams::Ptr http_params, - AsynchronousMetrics & async_metrics, - bool & is_secure) const -{ - auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr - { - if (type == "tcp") - return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory( - server, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes)); - - if (type == "tls") -#if USE_SSL - return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(server, conf_name)); -#else - throw Exception( - ErrorCodes::SUPPORT_IS_DISABLED, - "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); -#endif - - if (type == "proxy1") - return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(server, conf_name)); - if (type == "mysql") - return TCPServerConnectionFactory::Ptr( - new MySQLHandlerFactory(server, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes)); - if (type == "postgres") - return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory( - server, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes)); - if (type == "http") - return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( - std::make_shared(global_context), - http_params, - createHandlerFactory(server, config, async_metrics, "HTTPHandler-factory"), - ProfileEvents::InterfaceHTTPReceiveBytes, - ProfileEvents::InterfaceHTTPSendBytes)); - if (type == "prometheus") - return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( - std::make_shared(global_context), - http_params, - createHandlerFactory(server, config, async_metrics, "PrometheusHandler-factory"), - ProfileEvents::InterfacePrometheusReceiveBytes, - ProfileEvents::InterfacePrometheusSendBytes)); - if (type == "interserver") - return TCPServerConnectionFactory::Ptr(new HTTPServerConnectionFactory( - std::make_shared(global_context), - http_params, - createHandlerFactory(server, config, async_metrics, "InterserverIOHTTPHandler-factory"), - ProfileEvents::InterfaceInterserverReceiveBytes, - ProfileEvents::InterfaceInterserverSendBytes)); - - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type); - }; - - std::string conf_name = "protocols." + protocol; - std::string prefix = conf_name + "."; - std::unordered_set pset{conf_name}; - - auto stack = std::make_unique(server, conf_name); - - while (true) - { - // if there is no "type" - it's a reference to another protocol and this is just an endpoint - if (config.has(prefix + "type")) - { - std::string type = config.getString(prefix + "type"); - if (type == "tls") - { - if (is_secure) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' contains more than one TLS layer", protocol); - is_secure = true; - } - - stack->append(create_factory(type, conf_name)); - } - - if (!config.has(prefix + "impl")) - break; - - conf_name = "protocols." 
+ config.getString(prefix + "impl"); - prefix = conf_name + "."; - - if (!pset.insert(conf_name).second) - throw Exception( - ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol '{}' configuration contains a loop on '{}'", protocol, conf_name); - } - - return stack; -} - -} diff --git a/src/Server/ServersManager/ProtocolServersManager.h b/src/Server/ServersManager/ProtocolServersManager.h deleted file mode 100644 index e9eaaeb2184..00000000000 --- a/src/Server/ServersManager/ProtocolServersManager.h +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace DB -{ - -class ProtocolServersManager : public IServersManager -{ -public: - using IServersManager::IServersManager; - - void createServers( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - std::mutex & servers_lock, - Poco::ThreadPool & server_pool, - AsynchronousMetrics & async_metrics, - bool start_servers, - const ServerType & server_type) override; - - using IServersManager::stopServers; - size_t stopServers(const ServerSettings & server_settings, std::mutex & servers_lock) override; - -private: - std::unique_ptr buildProtocolStackFromConfig( - const Poco::Util::AbstractConfiguration & config, - IServer & server, - const std::string & protocol, - Poco::Net::HTTPServerParams::Ptr http_params, - AsynchronousMetrics & async_metrics, - bool & is_secure) const; -}; - -} From c6660c70b17b8e3c1e22192b825deeb5f9f2120b Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 27 May 2024 10:27:50 +0200 Subject: [PATCH 381/392] Add missing reinterpret functions to documentation --- .../functions/type-conversion-functions.md | 617 +++++++++++++++++- 1 file changed, 611 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 1030d92c76b..2360cecb9a5 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -996,12 +996,585 @@ Result: └─────────────────────────────────────────────┘ ``` -## reinterpretAsUInt(8\|16\|32\|64) +## reinterpretAsUInt8 -## reinterpretAsInt(8\|16\|32\|64) +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt8. -## reinterpretAsFloat* +**Syntax** +```sql +reinterpretAsUInt8(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt8. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as UInt8. [UInt8](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toInt8(257) AS x, + toTypeName(x), + reinterpretAsUInt8(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ Int8 │ 1 │ UInt8 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt16 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt16. 
+ +**Syntax** + +```sql +reinterpretAsUInt16(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt16. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as UInt16. [UInt16](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt8(257) AS x, + toTypeName(x), + reinterpretAsUInt16(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ UInt8 │ 1 │ UInt16 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt32 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt32. + +**Syntax** + +```sql +reinterpretAsUInt32(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt32. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as UInt32. [UInt32](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt16(257) AS x, + toTypeName(x), + reinterpretAsUInt32(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt16 │ 257 │ UInt32 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt64 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt64. + +**Syntax** + +```sql +reinterpretAsUInt64(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt64. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as UInt64. [UInt64](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt32(257) AS x, + toTypeName(x), + reinterpretAsUInt64(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt32 │ 257 │ UInt64 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt128 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt128. + +**Syntax** + +```sql +reinterpretAsUInt128(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt128.
+ +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as UInt128. [UInt128](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt64(257) AS x, + toTypeName(x), + reinterpretAsUInt128(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt64 │ 257 │ UInt128 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsUInt256 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt256. + +**Syntax** + +```sql +reinterpretAsUInt256(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as UInt256. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as UInt256. [UInt256](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256). + +**Example** + +Query: + +```sql +SELECT + toUInt128(257) AS x, + toTypeName(x), + reinterpretAsUInt256(x) AS res, + toTypeName(res) +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ UInt128 │ 257 │ UInt256 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt8 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int8. + +**Syntax** + +```sql +reinterpretAsInt8(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int8. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Int8. [Int8](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toUInt8(257) AS x, + toTypeName(x), + reinterpretAsInt8(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ UInt8 │ 1 │ Int8 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt16 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int16. + +**Syntax** + +```sql +reinterpretAsInt16(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int16. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). 
+::: + +**Returned value** + +- Reinterpreted value `x` as Int16. [Int16](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt8(257) AS x, + toTypeName(x), + reinterpretAsInt16(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 1 │ Int8 │ 1 │ Int16 │ +└───┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt32 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int32. + +**Syntax** + +```sql +reinterpretAsInt32(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int32. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Int32. [Int32](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt16(257) AS x, + toTypeName(x), + reinterpretAsInt32(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int16 │ 257 │ Int32 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt64 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int64. + +**Syntax** + +```sql +reinterpretAsInt64(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int64. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Int64. [Int64](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt32(257) AS x, + toTypeName(x), + reinterpretAsInt64(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int32 │ 257 │ Int64 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt128 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int128. + +**Syntax** + +```sql +reinterpretAsInt128(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int128. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Int128. [Int128](../data-types/int-uint.md/#int-ranges). 
+ +**Example** + +Query: + +```sql +SELECT + toInt64(257) AS x, + toTypeName(x), + reinterpretAsInt128(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int64 │ 257 │ Int128 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsInt256 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int256. + +**Syntax** + +```sql +reinterpretAsInt256(x) +``` + +**Parameters** + +- `x`: value to byte reinterpret as Int256. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Int256. [Int256](../data-types/int-uint.md/#int-ranges). + +**Example** + +Query: + +```sql +SELECT + toInt128(257) AS x, + toTypeName(x), + reinterpretAsInt256(x) AS res, + toTypeName(res); +``` + +Result: + +```response +┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐ +│ 257 │ Int128 │ 257 │ Int256 │ +└─────┴───────────────┴─────┴─────────────────┘ +``` + +## reinterpretAsFloat32 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Float32. + +**Syntax** + +```sql +reinterpretAsFloat32(x) +``` + +**Parameters** + +- `x`: value to reinterpret as Float32. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Float32. [Float32](../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT reinterpretAsUInt32(toFloat32(0.2)) as x, reinterpretAsFloat32(x); +``` + +Result: + +```response +┌──────────x─┬─reinterpretAsFloat32(x)─┐ +│ 1045220557 │ 0.2 │ +└────────────┴─────────────────────────┘ +``` + +## reinterpretAsFloat64 + +Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Float64. + +**Syntax** + +```sql +reinterpretAsFloat64(x) +``` + +**Parameters** + +- `x`: value to reinterpret as Float64. + +:::note +Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +::: + +**Returned value** + +- Reinterpreted value `x` as Float64. [Float64](../data-types/float.md). + +**Example** + +Query: + +```sql +SELECT reinterpretAsUInt64(toFloat64(0.2)) as x, reinterpretAsFloat64(x); +``` + +Result: + +```response +┌───────────────────x─┬─reinterpretAsFloat64(x)─┐ +│ 4596373779694328218 │ 0.2 │ +└─────────────────────┴─────────────────────────┘ +``` ## reinterpretAsDate @@ -1093,11 +1666,43 @@ Result: ## reinterpretAsString -This function accepts a number or date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). 
Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
+This function accepts a number, date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
+
+**Syntax**
+
+```sql
+reinterpretAsString(x)
+```
+
+**Parameters**
+
+- `x`: value to reinterpret as String. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md).
+
+**Returned value**
+
+- String containing bytes representing `x`. [String](../data-types/string.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+ reinterpretAsString(toDateTime('1970-01-01 01:01:05')),
+ reinterpretAsString(toDate('1970-03-07'));
+```
+
+Result:
+
+```response
+┌─reinterpretAsString(toDateTime('1970-01-01 01:01:05'))─┬─reinterpretAsString(toDate('1970-03-07'))─┐
+│ A │ A │
+└────────────────────────────────────────────────────────┴───────────────────────────────────────────┘
+```
 
 ## reinterpretAsFixedString
 
-This function accepts a number or date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
+This function accepts a number, date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
 
 **Syntax**
 
@@ -1137,7 +1742,7 @@ Result:
 In addition to the UUID functions listed here, there is dedicated [UUID function documentation](/docs/en/sql-reference/functions/uuid-functions.md).
 :::
 
-Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
+Accepts a 16 byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored. 
**Syntax** From 8b551cc832a765296213ce462a5472d589b1955d Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 27 May 2024 10:30:18 +0200 Subject: [PATCH 382/392] Remove unneeded test file - one already exists --- .../03156_reinterpret_functions.sql | 36 ------------------- 1 file changed, 36 deletions(-) delete mode 100644 tests/queries/0_stateless/03156_reinterpret_functions.sql diff --git a/tests/queries/0_stateless/03156_reinterpret_functions.sql b/tests/queries/0_stateless/03156_reinterpret_functions.sql deleted file mode 100644 index 4acaaf47cef..00000000000 --- a/tests/queries/0_stateless/03156_reinterpret_functions.sql +++ /dev/null @@ -1,36 +0,0 @@ --- Date and DateTime - -SELECT reinterpretAsDate(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsDate('A',''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsDate([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} -SELECT reinterpretAsDateTime(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsDateTime('A',''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsDateTime([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} - -SELECT reinterpretAsDate(65); -SELECT reinterpretAsDate('A'); -SELECT reinterpretAsDateTime(65); -SELECT reinterpretAsDate('A'); - --- Fixed String - -SELECT reinterpretAsFixedString(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFixedString(toDate('1970-01-01'),''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFixedString([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} - -SELECT reinterpretAsFixedString(toDate('1970-03-07')); -SELECT reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05')); -SELECT reinterpretAsFixedString(65); - --- Float32, Float64 - -SELECT reinterpretAsFloat32(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFloat64(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFloat32('1970-01-01', ''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFloat64('1970-01-01', ''); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT reinterpretAsFloat32([0, 1, 2]); -- { clientError ILLEGAL_TYPE_OF_ARGUMENT} -SELECT reinterpretAsFloat64([0, 1, 2]); -- { clientError4 ILLEGAL_TYPE_OF_ARGUMENT} - - - - From 5a868304c04755bb62b30c45e408b65a3e78dcd0 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 27 May 2024 11:38:22 +0200 Subject: [PATCH 383/392] Revert "Remove some unnecessary `UNREACHABLE`s" --- programs/keeper-client/Commands.cpp | 3 +-- programs/main.cpp | 2 +- src/Access/AccessEntityIO.cpp | 3 ++- src/Access/AccessRights.cpp | 2 +- src/Access/IAccessStorage.cpp | 9 ++++++--- .../AggregateFunctionGroupArray.cpp | 13 +++++++------ .../AggregateFunctionSequenceNextNode.cpp | 1 + src/AggregateFunctions/AggregateFunctionSum.h | 1 + src/Common/DateLUTImpl.cpp | 1 + src/Common/IntervalKind.cpp | 10 ++++++++++ src/Common/TargetSpecific.cpp | 2 ++ src/Common/ThreadProfileEvents.cpp | 1 + src/Common/ZooKeeper/IKeeper.cpp | 2 ++ src/Compression/CompressionCodecDeflateQpl.cpp | 1 + src/Compression/CompressionCodecDoubleDelta.cpp | 10 +++------- src/Coordination/KeeperReconfiguration.cpp | 8 +------- src/Coordination/KeeperServer.cpp | 2 +- src/Core/Field.h | 2 ++ src/DataTypes/Serializations/ISerialization.cpp | 1 + src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 1 + .../MetadataStorageTransactionState.cpp | 1 + src/Disks/VolumeJBOD.cpp | 2 ++ 
src/Formats/EscapingRuleUtils.cpp | 1 + src/Functions/FunctionsRound.h | 8 ++++++++ src/Functions/FunctionsTimeWindow.cpp | 2 ++ src/Functions/PolygonUtils.h | 2 ++ .../UserDefinedSQLObjectsZooKeeperStorage.cpp | 1 + src/IO/CompressionMethod.cpp | 1 + src/IO/HadoopSnappyReadBuffer.h | 1 + src/Interpreters/AggregatedDataVariants.cpp | 8 ++++++++ src/Interpreters/Cache/FileSegment.cpp | 1 + src/Interpreters/ComparisonGraph.cpp | 1 + src/Interpreters/FilesystemCacheLog.cpp | 1 + src/Interpreters/HashJoin.cpp | 3 +++ src/Interpreters/HashJoin.h | 6 ++++++ .../InterpreterTransactionControlQuery.cpp | 1 + src/Interpreters/SetVariants.cpp | 4 ++++ src/Parsers/ASTExplainQuery.h | 2 ++ src/Parsers/Lexer.cpp | 4 +++- .../Formats/Impl/MsgPackRowInputFormat.cpp | 1 + src/Processors/IProcessor.cpp | 2 ++ src/Processors/QueryPlan/ReadFromMergeTree.cpp | 6 ++++++ src/Processors/QueryPlan/TotalsHavingStep.cpp | 2 ++ src/Processors/Transforms/FillingTransform.cpp | 1 + .../Transforms/buildPushingToViewsChain.cpp | 2 ++ src/Storages/MergeTree/BackgroundJobsAssignee.cpp | 1 + src/Storages/MergeTree/KeyCondition.cpp | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ src/Storages/MergeTree/MergeTreeDataWriter.cpp | 2 ++ .../PartMovesBetweenShardsOrchestrator.cpp | 2 ++ src/Storages/WindowView/StorageWindowView.cpp | 3 +++ 51 files changed, 121 insertions(+), 30 deletions(-) diff --git a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp index 860840a2d06..a109912e6e0 100644 --- a/programs/keeper-client/Commands.cpp +++ b/programs/keeper-client/Commands.cpp @@ -10,7 +10,6 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int KEEPER_EXCEPTION; } @@ -442,7 +441,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient new_members = query->args[1].safeGet(); break; default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected operation: {}", operation); + UNREACHABLE(); } auto response = client->zookeeper->reconfig(joining, leaving, new_members); diff --git a/programs/main.cpp b/programs/main.cpp index c270388f17f..bc8476e4ce4 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -155,8 +155,8 @@ auto instructionFailToString(InstructionFail fail) ret("AVX2"); case InstructionFail::AVX512: ret("AVX512"); -#undef ret } + UNREACHABLE(); } diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index 1b073329296..b0dfd74c53b 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -144,7 +144,8 @@ AccessEntityPtr deserializeAccessEntity(const String & definition, const String catch (Exception & e) { e.addMessage("Could not parse " + file_path); - throw; + e.rethrow(); + UNREACHABLE(); } } diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp index 2127f4ada70..c10931f554c 100644 --- a/src/Access/AccessRights.cpp +++ b/src/Access/AccessRights.cpp @@ -258,7 +258,7 @@ namespace case TABLE_LEVEL: return AccessFlags::allFlagsGrantableOnTableLevel(); case COLUMN_LEVEL: return AccessFlags::allFlagsGrantableOnColumnLevel(); } - chassert(false); + UNREACHABLE(); } } diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 8d4e7d3073e..8e51481e415 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -257,7 +257,8 @@ std::vector IAccessStorage::insert(const std::vector & mu } e.addMessage("After successfully inserting {}/{}: {}", successfully_inserted.size(), multiple_entities.size(), successfully_inserted_str); } - 
throw; + e.rethrow(); + UNREACHABLE(); } } @@ -360,7 +361,8 @@ std::vector IAccessStorage::remove(const std::vector & ids, bool thr } e.addMessage("After successfully removing {}/{}: {}", removed_names.size(), ids.size(), removed_names_str); } - throw; + e.rethrow(); + UNREACHABLE(); } } @@ -456,7 +458,8 @@ std::vector IAccessStorage::update(const std::vector & ids, const Up } e.addMessage("After successfully updating {}/{}: {}", names_of_updated.size(), ids.size(), names_of_updated_str); } - throw; + e.rethrow(); + UNREACHABLE(); } } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 930b2c6ce73..d4fb7afcb78 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -60,13 +60,14 @@ struct GroupArrayTrait template constexpr const char * getNameByTrait() { - if constexpr (Trait::last) + if (Trait::last) return "groupArrayLast"; - switch (Trait::sampler) - { - case Sampler::NONE: return "groupArray"; - case Sampler::RNG: return "groupArraySample"; - } + if (Trait::sampler == Sampler::NONE) + return "groupArray"; + else if (Trait::sampler == Sampler::RNG) + return "groupArraySample"; + + UNREACHABLE(); } template diff --git a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp index a9dd53a75e8..bed10333af0 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp +++ b/src/AggregateFunctions/AggregateFunctionSequenceNextNode.cpp @@ -414,6 +414,7 @@ public: break; return (i == events_size) ? base - i : unmatched_idx; } + UNREACHABLE(); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 2ce03c530c2..58aaddf357a 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -463,6 +463,7 @@ public: return "sumWithOverflow"; else if constexpr (Type == AggregateFunctionTypeSumKahan) return "sumKahan"; + UNREACHABLE(); } explicit AggregateFunctionSum(const DataTypes & argument_types_) diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index c87d44a4b95..392ee64dcbf 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -41,6 +41,7 @@ UInt8 getDayOfWeek(const cctz::civil_day & date) case cctz::weekday::saturday: return 6; case cctz::weekday::sunday: return 7; } + UNREACHABLE(); } inline cctz::time_point lookupTz(const cctz::time_zone & cctz_time_zone, const cctz::civil_day & date) diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index 1548d5cf9a5..22c7db504c3 100644 --- a/src/Common/IntervalKind.cpp +++ b/src/Common/IntervalKind.cpp @@ -34,6 +34,8 @@ Int64 IntervalKind::toAvgNanoseconds() const default: return toAvgSeconds() * NANOSECONDS_PER_SECOND; } + + UNREACHABLE(); } Int32 IntervalKind::toAvgSeconds() const @@ -52,6 +54,7 @@ Int32 IntervalKind::toAvgSeconds() const case IntervalKind::Kind::Quarter: return 7889238; /// Exactly 1/4 of a year. 
case IntervalKind::Kind::Year: return 31556952; /// The average length of a Gregorian year is equal to 365.2425 days } + UNREACHABLE(); } Float64 IntervalKind::toSeconds() const @@ -77,6 +80,7 @@ Float64 IntervalKind::toSeconds() const default: throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not possible to get precise number of seconds in non-precise interval"); } + UNREACHABLE(); } bool IntervalKind::isFixedLength() const @@ -95,6 +99,7 @@ bool IntervalKind::isFixedLength() const case IntervalKind::Kind::Quarter: case IntervalKind::Kind::Year: return false; } + UNREACHABLE(); } IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds) @@ -136,6 +141,7 @@ const char * IntervalKind::toKeyword() const case IntervalKind::Kind::Quarter: return "QUARTER"; case IntervalKind::Kind::Year: return "YEAR"; } + UNREACHABLE(); } @@ -155,6 +161,7 @@ const char * IntervalKind::toLowercasedKeyword() const case IntervalKind::Kind::Quarter: return "quarter"; case IntervalKind::Kind::Year: return "year"; } + UNREACHABLE(); } @@ -185,6 +192,7 @@ const char * IntervalKind::toDateDiffUnit() const case IntervalKind::Kind::Year: return "year"; } + UNREACHABLE(); } @@ -215,6 +223,7 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const case IntervalKind::Kind::Year: return "toIntervalYear"; } + UNREACHABLE(); } @@ -248,6 +257,7 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const case IntervalKind::Kind::Year: return "toYear"; } + UNREACHABLE(); } diff --git a/src/Common/TargetSpecific.cpp b/src/Common/TargetSpecific.cpp index 8540c9a9986..49f396c0926 100644 --- a/src/Common/TargetSpecific.cpp +++ b/src/Common/TargetSpecific.cpp @@ -54,6 +54,8 @@ String toString(TargetArch arch) case TargetArch::AMXTILE: return "amxtile"; case TargetArch::AMXINT8: return "amxint8"; } + + UNREACHABLE(); } } diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index 23b41f23bde..6a63d484cd9 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -75,6 +75,7 @@ const char * TasksStatsCounters::metricsProviderString(MetricsProvider provider) case MetricsProvider::Netlink: return "netlink"; } + UNREACHABLE(); } bool TasksStatsCounters::checkIfAvailable() diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index 7cca262baca..7d2602bde1e 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -146,6 +146,8 @@ const char * errorMessage(Error code) case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; case Error::ZNOTREADONLY: return "State-changing request is passed to read-only server"; } + + UNREACHABLE(); } bool isHardwareError(Error zk_return_code) diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp index f1b5b24e866..7e0653c69f8 100644 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -466,6 +466,7 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so sw_codec->doDecompressData(source, source_size, dest, uncompressed_size); return; } + UNREACHABLE(); } void CompressionCodecDeflateQpl::flushAsynchronousDecompressRequests() diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index cbd8cd57a62..e6e8db4c699 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -21,11 +21,6 
@@ namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - /** NOTE DoubleDelta is surprisingly bad name. The only excuse is that it comes from an academic paper. * Most people will think that "double delta" is just applying delta transform twice. * But in fact it is something more than applying delta transform twice. @@ -147,9 +142,9 @@ namespace ErrorCodes { extern const int CANNOT_COMPRESS; extern const int CANNOT_DECOMPRESS; + extern const int BAD_ARGUMENTS; extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; extern const int ILLEGAL_CODEC_PARAMETER; - extern const int LOGICAL_ERROR; } namespace @@ -168,8 +163,9 @@ inline Int64 getMaxValueForByteSize(Int8 byte_size) case sizeof(UInt64): return std::numeric_limits::max(); default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "only 1, 2, 4 and 8 data sizes are supported"); + assert(false && "only 1, 2, 4 and 8 data sizes are supported"); } + UNREACHABLE(); } struct WriteSpec diff --git a/src/Coordination/KeeperReconfiguration.cpp b/src/Coordination/KeeperReconfiguration.cpp index 05211af6704..e3642913a7a 100644 --- a/src/Coordination/KeeperReconfiguration.cpp +++ b/src/Coordination/KeeperReconfiguration.cpp @@ -5,12 +5,6 @@ namespace DB { - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - ClusterUpdateActions joiningToClusterUpdates(const ClusterConfigPtr & cfg, std::string_view joining) { ClusterUpdateActions out; @@ -85,7 +79,7 @@ String serializeClusterConfig(const ClusterConfigPtr & cfg, const ClusterUpdateA new_config.emplace_back(RaftServerConfig{*cfg->get_server(priority->id)}); } else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected update"); + UNREACHABLE(); } for (const auto & item : cfg->get_servers()) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 736a01443ce..8d21ce2ab01 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -990,7 +990,7 @@ KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate( raft_instance->set_priority(update->id, update->priority, /*broadcast on live leader*/true); return Accepted; } - std::unreachable(); + UNREACHABLE(); } ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) diff --git a/src/Core/Field.h b/src/Core/Field.h index 710614cd0a0..4424d669c4d 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -667,6 +667,8 @@ public: case Types::AggregateFunctionState: return f(field.template get()); case Types::CustomType: return f(field.template get()); } + + UNREACHABLE(); } String dump() const; diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index bbb1d1a6cd1..dbe27a5f3f6 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -36,6 +36,7 @@ String ISerialization::kindToString(Kind kind) case Kind::SPARSE: return "Sparse"; } + UNREACHABLE(); } ISerialization::Kind ISerialization::stringToKind(const String & str) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index cb34f7932c3..3433698a162 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -140,6 +140,7 @@ private: case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: return "REMOTE_FS_READ_AND_PUT_IN_CACHE"; } + UNREACHABLE(); } size_t first_offset = 0; diff --git a/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp 
b/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp index a37f4ce7e65..245578b5d9e 100644 --- a/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageTransactionState.cpp @@ -17,6 +17,7 @@ std::string toString(MetadataStorageTransactionState state) case MetadataStorageTransactionState::PARTIALLY_ROLLED_BACK: return "PARTIALLY_ROLLED_BACK"; } + UNREACHABLE(); } } diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index f8b9a57affe..d0e9d32ff5e 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -112,6 +112,7 @@ DiskPtr VolumeJBOD::getDisk(size_t /* index */) const return disks_by_size.top().disk; } } + UNREACHABLE(); } ReservationPtr VolumeJBOD::reserve(UInt64 bytes) @@ -163,6 +164,7 @@ ReservationPtr VolumeJBOD::reserve(UInt64 bytes) return reservation; } } + UNREACHABLE(); } bool VolumeJBOD::areMergesAvoided() const diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 9577ca2a8df..89a7a31d033 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -62,6 +62,7 @@ String escapingRuleToString(FormatSettings::EscapingRule escaping_rule) case FormatSettings::EscapingRule::Raw: return "Raw"; } + UNREACHABLE(); } void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule escaping_rule, const FormatSettings & format_settings) diff --git a/src/Functions/FunctionsRound.h b/src/Functions/FunctionsRound.h index dde57e8320d..99f3a14dfec 100644 --- a/src/Functions/FunctionsRound.h +++ b/src/Functions/FunctionsRound.h @@ -149,6 +149,8 @@ struct IntegerRoundingComputation return x; } } + + UNREACHABLE(); } static ALWAYS_INLINE T compute(T x, T scale) @@ -161,6 +163,8 @@ struct IntegerRoundingComputation case ScaleMode::Negative: return computeImpl(x, scale); } + + UNREACHABLE(); } static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out) requires std::integral @@ -243,6 +247,8 @@ inline float roundWithMode(float x, RoundingMode mode) case RoundingMode::Ceil: return ceilf(x); case RoundingMode::Trunc: return truncf(x); } + + UNREACHABLE(); } inline double roundWithMode(double x, RoundingMode mode) @@ -254,6 +260,8 @@ inline double roundWithMode(double x, RoundingMode mode) case RoundingMode::Ceil: return ceil(x); case RoundingMode::Trunc: return trunc(x); } + + UNREACHABLE(); } template diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index f93a885ee65..1c9f28c9724 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -232,6 +232,7 @@ struct TimeWindowImpl default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } + UNREACHABLE(); } template @@ -421,6 +422,7 @@ struct TimeWindowImpl default: throw Exception(ErrorCodes::SYNTAX_ERROR, "Fraction seconds are unsupported by windows yet"); } + UNREACHABLE(); } template diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 57f1243537d..c4851718da6 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -381,6 +381,8 @@ bool PointInPolygonWithGrid::contains(CoordinateType x, Coordina case CellType::complexPolygon: return boost::geometry::within(Point(x, y), polygons[cell.index_of_inner_polygon]); } + + UNREACHABLE(); } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp 
index 766d63eafb0..568e0b9b5d2 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -35,6 +35,7 @@ namespace case UserDefinedSQLObjectType::Function: return "function_"; } + UNREACHABLE(); } constexpr std::string_view sql_extension = ".sql"; diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index 22913125e99..b8e1134d422 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -52,6 +52,7 @@ std::string toContentEncodingName(CompressionMethod method) case CompressionMethod::None: return ""; } + UNREACHABLE(); } CompressionMethod chooseHTTPCompressionMethod(const std::string & list) diff --git a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index bbbb84dd6dd..73e52f2c503 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -88,6 +88,7 @@ public: case Status::TOO_LARGE_COMPRESSED_BLOCK: return "TOO_LARGE_COMPRESSED_BLOCK"; } + UNREACHABLE(); } explicit HadoopSnappyReadBuffer( diff --git a/src/Interpreters/AggregatedDataVariants.cpp b/src/Interpreters/AggregatedDataVariants.cpp index 8f82f15248f..87cfdda5948 100644 --- a/src/Interpreters/AggregatedDataVariants.cpp +++ b/src/Interpreters/AggregatedDataVariants.cpp @@ -117,6 +117,8 @@ size_t AggregatedDataVariants::size() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } + + UNREACHABLE(); } size_t AggregatedDataVariants::sizeWithoutOverflowRow() const @@ -134,6 +136,8 @@ size_t AggregatedDataVariants::sizeWithoutOverflowRow() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } + + UNREACHABLE(); } const char * AggregatedDataVariants::getMethodName() const @@ -151,6 +155,8 @@ const char * AggregatedDataVariants::getMethodName() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } + + UNREACHABLE(); } bool AggregatedDataVariants::isTwoLevel() const @@ -168,6 +174,8 @@ bool AggregatedDataVariants::isTwoLevel() const APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } + + UNREACHABLE(); } bool AggregatedDataVariants::isConvertibleToTwoLevel() const diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 61a356fa3c3..9459029dc4c 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -799,6 +799,7 @@ String FileSegment::stateToString(FileSegment::State state) case FileSegment::State::DETACHED: return "DETACHED"; } + UNREACHABLE(); } bool FileSegment::assertCorrectness() const diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp index d53ff4b0227..4eacbae7a30 100644 --- a/src/Interpreters/ComparisonGraph.cpp +++ b/src/Interpreters/ComparisonGraph.cpp @@ -309,6 +309,7 @@ ComparisonGraphCompareResult ComparisonGraph::pathToCompareResult(Path pat case Path::GREATER: return inverse ? ComparisonGraphCompareResult::LESS : ComparisonGraphCompareResult::GREATER; case Path::GREATER_OR_EQUAL: return inverse ? 
ComparisonGraphCompareResult::LESS_OR_EQUAL : ComparisonGraphCompareResult::GREATER_OR_EQUAL; } + UNREACHABLE(); } template diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index aa489351a98..80fe1c3a8ef 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -26,6 +26,7 @@ static String typeToString(FilesystemCacheLogElement::CacheType type) case FilesystemCacheLogElement::CacheType::WRITE_THROUGH_CACHE: return "WRITE_THROUGH_CACHE"; } + UNREACHABLE(); } ColumnsDescription FilesystemCacheLogElement::getColumnsDescription() diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 75da8bbc3e7..3a21c13db5e 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -705,6 +705,7 @@ namespace APPLY_FOR_JOIN_VARIANTS(M) #undef M } + UNREACHABLE(); } } @@ -2640,6 +2641,8 @@ private: default: throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys (type: {})", parent.data->type); } + + UNREACHABLE(); } template diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index a0996556f9a..86db8943926 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -322,6 +322,8 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } + + UNREACHABLE(); } size_t getTotalByteCountImpl(Type which) const @@ -336,6 +338,8 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } + + UNREACHABLE(); } size_t getBufferSizeInCells(Type which) const @@ -350,6 +354,8 @@ public: APPLY_FOR_JOIN_VARIANTS(M) #undef M } + + UNREACHABLE(); } /// NOLINTEND(bugprone-macro-parentheses) }; diff --git a/src/Interpreters/InterpreterTransactionControlQuery.cpp b/src/Interpreters/InterpreterTransactionControlQuery.cpp index 13872fbe3f5..d31ace758c4 100644 --- a/src/Interpreters/InterpreterTransactionControlQuery.cpp +++ b/src/Interpreters/InterpreterTransactionControlQuery.cpp @@ -33,6 +33,7 @@ BlockIO InterpreterTransactionControlQuery::execute() case ASTTransactionControl::SET_SNAPSHOT: return executeSetSnapshot(session_context, tcl.snapshot); } + UNREACHABLE(); } BlockIO InterpreterTransactionControlQuery::executeBegin(ContextMutablePtr session_context) diff --git a/src/Interpreters/SetVariants.cpp b/src/Interpreters/SetVariants.cpp index c600d096160..64796a013f1 100644 --- a/src/Interpreters/SetVariants.cpp +++ b/src/Interpreters/SetVariants.cpp @@ -41,6 +41,8 @@ size_t SetVariantsTemplate::getTotalRowCount() const APPLY_FOR_SET_VARIANTS(M) #undef M } + + UNREACHABLE(); } template @@ -55,6 +57,8 @@ size_t SetVariantsTemplate::getTotalByteCount() const APPLY_FOR_SET_VARIANTS(M) #undef M } + + UNREACHABLE(); } template diff --git a/src/Parsers/ASTExplainQuery.h b/src/Parsers/ASTExplainQuery.h index eb095b5dbbc..701bde8cebd 100644 --- a/src/Parsers/ASTExplainQuery.h +++ b/src/Parsers/ASTExplainQuery.h @@ -40,6 +40,8 @@ public: case TableOverride: return "EXPLAIN TABLE OVERRIDE"; case CurrentTransaction: return "EXPLAIN CURRENT TRANSACTION"; } + + UNREACHABLE(); } static ExplainKind fromString(const String & str) diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 5f2bd50524c..34855a7ce20 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -42,7 +42,7 @@ Token quotedString(const char *& pos, const char * const token_begin, const char continue; } - chassert(false); + UNREACHABLE(); } } @@ -538,6 +538,8 @@ const char * getTokenName(TokenType type) APPLY_FOR_TOKENS(M) #undef M } + + UNREACHABLE(); } diff --git 
a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 6b7f1f5206c..98cbdeaaa4b 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -657,6 +657,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {:x} is not supported", object_ext.type()); } } + UNREACHABLE(); } std::optional MsgPackSchemaReader::readRowAndGetDataTypes() diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index 5ab5e5277aa..8b160153733 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -36,6 +36,8 @@ std::string IProcessor::statusToName(Status status) case Status::ExpandPipeline: return "ExpandPipeline"; } + + UNREACHABLE(); } } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 24ea8c25fb6..6f0fa55c349 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1136,6 +1136,8 @@ static void addMergingFinal( return std::make_shared(header, num_outputs, sort_description, max_block_size_rows, /*max_block_size_bytes=*/0, merging_params.graphite_params, now); } + + UNREACHABLE(); }; pipe.addTransform(get_merging_processor()); @@ -2123,6 +2125,8 @@ static const char * indexTypeToString(ReadFromMergeTree::IndexType type) case ReadFromMergeTree::IndexType::Skip: return "Skip"; } + + UNREACHABLE(); } static const char * readTypeToString(ReadFromMergeTree::ReadType type) @@ -2138,6 +2142,8 @@ static const char * readTypeToString(ReadFromMergeTree::ReadType type) case ReadFromMergeTree::ReadType::ParallelReplicas: return "Parallel"; } + + UNREACHABLE(); } void ReadFromMergeTree::describeActions(FormatSettings & format_settings) const diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index ac5e144bf4a..d1bd70fd0b2 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -86,6 +86,8 @@ static String totalsModeToString(TotalsMode totals_mode, double auto_include_thr case TotalsMode::AFTER_HAVING_AUTO: return "after_having_auto threshold " + std::to_string(auto_include_threshold); } + + UNREACHABLE(); } void TotalsHavingStep::describeActions(FormatSettings & settings) const diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index bb38c3e1dc5..05fd2a7254f 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -67,6 +67,7 @@ static FillColumnDescription::StepFunction getStepFunction( FOR_EACH_INTERVAL_KIND(DECLARE_CASE) #undef DECLARE_CASE } + UNREACHABLE(); } static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & type) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index a1a886fb4f7..cdcfad4442c 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -898,6 +898,8 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St { return std::current_exception(); } + + UNREACHABLE(); } void FinalizingViewsTransform::work() diff --git a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp 
b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp index 0a69bf1109f..56a4378cf9a 100644 --- a/src/Storages/MergeTree/BackgroundJobsAssignee.cpp +++ b/src/Storages/MergeTree/BackgroundJobsAssignee.cpp @@ -93,6 +93,7 @@ String BackgroundJobsAssignee::toString(Type type) case Type::Moving: return "Moving"; } + UNREACHABLE(); } void BackgroundJobsAssignee::start() diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 9666da574fb..bd8642b9f66 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -2964,6 +2964,8 @@ String KeyCondition::RPNElement::toString(std::string_view column_name, bool pri case ALWAYS_TRUE: return "true"; } + + UNREACHABLE(); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b6373a22d9c..4b3093eeaac 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1177,6 +1177,8 @@ String MergeTreeData::MergingParams::getModeName() const case Graphite: return "Graphite"; case VersionedCollapsing: return "VersionedCollapsing"; } + + UNREACHABLE(); } Int64 MergeTreeData::getMaxBlockNumber() const diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index df4087b8546..426e36ce9a9 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -360,6 +360,8 @@ Block MergeTreeDataWriter::mergeBlock( return std::make_shared( block, 1, sort_description, block_size + 1, /*block_size_bytes=*/0, merging_params.graphite_params, time(nullptr)); } + + UNREACHABLE(); }; auto merging_algorithm = get_merging_algorithm(); diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 4228d7b70b6..78fcfabb704 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -616,6 +616,8 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::st } } } + + UNREACHABLE(); } void PartMovesBetweenShardsOrchestrator::removePins(const Entry & entry, zkutil::ZooKeeperPtr zk) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 8bca1c97aad..a9ec1f6c694 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -297,6 +297,7 @@ namespace CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } + UNREACHABLE(); } class AddingAggregatedChunkInfoTransform : public ISimpleTransform @@ -919,6 +920,7 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } + UNREACHABLE(); } UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) @@ -946,6 +948,7 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) CASE_WINDOW_KIND(Year) #undef CASE_WINDOW_KIND } + UNREACHABLE(); } void StorageWindowView::addFireSignal(std::set & signals) From c42338b8e0e4a8239fb34001860c9dba091e926a Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 27 May 2024 11:51:46 +0200 Subject: [PATCH 384/392] Fix test --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 69485bd4d01..823e272cf01 100644 --- 
a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -578,6 +578,7 @@ void S3ObjectStorage::applyNewSettings( auto settings_from_config = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings); auto modified_settings = std::make_unique(*s3_settings.get()); modified_settings->auth_settings.updateFrom(settings_from_config->auth_settings); + modified_settings->request_settings = settings_from_config->request_settings; if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName())) modified_settings->auth_settings.updateFrom(endpoint_settings->auth_settings); From 0676b155de8ebbea9cd9f8dcafdfe2dc8a03abfc Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 27 May 2024 12:12:39 +0200 Subject: [PATCH 385/392] Remove logging --- src/Storages/ObjectStorage/ReadBufferIterator.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp index 5e89a0a1b9d..78cdc442f64 100644 --- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp +++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp @@ -254,21 +254,17 @@ ReadBufferIterator::Data ReadBufferIterator::next() } } - LOG_TEST(getLogger("KSSENII"), "Will read columns from {}", current_object_info->getPath()); - std::unique_ptr read_buf; CompressionMethod compression_method; using ObjectInfoInArchive = StorageObjectStorageSource::ArchiveIterator::ObjectInfoInArchive; if (const auto * object_info_in_archive = dynamic_cast(current_object_info.get())) { - LOG_TEST(getLogger("KSSENII"), "Will read columns from {} from archive", current_object_info->getPath()); compression_method = chooseCompressionMethod(filename, configuration->compression_method); const auto & archive_reader = object_info_in_archive->archive_reader; read_buf = archive_reader->readFile(object_info_in_archive->path_in_archive, /*throw_on_not_found=*/true); } else { - LOG_TEST(getLogger("KSSENII"), "Will read columns from {} from s3", current_object_info->getPath()); compression_method = chooseCompressionMethod(filename, configuration->compression_method); read_buf = object_storage->readObject( StoredObject(current_object_info->getPath()), From 2bffc72d64e62f9f5ddb177f4b617bcc6d2c6253 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 27 May 2024 10:57:26 +0000 Subject: [PATCH 386/392] Fix optimize_aggregation_in_order setting --- .../queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh index c433d409c7c..b8760ec0e1d 100755 --- a/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh +++ b/tests/queries/0_stateless/03039_dynamic_aggregating_merge_tree.sh @@ -8,7 +8,7 @@ CLICKHOUSE_LOG_COMMENT= . 
"$CUR_DIR"/../shell_config.sh # Fix some settings to avoid timeouts because of some settings randomization -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128" +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_merge_tree_settings --allow_experimental_dynamic_type=1 --index_granularity_bytes 10485760 --index_granularity 8128 --merge_max_block_size 8128 --optimize_aggregation_in_order 0" function test() { From ed6994d372b636b4981593303e8dfde654bc151b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 27 May 2024 13:01:35 +0200 Subject: [PATCH 387/392] Clean settings in 02943_variant_read_subcolumns test --- tests/queries/0_stateless/02943_variant_read_subcolumns.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh index 6bbd127d933..5ca8dd5f36f 100755 --- a/tests/queries/0_stateless/02943_variant_read_subcolumns.sh +++ b/tests/queries/0_stateless/02943_variant_read_subcolumns.sh @@ -7,8 +7,7 @@ CLICKHOUSE_LOG_COMMENT= # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1 --max_insert_threads 4 --group_by_two_level_threshold 752249 --group_by_two_level_threshold_bytes 15083870 --distributed_aggregation_memory_efficient 1 --fsync_metadata 1 --output_format_parallel_formatting 0 --input_format_parallel_parsing 0 --min_chunk_bytes_for_parallel_parsing 6583861 --max_read_buffer_size 640584 --prefer_localhost_replica 1 --max_block_size 38844 --max_threads 48 --optimize_append_index 0 --optimize_if_chain_to_multiif 1 --optimize_if_transform_strings_to_enum 0 --optimize_read_in_order 1 --optimize_or_like_chain 0 --optimize_substitute_columns 1 --enable_multiple_prewhere_read_steps 1 --read_in_order_two_level_merge_threshold 4 --optimize_aggregation_in_order 0 --aggregation_in_order_max_block_bytes 18284646 --use_uncompressed_cache 1 --min_bytes_to_use_direct_io 10737418240 --min_bytes_to_use_mmap_io 10737418240 --local_filesystem_read_method pread --remote_filesystem_read_method read --local_filesystem_read_prefetch 1 --filesystem_cache_segments_batch_size 0 --read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 --throw_on_error_from_cache_on_write_operations 1 --remote_filesystem_read_prefetch 0 --allow_prefetched_read_pool_for_remote_filesystem 0 --filesystem_prefetch_max_memory_usage 128Mi --filesystem_prefetches_limit 0 --filesystem_prefetch_min_bytes_for_single_read_task 16Mi --filesystem_prefetch_step_marks 50 --filesystem_prefetch_step_bytes 0 --compile_aggregate_expressions 1 --compile_sort_description 0 --merge_tree_coarse_index_granularity 31 --optimize_distinct_in_order 1 --max_bytes_before_external_sort 1 --max_bytes_before_external_group_by 1 --max_bytes_before_remerge_sort 2640239625 --min_compress_block_size 3114155 --max_compress_block_size 226550 --merge_tree_compact_parts_min_granules_to_multibuffer_read 118 --optimize_sorting_by_input_stream_properties 0 --http_response_buffer_size 543038 --http_wait_end_of_query False --enable_memory_bound_merging_of_aggregation_results 1 --min_count_to_compile_expression 3 --min_count_to_compile_aggregate_expression 3 --min_count_to_compile_sort_description 0 --session_timezone 
America/Mazatlan --prefer_warmed_unmerged_parts_seconds 8 --use_page_cache_for_disks_without_file_cache False --page_cache_inject_eviction True --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.82 " - +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_suspicious_variant_types=1" function test() { From 747f6ae39c98d2caac1ddd6f5958aecc7bb92e22 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 27 May 2024 12:52:44 +0000 Subject: [PATCH 388/392] Add a comment after #64226 --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 3fca66e6eb8..43edaaa53fd 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -3916,6 +3916,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi return array_join_column; } + /// Resolve subcolumns. Example : SELECT x.y.z FROM tab ARRAY JOIN arr AS x auto compound_expr = tryResolveIdentifierFromCompoundExpression( identifier_lookup.identifier, identifier_lookup.identifier.getPartsSize() - identifier_view.getPartsSize() /*identifier_bind_size*/, From 8f775037bfcf6e109ec4c79b5fd943f25789f240 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 27 May 2024 08:07:05 +0000 Subject: [PATCH 389/392] Address PR review --- src/Backups/BackupIO_S3.cpp | 17 +++++----- src/Disks/DiskEncrypted.h | 6 ++-- src/Disks/IDisk.h | 12 ++++++- .../ObjectStorages/DiskObjectStorage.cpp | 6 ++++ src/Disks/ObjectStorages/DiskObjectStorage.h | 6 ++++ src/Disks/ObjectStorages/IObjectStorage.h | 1 + .../ObjectStorages/S3/S3ObjectStorage.cpp | 31 ++++++++++--------- src/IO/S3/copyS3File.cpp | 15 +++++++-- src/IO/S3/copyS3File.h | 4 +-- .../test_backup_restore_s3/test.py | 4 +++ 10 files changed, 72 insertions(+), 30 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index ee88556fbd6..be2f81a299c 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -188,6 +188,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s fs::path(s3_uri.key) / path_in_backup, 0, file_size, + /* dest_s3_client= */ destination_disk->getObjectStorage()->getS3StorageClient(), /* dest_bucket= */ blob_path[1], /* dest_key= */ blob_path[0], s3_settings.request_settings, @@ -195,8 +196,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s blob_storage_log, object_attributes, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupReaderS3"), - /* for_disk_s3= */ true, - destination_disk->getObjectStorage()->getS3StorageClient()); + /* for_disk_s3= */ true); return file_size; }; @@ -258,15 +258,15 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src /* src_key= */ blob_path[0], start_pos, length, - s3_uri.bucket, - fs::path(s3_uri.key) / path_in_backup, + /* dest_s3_client= */ client, + /* dest_bucket= */ s3_uri.bucket, + /* dest_key= */ fs::path(s3_uri.key) / path_in_backup, s3_settings.request_settings, read_settings, blob_storage_log, {}, threadPoolCallbackRunnerUnsafe(getBackupsIOThreadPool().get(), "BackupWriterS3"), - /*for_disk_s3=*/false, - client); + /*for_disk_s3=*/false); return; /// copied! 
} } @@ -284,8 +284,9 @@ void BackupWriterS3::copyFile(const String & destination, const String & source, /* src_key= */ fs::path(s3_uri.key) / source, 0, size, - s3_uri.bucket, - fs::path(s3_uri.key) / destination, + /* dest_s3_client= */ client, + /* dest_bucket= */ s3_uri.bucket, + /* dest_key= */ fs::path(s3_uri.key) / destination, s3_settings.request_settings, read_settings, blob_storage_log, diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 27cf3096344..9b575c65bce 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -350,10 +350,12 @@ public: return delegate; } - ObjectStoragePtr getObjectStorage() override +#if USE_AWS_S3 + std::shared_ptr getS3StorageClient() const override { - return delegate->getObjectStorage(); + return delegate->getS3StorageClient(); } +#endif private: String wrappedPath(const String & path) const diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index b59e5b7f558..658acb01c74 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -471,6 +470,17 @@ public: virtual DiskPtr getDelegateDiskIfExists() const { return nullptr; } +#if USE_AWS_S3 + virtual std::shared_ptr getS3StorageClient() const + { + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Method getS3StorageClient() is not implemented for disk type: {}", + getDataSourceDescription().toString()); + } +#endif + + protected: friend class DiskDecorator; diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index abf0c1fad0b..5803a985000 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -582,6 +582,12 @@ UInt64 DiskObjectStorage::getRevision() const return metadata_helper->getRevision(); } +#if USE_AWS_S3 +std::shared_ptr DiskObjectStorage::getS3StorageClient() const +{ + return object_storage->getS3StorageClient(); +} +#endif DiskPtr DiskObjectStorageReservation::getDisk(size_t i) const { diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 2a27ddf89a7..ffef0a007da 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -6,6 +6,8 @@ #include #include +#include "config.h" + namespace CurrentMetrics { @@ -210,6 +212,10 @@ public: bool supportsChmod() const override { return metadata_storage->supportsChmod(); } void chmod(const String & path, mode_t mode) override; +#if USE_AWS_S3 + std::shared_ptr getS3StorageClient() const override; +#endif + private: /// Create actual disk object storage transaction for operations diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index c9f445b9a35..b49dc839561 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 2e7bb6eeec9..12dda230b79 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -495,13 +495,14 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT try { copyS3File( - current_client, - uri.bucket, - object_from.remote_path, - 0, - size, - dest_s3->uri.bucket, - object_to.remote_path, + /*src_s3_client=*/current_client, + /*src_bucket=*/uri.bucket, + 
/*src_key=*/object_from.remote_path, + /*src_offset=*/0, + /*src_size=*/size, + /*dest_s3_client=*/current_client, + /*dest_bucket=*/dest_s3->uri.bucket, + /*dest_key=*/object_to.remote_path, settings_ptr->request_settings, patchSettings(read_settings), BlobStorageLogWriter::create(disk_name), @@ -535,13 +536,15 @@ void S3ObjectStorage::copyObject( // NOLINT auto size = S3::getObjectSize(*current_client, uri.bucket, object_from.remote_path, {}, settings_ptr->request_settings); auto scheduler = threadPoolCallbackRunnerUnsafe(getThreadPoolWriter(), "S3ObjStor_copy"); - copyS3File(current_client, - uri.bucket, - object_from.remote_path, - 0, - size, - uri.bucket, - object_to.remote_path, + copyS3File( + /*src_s3_client=*/current_client, + /*src_bucket=*/uri.bucket, + /*src_key=*/object_from.remote_path, + /*src_offset=*/0, + /*src_size=*/size, + /*dest_s3_client=*/current_client, + /*dest_bucket=*/uri.bucket, + /*dest_key=*/object_to.remote_path, settings_ptr->request_settings, patchSettings(read_settings), BlobStorageLogWriter::create(disk_name), diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 8dc2e6c0e0d..24e14985758 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -654,7 +654,16 @@ namespace bool for_disk_s3_, BlobStorageLogWriterPtr blob_storage_log_, std::function fallback_method_) - : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, blob_storage_log_, getLogger("copyS3File")) + : UploadHelper( + client_ptr_, + dest_bucket_, + dest_key_, + request_settings_, + object_metadata_, + schedule_, + for_disk_s3_, + blob_storage_log_, + getLogger("copyS3File")) , src_bucket(src_bucket_) , src_key(src_key_) , offset(src_offset_) @@ -869,6 +878,7 @@ void copyS3File( const String & src_key, size_t src_offset, size_t src_size, + std::shared_ptr dest_s3_client, const String & dest_bucket, const String & dest_key, const S3Settings::RequestSettings & settings, @@ -876,8 +886,7 @@ void copyS3File( BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata, ThreadPoolCallbackRunnerUnsafe schedule, - bool for_disk_s3, - std::shared_ptr dest_s3_client) + bool for_disk_s3) { if (!dest_s3_client) dest_s3_client = src_s3_client; diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index cb1960cc368..85b3870ddbf 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -36,6 +36,7 @@ void copyS3File( const String & src_key, size_t src_offset, size_t src_size, + std::shared_ptr dest_s3_client, const String & dest_bucket, const String & dest_key, const S3Settings::RequestSettings & settings, @@ -43,8 +44,7 @@ void copyS3File( BlobStorageLogWriterPtr blob_storage_log, const std::optional> & object_metadata = std::nullopt, ThreadPoolCallbackRunnerUnsafe schedule_ = {}, - bool for_disk_s3 = false, - std::shared_ptr dest_s3_client = nullptr); + bool for_disk_s3 = false); /// Copies data from any seekable source to S3. 
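/// Usage illustration (added for clarity; not part of the patch, and the variable names below
/// are placeholders): with the reordered declaration above, a caller copying a blob between two
/// S3-backed disks would obtain both clients through the new IDisk::getS3StorageClient()
/// accessor introduced earlier in this series and pass the destination client right after the
/// source range, rather than as a trailing defaulted argument:
///
///     copyS3File(
///         /* src_s3_client  = */ source_disk->getS3StorageClient(),
///         /* src_bucket     = */ source_bucket,
///         /* src_key        = */ source_key,
///         /* src_offset     = */ 0,
///         /* src_size       = */ file_size,
///         /* dest_s3_client = */ destination_disk->getS3StorageClient(),
///         /* dest_bucket    = */ destination_bucket,
///         /* dest_key       = */ destination_key,
///         request_settings,
///         read_settings,
///         blob_storage_log);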
/// The same functionality can be done by using the function copyData() and the class WriteBufferFromS3 diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index a76b32bce39..967ed6a221c 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -28,6 +28,10 @@ node = cluster.add_instance( def setup_minio_users(): + # create 2 extra users with restricted access + # miniorestricted1 - full access to bucket 'root', no access to other buckets + # miniorestricted2 - full access to bucket 'root2', no access to other buckets + # storage policy 'policy_s3_restricted' defines a policy for storing files inside bucket 'root' using 'miniorestricted1' user for user, bucket in [("miniorestricted1", "root"), ("miniorestricted2", "root2")]: print( cluster.exec_in_container( From 8166da7fbb616d9fa2d779ffe8e533b238d3680e Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 27 May 2024 16:21:36 +0200 Subject: [PATCH 390/392] Incorporate review changes --- .../functions/type-conversion-functions.md | 124 +++++------------- 1 file changed, 30 insertions(+), 94 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 2360cecb9a5..c4e0b2946c4 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -998,7 +998,7 @@ Result: ## reinterpretAsUInt8 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt8. +Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1008,11 +1008,7 @@ reinterpretAsUInt8(x) **Parameters** -- `x`: value to byte reinterpret as UInt8. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as UInt8. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1040,7 +1036,7 @@ Result: ## reinterpretAsUInt16 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt16. +Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1050,11 +1046,7 @@ reinterpretAsUInt16(x) **Parameters** -- `x`: value to byte reinterpret as UInt16. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). 
Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as UInt16. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1082,7 +1074,7 @@ Result: ## reinterpretAsUInt32 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt32. +Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1092,11 +1084,7 @@ reinterpretAsUInt32(x) **Parameters** -- `x`: value to byte reinterpret as UInt32. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as UInt32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1124,7 +1112,7 @@ Result: ## reinterpretAsUInt64 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt64. +Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1134,11 +1122,7 @@ reinterpretAsUInt64(x) **Parameters** -- `x`: value to byte reinterpret as UInt64. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as UInt64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1166,7 +1150,7 @@ Result: ## reinterpretAsUInt128 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt128. +Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1176,11 +1160,7 @@ reinterpretAsUInt128(x) **Parameters** -- `x`: value to byte reinterpret as UInt64. 
- -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as UInt128. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1208,7 +1188,7 @@ Result: ## reinterpretAsUInt256 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type UInt256. +Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1218,11 +1198,7 @@ reinterpretAsUInt256(x) **Parameters** -- `x`: value to byte reinterpret as UInt256. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as UInt256. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1250,7 +1226,7 @@ Result: ## reinterpretAsInt8 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int8. +Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1260,11 +1236,7 @@ reinterpretAsInt8(x) **Parameters** -- `x`: value to byte reinterpret as Int8. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as Int8. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1292,7 +1264,7 @@ Result: ## reinterpretAsInt16 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int16. +Performs byte reinterpretation by treating the input value as a value of type Int16. 
Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1302,11 +1274,7 @@ reinterpretAsInt16(x) **Parameters** -- `x`: value to byte reinterpret as Int16. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as Int16. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1334,7 +1302,7 @@ Result: ## reinterpretAsInt32 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int32. +Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1344,11 +1312,7 @@ reinterpretAsInt32(x) **Parameters** -- `x`: value to byte reinterpret as Int32. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as Int32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1376,7 +1340,7 @@ Result: ## reinterpretAsInt64 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int64. +Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1386,11 +1350,7 @@ reinterpretAsInt64(x) **Parameters** -- `x`: value to byte reinterpret as Int64. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as Int64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). 
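For intuition, the reinterpretation performed by this family of functions can be pictured as a raw little-endian byte copy into the target type. The following C++ sketch is an illustration added here (it is not ClickHouse source code) and models the behaviour for string inputs, where missing bytes act as zero padding and surplus bytes are ignored:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Rough model of reinterpretAsUInt32(s) for a string s on a little-endian host:
// copy at most sizeof(uint32_t) raw bytes and leave the remaining bytes zero.
uint32_t reinterpret_as_uint32(const char * data, size_t size)
{
    uint32_t result = 0;                                         // zero padding for short inputs
    std::memcpy(&result, data, std::min(size, sizeof(result)));  // extra input bytes are ignored
    return result;
}
```

The same picture explains the examples in these sections: reinterpreting a small value as a wider integer leaves it unchanged, because the additional bytes are simply zero.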
**Returned value** @@ -1418,7 +1378,7 @@ Result: ## reinterpretAsInt128 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int128. +Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1428,11 +1388,7 @@ reinterpretAsInt128(x) **Parameters** -- `x`: value to byte reinterpret as Int128. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as Int128. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1460,7 +1416,7 @@ Result: ## reinterpretAsInt256 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Int256. +Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1470,11 +1426,7 @@ reinterpretAsInt256(x) **Parameters** -- `x`: value to byte reinterpret as Int256. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to byte reinterpret as Int256. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1502,7 +1454,7 @@ Result: ## reinterpretAsFloat32 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Float32. +Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1512,11 +1464,7 @@ reinterpretAsFloat32(x) **Parameters** -- `x`: value to reinterpret as Float32. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to reinterpret as Float32. 
[(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1540,7 +1488,7 @@ Result: ## reinterpretAsFloat64 -Performs byte reinterpretation similar to [reinterpret_cast](https://en.cppreference.com/w/cpp/language/reinterpret_cast) to type Float64. +Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. **Syntax** @@ -1550,11 +1498,7 @@ reinterpretAsFloat64(x) **Parameters** -- `x`: value to reinterpret as Float64. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: value to reinterpret as Float64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1588,11 +1532,7 @@ reinterpretAsDate(x) **Parameters** -- `x`: number of days since the beginning of the Unix Epoch. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: number of days since the beginning of the Unix Epoch. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned value** @@ -1632,11 +1572,7 @@ reinterpretAsDateTime(x) **Parameters** -- `x`: number of seconds since the beginning of the Unix Epoch. - -:::note -Accepts types that can be interpreted as numeric such as [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md). Accepts [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -::: +- `x`: number of seconds since the beginning of the Unix Epoch. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). 
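Since reinterpretAsDate and reinterpretAsDateTime only reinterpret an epoch-relative counter (roughly, Date behaves like a 16-bit day count and DateTime like a 32-bit second count from 1970-01-01), the mapping can be sketched in C++20 chrono terms; the helpers below are illustrative and not ClickHouse source:

```cpp
#include <chrono>
#include <cstdint>

// Illustrative model (not ClickHouse source): treat the input purely as a day
// counter or a second counter measured from 1970-01-01.
std::chrono::sys_days to_date(uint16_t days)
{
    return std::chrono::sys_days{std::chrono::days{days}};
}

std::chrono::sys_seconds to_datetime(uint32_t seconds)
{
    return std::chrono::sys_seconds{std::chrono::seconds{seconds}};
}
```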
**Returned value** From 9eb79530f4b40d0f0dcef4ecd82da97e5136a4bf Mon Sep 17 00:00:00 2001 From: Max K Date: Mon, 27 May 2024 17:35:42 +0200 Subject: [PATCH 391/392] CI: fix build_report selection in case of job reuse --- tests/ci/report.py | 50 +++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index 8676c998afb..670a10f4561 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -401,30 +401,40 @@ class BuildResult: @classmethod def load_any(cls, build_name: str, pr_number: int, head_ref: str): # type: ignore """ - loads report from suitable report file with the following priority: - 1. report from PR with the same @pr_number - 2. report from branch with the same @head_ref - 3. report from the master - 4. any other report + loads build report from one of all available report files (matching the job digest) + with the following priority: + 1. report for the current PR @pr_number (might happen in PR' wf with or without job reuse) + 2. report for the current branch @head_ref (might happen in release/master' wf with or without job reuse) + 3. report for master branch (might happen in any workflow in case of job reuse) + 4. any other report (job reuse from another PR, if master report is not available yet) """ - reports = [] + pr_report = None + ref_report = None + master_report = None + any_report = None for file in Path(REPORT_PATH).iterdir(): if f"{build_name}.json" in file.name: - reports.append(file) - if not reports: - return None - file_path = None - for file in reports: - if pr_number and f"_{pr_number}_" in file.name: - file_path = file - break - if f"_{head_ref}_" in file.name: - file_path = file - break + any_report = file if "_master_" in file.name: - file_path = file - break - return cls.load_from_file(file_path or reports[-1]) + master_report = file + elif f"_{head_ref}_" in file.name: + ref_report = file + elif pr_number and f"_{pr_number}_" in file.name: + pr_report = file + + if not any_report: + return None + + if pr_report: + file_path = pr_report + elif ref_report: + file_path = ref_report + elif master_report: + file_path = master_report + else: + file_path = any_report + + return cls.load_from_file(file_path) @classmethod def load_from_file(cls, file: Union[Path, str]): # type: ignore From f610af56f7b8b1b2367420b7533b0262c3c8231d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 28 May 2024 07:20:25 +0000 Subject: [PATCH 392/392] Fix --- src/Backups/BackupIO_S3.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index be2f81a299c..92f086295a0 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -188,7 +188,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s fs::path(s3_uri.key) / path_in_backup, 0, file_size, - /* dest_s3_client= */ destination_disk->getObjectStorage()->getS3StorageClient(), + /* dest_s3_client= */ destination_disk->getS3StorageClient(), /* dest_bucket= */ blob_path[1], /* dest_key= */ blob_path[0], s3_settings.request_settings, @@ -253,7 +253,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src { LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName()); copyS3File( - src_disk->getObjectStorage()->getS3StorageClient(), + src_disk->getS3StorageClient(), /* src_bucket */ blob_path[1], /* src_key= */ blob_path[0], start_pos,