diff --git a/.github/ISSUE_TEMPLATE/20_feature-request.md b/.github/ISSUE_TEMPLATE/20_feature-request.md
index cf5ac000a23..1e5ccdbb3b3 100644
--- a/.github/ISSUE_TEMPLATE/20_feature-request.md
+++ b/.github/ISSUE_TEMPLATE/20_feature-request.md
@@ -15,7 +15,7 @@ assignees: ''
 
 **Use case**
 
-> A clear and concise description of what is the intended usage scenario is.
+> A clear and concise description of what the intended usage scenario is.
 
 **Describe the solution you'd like**
 
diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake
index b2a648c7ca3..2bb6deb4847 100644
--- a/cmake/cpu_features.cmake
+++ b/cmake/cpu_features.cmake
@@ -11,6 +11,10 @@ option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries n
 if (ARCH_NATIVE)
     set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
 
+    # Populate the ENABLE_ option flags. This is required for the build of some third-party dependencies, specifically snappy, which
+    # (somewhat weirdly) expects the corresponding SNAPPY_HAVE_ preprocessor macros to be defined, in addition to the microarchitecture
+    # feature flags being enabled in the compiler. With ARCH_NATIVE, the ENABLE_ option flags are therefore populated automatically
+    # according to the current CPU's capabilities, as detected using clang.
     if (ARCH_AMD64)
         execute_process(
             COMMAND sh -c "clang -E - -march=native -###"
diff --git a/cmake/cxx.cmake b/cmake/cxx.cmake
index 7d93bf05fc7..d2d8fa99100 100644
--- a/cmake/cxx.cmake
+++ b/cmake/cxx.cmake
@@ -1,4 +1,21 @@
-set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
+if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
+    # libcxx debug mode: https://releases.llvm.org/15.0.0/projects/libcxx/docs/DesignDocs/DebugMode.html
+    # According to the docs it violates complexity guarantees, so it would only ever be an option for Debug builds.
+    # string (APPEND CMAKE_CXX_FLAGS_DEBUG " -D_LIBCPP_ENABLE_DEBUG_MODE=1")
+    # ^^ Kept disabled: it crashes the database upon startup and needs investigation.
+    #    Moreover, the implementation looks like a poor man's MSAN specific to libcxx, and CI tests MSAN
+    #    anyway, so leaving the debug mode off loses little.
+
+    # Separately, libcxx provides an assertions mode:
+    # --> https://releases.llvm.org/15.0.0/projects/libcxx/docs/UsingLibcxx.html#assertions-mode
+    # It looks orthogonal to the debug mode, although the debug mode switches it on implicitly:
+    # --> https://github.com/llvm/llvm-project/blob/release/15.x/libcxx/include/__assert#L29
+    # The assertions are cheap and straightforward, so turn them on for debug builds:
+    string (APPEND CMAKE_CXX_FLAGS_DEBUG " -D_LIBCPP_ENABLE_ASSERTIONS=1")
+
+    # TODO After the upgrade to LLVM 18+, revisit all of the above, since they introduced "hardening modes":
+    # https://libcxx.llvm.org/Hardening.html
+endif ()
 
 add_subdirectory(contrib/libcxxabi-cmake)
 add_subdirectory(contrib/libcxx-cmake)
diff --git a/contrib/abseil-cpp-cmake/CMakeLists.txt b/contrib/abseil-cpp-cmake/CMakeLists.txt
index 4137547b736..1d40381a973 100644
--- a/contrib/abseil-cpp-cmake/CMakeLists.txt
+++ b/contrib/abseil-cpp-cmake/CMakeLists.txt
@@ -1,6 +1,9 @@
 set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
 set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}")
 
+# Build with C++17 to avoid errors "'X' does not refer to a value" when the `offsetof` macro is used.
+set(CMAKE_CXX_STANDARD 17)
+
 # This is a minimized version of the function definition in CMake/AbseilHelpers.cmake
 
 #
diff --git a/contrib/google-protobuf-cmake/CMakeLists.txt b/contrib/google-protobuf-cmake/CMakeLists.txt
index f1a744f851f..9df9d3e0026 100644
--- a/contrib/google-protobuf-cmake/CMakeLists.txt
+++ b/contrib/google-protobuf-cmake/CMakeLists.txt
@@ -5,6 +5,9 @@ if(NOT ENABLE_PROTOBUF)
   return()
 endif()
 
+# Build with C++17 to avoid errors "'X' does not refer to a value" when the `offsetof` macro is used.
+set(CMAKE_CXX_STANDARD 17)
+
 set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src")
 if(OS_FREEBSD AND SANITIZE STREQUAL "address")
   # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found
diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt
index 975774d1990..6dfa9a39583 100644
--- a/contrib/grpc-cmake/CMakeLists.txt
+++ b/contrib/grpc-cmake/CMakeLists.txt
@@ -6,6 +6,8 @@ if(NOT ENABLE_GRPC)
   return()
 endif()
 
+set(CMAKE_CXX_STANDARD 17)
+
 set(_gRPC_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc")
 set(_gRPC_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/grpc")
 
diff --git a/contrib/grpc-cmake/grpc.cmake b/contrib/grpc-cmake/grpc.cmake
index 39645938f14..83add88f38c 100644
--- a/contrib/grpc-cmake/grpc.cmake
+++ b/contrib/grpc-cmake/grpc.cmake
@@ -22,7 +22,7 @@
 # limitations under the License.
 
 # We want to use C++23, but GRPC is not ready
-set (CMAKE_CXX_STANDARD 20)
+set (CMAKE_CXX_STANDARD 17)
 
 set(_gRPC_ZLIB_INCLUDE_DIR "")
 set(_gRPC_ZLIB_LIBRARIES ch_contrib::zlib)
diff --git a/docker/docs/builder/Dockerfile b/docker/docs/builder/Dockerfile
index dd0ab925431..4d12bb5b135 100644
--- a/docker/docs/builder/Dockerfile
+++ b/docker/docs/builder/Dockerfile
@@ -9,7 +9,7 @@ RUN CGO_ENABLED=0 go install github.com/wjdp/htmltest@v${HTMLTEST_VERSION} \
 # nodejs 17 prefers ipv6 and is broken in our environment
 FROM node:16-alpine
 
-RUN apk add --no-cache git openssh bash
+RUN apk add --no-cache git openssh bash curl
 
 # At this point we want to really update /opt/clickhouse-docs directory
 # So we reset the cache
@@ -33,4 +33,7 @@ RUN mkdir /output_path \
 COPY run.sh /run.sh
 COPY --from=htmltest-builder /usr/bin/htmltest /usr/bin/htmltest
 
+# Install ClickHouse Local (used to auto-generate some doc pages); '&&' makes a failed download fail the image build.
+RUN curl -fsSL https://clickhouse.com/ -o /tmp/install-clickhouse.sh && sh /tmp/install-clickhouse.sh && rm /tmp/install-clickhouse.sh
+
 ENTRYPOINT ["/run.sh"]
diff --git a/docker/docs/builder/run.sh b/docker/docs/builder/run.sh
index d73adb5d279..77df10cbdbb 100755
--- a/docker/docs/builder/run.sh
+++ b/docker/docs/builder/run.sh
@@ -21,6 +21,78 @@ do
   fi
 done
 
+# Generate the "Core Settings" page: collect the names from the M(...) lines of Settings.cpp, look them up in
+# system.settings and write one Markdown section per setting (type, default value, description) to settings.md.
+./clickhouse -q "
+WITH
+
+'/ClickHouse/src/Core/Settings.cpp' AS cpp_file,
+
+settings_from_cpp AS
+(
+    SELECT extract(line, 'M\\(\\w+, (\\w+),') AS name
+    FROM file(cpp_file, LineAsString)
+    WHERE match(line, '^\\s*M\\(')
+),
+
+main_content AS
+(
+    SELECT format('## {} {}\\n\\nType: {}\\n\\nDefault value: {}\\n\\n{}\\n\\n', name, '{#'||name||'}', type, default, trim(BOTH '\\n' FROM description))
+    FROM system.settings WHERE name IN settings_from_cpp
+    ORDER BY name
+),
+
+'---
+sidebar_label: Core Settings
+sidebar_position: 2
+slug: /en/operations/settings/settings
+toc_max_heading_level: 2
+---
+
+# Core Settings
+
+All below settings are also available in table [system.settings](/docs/en/operations/system-tables/settings).
+
+' AS prefix
+
+SELECT prefix || (SELECT groupConcat(*) FROM main_content)
+INTO OUTFILE '/opt/clickhouse-docs/docs/en/operations/settings/settings.md' TRUNCATE FORMAT LineAsString
+"
+# Generate the "Format Settings" page (settings-formats.md) from the M(...) lines of FormatFactorySettingsDeclaration.h.
+./clickhouse -q "
+WITH
+
+'/ClickHouse/src/Core/FormatFactorySettingsDeclaration.h' AS cpp_file,
+
+settings_from_cpp AS
+(
+    SELECT extract(line, 'M\\(\\w+, (\\w+),') AS name
+    FROM file(cpp_file, LineAsString)
+    WHERE match(line, '^\\s*M\\(')
+),
+
+main_content AS
+(
+    SELECT format('## {} {}\\n\\nType: {}\\n\\nDefault value: {}\\n\\n{}\\n\\n', name, '{#'||name||'}', type, default, trim(BOTH '\\n' FROM description))
+    FROM system.settings WHERE name IN settings_from_cpp
+    ORDER BY name
+),
+
+'---
+sidebar_label: Format Settings
+sidebar_position: 52
+slug: /en/operations/settings/formats
+toc_max_heading_level: 2
+---
+
+# Format settings {#format-settings}
+
+' AS prefix
+
+SELECT prefix || (SELECT groupConcat(*) FROM main_content)
+INTO OUTFILE '/opt/clickhouse-docs/docs/en/operations/settings/settings-formats.md' TRUNCATE FORMAT LineAsString
+"
+
 # Force build error on wrong symlinks
 sed -i '/onBrokenMarkdownLinks:/ s/ignore/error/g' docusaurus.config.js
 
diff --git a/docker/test/integration/runner/misc/openldap/initialized.sh b/docker/test/integration/runner/misc/openldap/initialized.sh
new file mode 100755
index 00000000000..61ed1ff3e57
--- /dev/null
+++ b/docker/test/integration/runner/misc/openldap/initialized.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+set -o errexit
+
+# Create the marker file by hand; works around https://github.com/bitnami/containers/issues/73310
+touch /tmp/.openldap-initialized
diff --git a/docs/README.md b/docs/README.md
index d1260312166..42fdda88732 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -196,7 +196,6 @@ When writing docs, you can use prepared templates. Copy the code of a template a
 Templates:
 
 - [Function](_description_templates/template-function.md)
-- [Setting](_description_templates/template-setting.md)
 - [Server Setting](_description_templates/template-server-setting.md)
 - [Database or Table engine](_description_templates/template-engine.md)
 - [System table](_description_templates/template-system-table.md)
diff --git a/docs/_description_templates/template-setting.md b/docs/_description_templates/template-setting.md
deleted file mode 100644
index f4525d872df..00000000000
--- a/docs/_description_templates/template-setting.md
+++ /dev/null
@@ -1,27 +0,0 @@
-## setting_name {#setting_name}
-
-Description.
-
-For the switch setting, use the typical phrase: “Enables or disables something ...”.
-
-Possible values:
-
-*For switcher setting:*
-
--   0 — Disabled.
--   1 — Enabled.
-
-*For another setting (typical phrases):*
-
--   Positive integer.
--   0 — Disabled or unlimited or something else.
-
-Default value: `value`.
-
-**Additional Info** (Optional)
-
-The name of an additional section can be any, for example, **Usage**.
-
-**See Also** (Optional)
-
--   [link](#)
diff --git a/docs/_includes/install/deb.sh b/docs/_includes/install/deb.sh
deleted file mode 100644
index 0daf12a132f..00000000000
--- a/docs/_includes/install/deb.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-sudo apt-get install -y apt-transport-https ca-certificates dirmngr
-sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
-
-echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
-    /etc/apt/sources.list.d/clickhouse.list
-sudo apt-get update
-
-sudo apt-get install -y clickhouse-server clickhouse-client
-
-sudo service clickhouse-server start
-clickhouse-client # or "clickhouse-client --password" if you've set up a password.
diff --git a/docs/_includes/install/rpm.sh b/docs/_includes/install/rpm.sh
deleted file mode 100644
index ff99018f872..00000000000
--- a/docs/_includes/install/rpm.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-sudo yum install -y yum-utils
-sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
-sudo yum install -y clickhouse-server clickhouse-client
-
-sudo /etc/init.d/clickhouse-server start
-clickhouse-client # or "clickhouse-client --password" if you set up a password.
diff --git a/docs/_includes/install/tgz.sh b/docs/_includes/install/tgz.sh
deleted file mode 100644
index e42353f7b48..00000000000
--- a/docs/_includes/install/tgz.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \
-    grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1)
-export LATEST_VERSION
-
-case $(uname -m) in
-  x86_64) ARCH=amd64 ;;
-  aarch64) ARCH=arm64 ;;
-  *) echo "Unknown architecture $(uname -m)"; exit 1 ;;
-esac
-
-for PKG in clickhouse-common-static clickhouse-common-static-dbg clickhouse-server clickhouse-client
-do
-  curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION-${ARCH}.tgz" \
-    || curl -fO "https://packages.clickhouse.com/tgz/stable/$PKG-$LATEST_VERSION.tgz"
-done
-
-tar -xzvf "clickhouse-common-static-$LATEST_VERSION-${ARCH}.tgz" \
-  || tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz"
-sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh"
-
-tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION-${ARCH}.tgz" \
-  || tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz"
-sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh"
-
-tar -xzvf "clickhouse-server-$LATEST_VERSION-${ARCH}.tgz" \
-  || tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz"
-sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" configure
-sudo /etc/init.d/clickhouse-server start
-
-tar -xzvf "clickhouse-client-$LATEST_VERSION-${ARCH}.tgz" \
-  || tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz"
-sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh"
diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh
index d474aa98e76..7a7b948dbf0 100755
--- a/docs/_includes/install/universal.sh
+++ b/docs/_includes/install/universal.sh
@@ -14,7 +14,12 @@ then
         HAS_SSE42=$(grep sse4_2 /proc/cpuinfo)
         if [ "${HAS_SSE42}" ]
         then
-            DIR="amd64"
+            if ldd --version 2>&1 | grep -q musl
+            then
+                DIR="amd64musl"
+            else
+                DIR="amd64"
+            fi
         else
             DIR="amd64compat"
         fi
diff --git a/docs/en/engines/table-engines/integrations/azureBlobStorage.md b/docs/en/engines/table-engines/integrations/azureBlobStorage.md
index bdf96832e9d..bb1349ad9d0 100644
--- a/docs/en/engines/table-engines/integrations/azureBlobStorage.md
+++ b/docs/en/engines/table-engines/integrations/azureBlobStorage.md
@@ -63,7 +63,34 @@ Currently there are 3 ways to authenticate:
 - `SAS Token` - Can be used by providing an `endpoint`, `connection_string` or `storage_account_url`. It is identified by presence of '?' in the url.
 - `Workload Identity` - Can be used by providing an `endpoint` or `storage_account_url`. If `use_workload_identity` parameter is set in config, ([workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications)) is used for authentication.
 
+### Data cache {#data-cache}
 
+The `AzureBlobStorage` table engine supports data caching on local disk.
+See filesystem cache configuration options and usage in this [section](/docs/en/operations/storing-data.md/#using-local-cache).
+Cache entries are keyed by the path and ETag of the storage object, so ClickHouse will not read a stale cached version.
+
+To enable caching, use the settings `filesystem_cache_name = '<name>'` and `enable_filesystem_cache = 1`.
+
+```sql
+SELECT *
+FROM azureBlobStorage('DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://azurite1:10000/devstoreaccount1/;', 'test_container', 'test_table', 'CSV')
+SETTINGS filesystem_cache_name = 'cache_for_azure', enable_filesystem_cache = 1;
+```
+
+1. Either add the following section to the ClickHouse configuration file:
+
+``` xml
+<clickhouse>
+    <filesystem_caches>
+        <cache_for_azure>
+            <path>path to cache directory</path>
+            <max_size>10Gi</max_size>
+        </cache_for_azure>
+    </filesystem_caches>
+</clickhouse>
+```
+
+2. Or reuse the cache configuration (and therefore the cache storage) from the ClickHouse `storage_configuration` section, [described here](/docs/en/operations/storing-data.md/#using-local-cache).
 
 ## See also
 
diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md
index 964c952f31a..fb564b4873e 100644
--- a/docs/en/engines/table-engines/integrations/deltalake.md
+++ b/docs/en/engines/table-engines/integrations/deltalake.md
@@ -48,6 +48,10 @@ Using named collections:
 CREATE TABLE deltalake ENGINE=DeltaLake(deltalake_conf, filename = 'test_table')
 ```
 
+### Data cache {#data-cache}
+
+The `DeltaLake` table engine and table function support data caching in the same way as the `S3`, `AzureBlobStorage` and `HDFS` storages. See [here](../../../engines/table-engines/integrations/s3.md#data-cache).
+
 ## See also
 
 - [deltaLake table function](../../../sql-reference/table-functions/deltalake.md)
diff --git a/docs/en/engines/table-engines/integrations/iceberg.md b/docs/en/engines/table-engines/integrations/iceberg.md
index 86980733dcf..1a1d31e238b 100644
--- a/docs/en/engines/table-engines/integrations/iceberg.md
+++ b/docs/en/engines/table-engines/integrations/iceberg.md
@@ -63,6 +63,10 @@ CREATE TABLE iceberg_table ENGINE=IcebergS3(iceberg_conf, filename = 'test_table
 
 Table engine `Iceberg` is an alias to `IcebergS3` now.
 
+### Data cache {#data-cache}
+
+The `Iceberg` table engine and table function support data caching in the same way as the `S3`, `AzureBlobStorage` and `HDFS` storages. See [here](../../../engines/table-engines/integrations/s3.md#data-cache).
+
 ## See also
 
 - [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md)
diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md
index 63da4be615d..834b57757b9 100644
--- a/docs/en/engines/table-engines/integrations/mongodb.md
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@@ -9,7 +9,7 @@ sidebar_label: MongoDB
 MongoDB engine is read-only table engine which allows to read data from remote [MongoDB](https://www.mongodb.com/) collection.
 
 Only MongoDB v3.6+ servers are supported.
-[Seed list(`mongodb**+srv**`)](https://www.mongodb.com/docs/manual/reference/glossary/#std-term-seed-list) is not yet supported.
+[Seed list(`mongodb+srv`)](https://www.mongodb.com/docs/manual/reference/glossary/#std-term-seed-list) is not yet supported.
 
 :::note
 If you're facing troubles, please report the issue, and try to use [the legacy implementation](../../../operations/server-configuration-parameters/settings.md#use_legacy_mongodb_integration).
diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md
index e9d0a43242b..3522bd57cc3 100644
--- a/docs/en/engines/table-engines/integrations/mysql.md
+++ b/docs/en/engines/table-engines/integrations/mysql.md
@@ -4,12 +4,8 @@ sidebar_position: 138
 sidebar_label: MySQL
 ---
 
-import CloudAvailableBadge from '@theme/badges/CloudAvailableBadge';
-
 # MySQL Table Engine
 
-<CloudAvailableBadge />
-
 The MySQL engine allows you to perform `SELECT` and `INSERT` queries on data that is stored on a remote MySQL server.
 
 ## Creating a Table {#creating-a-table}
diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md
index f02d0563491..fb759b948a5 100644
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@@ -26,6 +26,7 @@ SELECT * FROM s3_engine_table LIMIT 2;
 │ two  │     2 │
 └──────┴───────┘
 ```
+
 ## Create Table {#creating-a-table}
 
 ``` sql
@@ -43,6 +44,37 @@ CREATE TABLE s3_engine_table (name String, value UInt32)
 - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user.  You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
 - `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension.
 
+### Data cache {#data-cache}
+
+The `S3` table engine supports data caching on local disk.
+See filesystem cache configuration options and usage in this [section](/docs/en/operations/storing-data.md/#using-local-cache).
+Cache entries are keyed by the path and ETag of the storage object, so ClickHouse will not read a stale cached version.
+
+To enable caching, use the settings `filesystem_cache_name = '<name>'` and `enable_filesystem_cache = 1`.
+
+```sql
+SELECT *
+FROM s3('http://minio:10000/clickhouse//test_3.csv', 'minioadmin', 'minioadminpassword', 'CSV')
+SETTINGS filesystem_cache_name = 'cache_for_s3', enable_filesystem_cache = 1;
+```
+
+There are two ways to define the cache in the configuration file:
+
+1. add the following section to clickhouse configuration file:
+
+``` xml
+<clickhouse>
+    <filesystem_caches>
+        <cache_for_s3>
+            <path>path to cache directory</path>
+            <max_size>10Gi</max_size>
+        </cache_for_s3>
+    </filesystem_caches>
+</clickhouse>
+```
+
+2. reuse cache configuration (and therefore cache storage) from clickhouse `storage_configuration` section, [described here](/docs/en/operations/storing-data.md/#using-local-cache)
+
 ### PARTITION BY
 
 `PARTITION BY` — Optional. In most cases you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).
diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md
index fe60ceedc0b..48078197309 100644
--- a/docs/en/operations/opentelemetry.md
+++ b/docs/en/operations/opentelemetry.md
@@ -31,6 +31,10 @@ The table must be enabled in the server configuration, see the `opentelemetry_sp
 
 The tags or attributes are saved as two parallel arrays, containing the keys and values. Use [ARRAY JOIN](../sql-reference/statements/select/array-join.md) to work with them.
 
+## Logging query settings {#log-query-settings}
+
+ClickHouse allows you to log changes to query settings during query execution. When enabled, any modifications made to query settings will be recorded in the OpenTelemetry span log. This feature is particularly useful in production environments for tracking configuration changes that may affect query performance.
+
 ## Integration with monitoring systems
 
 At the moment, there is no ready tool that can export the tracing data from ClickHouse to a monitoring system.
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
deleted file mode 100644
index 885530bcacd..00000000000
--- a/docs/en/operations/settings/settings-formats.md
+++ /dev/null
@@ -1,2323 +0,0 @@
----
-sidebar_label: Format Settings
-sidebar_position: 52
-slug: /en/operations/settings/formats
-toc_max_heading_level: 2
----
-
-# Format settings {#format-settings}
-
-## bool_false_representation {#bool_false_representation}
-
-Type: String
-
-Default value: false
-
-Text to represent false bool value in TSV/CSV/Vertical/Pretty formats.
-
-## bool_true_representation {#bool_true_representation}
-
-Type: String
-
-Default value: true
-
-Text to represent true bool value in TSV/CSV/Vertical/Pretty formats.
-
-## column_names_for_schema_inference {#column_names_for_schema_inference}
-
-Type: String
-
-Default value: 
-
-The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'
-
-## cross_to_inner_join_rewrite {#cross_to_inner_join_rewrite}
-
-Type: UInt64
-
-Default value: 1
-
-Use inner join instead of comma/cross join if there are joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible
-
-## date_time_input_format {#date_time_input_format}
-
-Type: DateTimeInputFormat
-
-Default value: basic
-
-Allows choosing a parser of the text representation of date and time.
-
-The setting does not apply to [date and time functions](../../sql-reference/functions/date-time-functions.md).
-
-Possible values:
-
-- `'best_effort'` — Enables extended parsing.
-
-    ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`.
-
-- `'basic'` — Use basic parser.
-
-    ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`.
-
-Cloud default value: `'best_effort'`.
-
-See also:
-
-- [DateTime data type.](../../sql-reference/data-types/datetime.md)
-- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md)
-
-## date_time_output_format {#date_time_output_format}
-
-Type: DateTimeOutputFormat
-
-Default value: simple
-
-Allows choosing different output formats of the text representation of date and time.
-
-Possible values:
-
-- `simple` - Simple output format.
-
-    ClickHouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone.
-
-- `iso` - ISO output format.
-
-    ClickHouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC).
-
-- `unix_timestamp` - Unix timestamp output format.
-
-    ClickHouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`.
-
-See also:
-
-- [DateTime data type.](../../sql-reference/data-types/datetime.md)
-- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md)
-
-## date_time_overflow_behavior {#date_time_overflow_behavior}
-
-Type: DateTimeOverflowBehavior
-
-Default value: ignore
-
-Overflow mode for Date, Date32, DateTime, DateTime64 types. Possible values: 'ignore', 'throw', 'saturate'.
-
-## dictionary_use_async_executor {#dictionary_use_async_executor}
-
-Type: Bool
-
-Default value: 0
-
-Execute a pipeline for reading dictionary source in several threads. It's supported only by dictionaries with local CLICKHOUSE source.
-
-## errors_output_format {#errors_output_format}
-
-Type: String
-
-Default value: CSV
-
-Method to write Errors to text output.
-
-## exact_rows_before_limit {#exact_rows_before_limit}
-
-Type: Bool
-
-Default value: 0
-
-When enabled, ClickHouse will provide exact value for rows_before_limit_at_least statistic, but with the cost that the data before limit will have to be read completely
-
-## format_avro_schema_registry_url {#format_avro_schema_registry_url}
-
-Type: URI
-
-Default value: 
-
-For AvroConfluent format: Confluent Schema Registry URL.
-
-## format_binary_max_array_size {#format_binary_max_array_size}
-
-Type: UInt64
-
-Default value: 1073741824
-
-The maximum allowed size for Array in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit
-
-## format_binary_max_string_size {#format_binary_max_string_size}
-
-Type: UInt64
-
-Default value: 1073741824
-
-The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit
-
-## format_capn_proto_enum_comparising_mode {#format_capn_proto_enum_comparising_mode}
-
-Type: CapnProtoEnumComparingMode
-
-Default value: by_values
-
-How to map ClickHouse Enum and CapnProto Enum
-
-## format_capn_proto_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema}
-
-Type: Bool
-
-Default value: 1
-
-Use autogenerated CapnProto schema when format_schema is not set
-
-## format_csv_allow_double_quotes {#format_csv_allow_double_quotes}
-
-Type: Bool
-
-Default value: 1
-
-If it is set to true, allow strings in double quotes.
-
-## format_csv_allow_single_quotes {#format_csv_allow_single_quotes}
-
-Type: Bool
-
-Default value: 0
-
-If it is set to true, allow strings in single quotes.
-
-## format_csv_delimiter {#format_csv_delimiter}
-
-Type: Char
-
-Default value: ,
-
-The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.
-
-## format_csv_null_representation {#format_csv_null_representation}
-
-Type: String
-
-Default value: \N
-
-Custom NULL representation in CSV format
-
-## format_custom_escaping_rule {#format_custom_escaping_rule}
-
-Type: EscapingRule
-
-Default value: Escaped
-
-Field escaping rule (for CustomSeparated format)
-
-## format_custom_field_delimiter {#format_custom_field_delimiter}
-
-Type: String
-
-Default value: 	
-
-Delimiter between fields (for CustomSeparated format)
-
-## format_custom_result_after_delimiter {#format_custom_result_after_delimiter}
-
-Type: String
-
-Default value: 
-
-Suffix after result set (for CustomSeparated format)
-
-## format_custom_result_before_delimiter {#format_custom_result_before_delimiter}
-
-Type: String
-
-Default value: 
-
-Prefix before result set (for CustomSeparated format)
-
-## format_custom_row_after_delimiter {#format_custom_row_after_delimiter}
-
-Type: String
-
-Default value: 
-
-
-Delimiter after field of the last column (for CustomSeparated format)
-
-## format_custom_row_before_delimiter {#format_custom_row_before_delimiter}
-
-Type: String
-
-Default value: 
-
-Delimiter before field of the first column (for CustomSeparated format)
-
-## format_custom_row_between_delimiter {#format_custom_row_between_delimiter}
-
-Type: String
-
-Default value: 
-
-Delimiter between rows (for CustomSeparated format)
-
-## format_display_secrets_in_show_and_select {#format_display_secrets_in_show_and_select}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables showing secrets in `SHOW` and `SELECT` queries for tables, databases,
-table functions, and dictionaries.
-
-User wishing to see secrets must also have
-[`display_secrets_in_show_and_select` server setting](../server-configuration-parameters/settings#display_secrets_in_show_and_select)
-turned on and a
-[`displaySecretsInShowAndSelect`](../../sql-reference/statements/grant#display-secrets) privilege.
-
-Possible values:
-
--   0 — Disabled.
--   1 — Enabled.
-
-## format_json_object_each_row_column_for_object_name {#format_json_object_each_row_column_for_object_name}
-
-Type: String
-
-Default value: 
-
-The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md/#jsonobjecteachrow) format.
-Column type should be String. If value is empty, default names `row_{i}`will be used for object names.
-
-### input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns}
-
-Allow variable number of columns in rows in JSONCompact/JSONCompactEachRow input formats.
-Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
-
-Disabled by default.
-
-### output_format_markdown_escape_special_characters {#output_format_markdown_escape_special_characters}
-
-When enabled, escape special characters in Markdown.
-
-[Common Mark](https://spec.commonmark.org/0.30/#example-12) defines the following special characters that can be escaped by \:
-
-```
-! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
-```
-
-Possible values:
-
-+ 0 — Disable.
-+ 1 — Enable.
-
-### input_format_json_empty_as_default {#input_format_json_empty_as_default}
-
-When enabled, replace empty input fields in JSON with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too.
-
-Possible values:
-
-+ 0 — Disable.
-+ 1 — Enable.
-
-## format_protobuf_use_autogenerated_schema {#format_protobuf_use_autogenerated_schema}
-
-Type: Bool
-
-Default value: 1
-
-Use autogenerated Protobuf when format_schema is not set
-
-## format_regexp {#format_regexp}
-
-Type: String
-
-Default value: 
-
-Regular expression (for Regexp format)
-
-## format_regexp_escaping_rule {#format_regexp_escaping_rule}
-
-Type: EscapingRule
-
-Default value: Raw
-
-Field escaping rule (for Regexp format)
-
-## format_regexp_skip_unmatched {#format_regexp_skip_unmatched}
-
-Type: Bool
-
-Default value: 0
-
-Skip lines unmatched by regular expression (for Regexp format)
-
-## format_schema {#format_schema}
-
-Type: String
-
-Default value: 
-
-This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format.
-
-## format_template_resultset {#format_template_resultset}
-
-Type: String
-
-Default value: 
-
-Path to file which contains format string for result set (for Template format)
-
-## format_template_resultset_format {#format_template_resultset_format}
-
-Type: String
-
-Default value: 
-
-Format string for result set (for Template format)
-
-## format_template_row {#format_template_row}
-
-Type: String
-
-Default value: 
-
-Path to file which contains format string for rows (for Template format)
-
-## format_template_row_format {#format_template_row_format}
-
-Type: String
-
-Default value: 
-
-Format string for rows (for Template format)
-
-## format_template_rows_between_delimiter {#format_template_rows_between_delimiter}
-
-Type: String
-
-Default value: 
-
-
-Delimiter between rows (for Template format)
-
-## format_tsv_null_representation {#format_tsv_null_representation}
-
-Type: String
-
-Default value: \N
-
-Custom NULL representation in TSV format
-
-## input_format_allow_errors_num {#input_format_allow_errors_num}
-
-Type: UInt64
-
-Default value: 0
-
-Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.).
-
-The default value is 0.
-
-Always pair it with `input_format_allow_errors_ratio`.
-
-If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one.
-
-If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception.
-
-## input_format_allow_errors_ratio {#input_format_allow_errors_ratio}
-
-Type: Float
-
-Default value: 0
-
-Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.).
-The percentage of errors is set as a floating-point number between 0 and 1.
-
-The default value is 0.
-
-Always pair it with `input_format_allow_errors_num`.
-
-If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one.
-
-If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception.
-
-## input_format_allow_seeks {#input_format_allow_seeks}
-
-Type: Bool
-
-Default value: 1
-
-Allow seeks while reading in ORC/Parquet/Arrow input formats.
-
-Enabled by default.
-
-## input_format_arrow_allow_missing_columns {#input_format_arrow_allow_missing_columns}
-
-Type: Bool
-
-Default value: 1
-
-Allow missing columns while reading Arrow input formats
-
-## input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching}
-
-Type: Bool
-
-Default value: 0
-
-Ignore case when matching Arrow columns with CH columns.
-
-## input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference {#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference}
-
-Type: Bool
-
-Default value: 0
-
-Skip columns with unsupported types while schema inference for format Arrow
-
-## input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields}
-
-Type: Bool
-
-Default value: 0
-
-For Avro/AvroConfluent format: when field is not found in schema use default value instead of error
-
-## input_format_avro_null_as_default {#input_format_avro_null_as_default}
-
-Type: Bool
-
-Default value: 0
-
-For Avro/AvroConfluent format: insert default in case of null and non Nullable column
-
-## input_format_binary_decode_types_in_binary_format {#input_format_binary_decode_types_in_binary_format}
-
-Type: Bool
-
-Default value: 0
-
-Read data types in binary format instead of type names in RowBinaryWithNamesAndTypes input format
-
-## input_format_binary_read_json_as_string {#input_format_binary_read_json_as_string}
-
-Type: Bool
-
-Default value: 0
-
-Read values of [JSON](../../sql-reference/data-types/newjson.md) data type as JSON [String](../../sql-reference/data-types/string.md) values in RowBinary input format.
-
-## input_format_bson_skip_fields_with_unsupported_types_in_schema_inference {#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference}
-
-Type: Bool
-
-Default value: 0
-
-Skip fields with unsupported types while schema inference for format BSON.
-
-## input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference {#input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference}
-
-Type: Bool
-
-Default value: 0
-
-Skip columns with unsupported types while schema inference for format CapnProto
-
-## input_format_csv_allow_cr_end_of_line {#input_format_csv_allow_cr_end_of_line}
-
-Type: Bool
-
-Default value: 0
-
-If it is set true, \\r will be allowed at end of line not followed by \\n
-
-## input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns}
-
-Type: Bool
-
-Default value: 0
-
-Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values
-
-## input_format_csv_allow_whitespace_or_tab_as_delimiter {#input_format_csv_allow_whitespace_or_tab_as_delimiter}
-
-Type: Bool
-
-Default value: 0
-
-Allow using spaces and tabs (\\t) as field delimiters in CSV strings
-
-## input_format_csv_arrays_as_nested_csv {#input_format_csv_arrays_as_nested_csv}
-
-Type: Bool
-
-Default value: 0
-
-When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: \"[\"\"Hello\"\", \"\"world\"\", \"\"42\"\"\"\" TV\"\"]\". Braces around array can be omitted.
-
-## input_format_csv_deserialize_separate_columns_into_tuple {#input_format_csv_deserialize_separate_columns_into_tuple}
-
-Type: Bool
-
-Default value: 1
-
-If it is set to true, then separate columns written in CSV format can be deserialized to a Tuple column.
-
-## input_format_csv_detect_header {#input_format_csv_detect_header}
-
-Type: Bool
-
-Default value: 1
-
-Automatically detect header with names and types in CSV format
-
-## input_format_csv_empty_as_default {#input_format_csv_empty_as_default}
-
-Type: Bool
-
-Default value: 1
-
-Treat empty fields in CSV input as default values.
-
-## input_format_csv_enum_as_number {#input_format_csv_enum_as_number}
-
-Type: Bool
-
-Default value: 0
-
-Treat inserted enum values in CSV formats as enum indices
-
-## input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines}
-
-Type: UInt64
-
-Default value: 0
-
-Skip specified number of lines at the beginning of data in CSV format
-
-## input_format_csv_skip_trailing_empty_lines {#input_format_csv_skip_trailing_empty_lines}
-
-Type: Bool
-
-Default value: 0
-
-Skip trailing empty lines in CSV format
-
-## input_format_csv_trim_whitespaces {#input_format_csv_trim_whitespaces}
-
-Type: Bool
-
-Default value: 1
-
-Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings
-
-## input_format_csv_try_infer_numbers_from_strings {#input_format_csv_try_infer_numbers_from_strings}
-
-Type: Bool
-
-Default value: 0
-
-If enabled, during schema inference ClickHouse will try to infer numbers from string fields.
-It can be useful if CSV data contains quoted UInt64 numbers.
-
-Disabled by default.
-
-## input_format_csv_try_infer_strings_from_quoted_tuples {#input_format_csv_try_infer_strings_from_quoted_tuples}
-
-Type: Bool
-
-Default value: 1
-
-Interpret quoted tuples in the input data as a value of type String.
-
-## input_format_csv_use_best_effort_in_schema_inference {#input_format_csv_use_best_effort_in_schema_inference}
-
-Type: Bool
-
-Default value: 1
-
-Use some tweaks and heuristics to infer schema in CSV format
-
-## input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values}
-
-Type: Bool
-
-Default value: 0
-
-Allow to set default value to column when CSV field deserialization failed on bad value
-
-## input_format_custom_allow_variable_number_of_columns {#input_format_custom_allow_variable_number_of_columns}
-
-Type: Bool
-
-Default value: 0
-
-Ignore extra columns in CustomSeparated input (if file has more columns than expected) and treat missing fields in CustomSeparated input as default values
-
-## input_format_custom_detect_header {#input_format_custom_detect_header}
-
-Type: Bool
-
-Default value: 1
-
-Automatically detect header with names and types in CustomSeparated format
-
-## input_format_custom_skip_trailing_empty_lines {#input_format_custom_skip_trailing_empty_lines}
-
-Type: Bool
-
-Default value: 0
-
-Skip trailing empty lines in CustomSeparated format
-
-## input_format_defaults_for_omitted_fields {#input_format_defaults_for_omitted_fields}
-
-Type: Bool
-
-Default value: 1
-
-When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option applies to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) (and other JSON formats), [CSV](../../interfaces/formats.md/#csv), [TabSeparated](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [Parquet](../../interfaces/formats.md/#parquet), [Arrow](../../interfaces/formats.md/#arrow), [Avro](../../interfaces/formats.md/#avro), [ORC](../../interfaces/formats.md/#orc), [Native](../../interfaces/formats.md/#native) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes.
-
-:::note
-When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance.
-:::
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## input_format_force_null_for_omitted_fields {#input_format_force_null_for_omitted_fields}
-
-Type: Bool
-
-Default value: 0
-
-Force initialize omitted fields with null values
-
-## input_format_hive_text_allow_variable_number_of_columns {#input_format_hive_text_allow_variable_number_of_columns}
-
-Type: Bool
-
-Default value: 1
-
-Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values
-
-## input_format_hive_text_collection_items_delimiter {#input_format_hive_text_collection_items_delimiter}
-
-Type: Char
-
-Default value: 
-
-Delimiter between collection (array or map) items in Hive Text File
-
-## input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}
-
-Type: Char
-
-Default value: 
-
-Delimiter between fields in Hive Text File
-
-## input_format_hive_text_map_keys_delimiter {#input_format_hive_text_map_keys_delimiter}
-
-Type: Char
-
-Default value: 
-
-Delimiter between a pair of map key/values in Hive Text File
-
-## input_format_import_nested_json {#input_format_import_nested_json}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables the insertion of JSON data with nested objects.
-
-Supported formats:
-
-- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow)
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-See also:
-
-- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format.
-
-## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error}
-
-Type: Bool
-
-Default value: 0
-
-Deserialization of IPv4 will use default values instead of throwing exception on conversion error.
-
-Disabled by default.
-
-## input_format_ipv6_default_on_conversion_error {#input_format_ipv6_default_on_conversion_error}
-
-Type: Bool
-
-Default value: 0
-
-Deserialization of IPv6 will use default values instead of throwing exception on conversion error.
-
-Disabled by default.
-
-## input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns}
-
-Type: Bool
-
-Default value: 0
-
-Ignore extra columns in JSONCompact(EachRow) input (if file has more columns than expected) and treat missing fields in JSONCompact(EachRow) input as default values
-
-## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}
-
-Type: Bool
-
-Default value: 1
-
-Insert default values for missing elements in JSON object while parsing named tuple.
-This setting works only when setting `input_format_json_named_tuples_as_objects` is enabled.
-
-Enabled by default.
-
-## input_format_json_empty_as_default {#input_format_json_empty_as_default}
-
-Type: Bool
-
-Default value: 0
-
-Treat empty fields in JSON input as default values.
-
-## input_format_json_ignore_unknown_keys_in_named_tuple {#input_format_json_ignore_unknown_keys_in_named_tuple}
-
-Type: Bool
-
-Default value: 1
-
-Ignore unknown keys in json object for named tuples.
-
-Enabled by default.
-
-## input_format_json_ignore_unnecessary_fields {#input_format_json_ignore_unnecessary_fields}
-
-Type: Bool
-
-Default value: 1
-
-Ignore unnecessary fields and do not parse them. When this is enabled, exceptions may not be thrown on JSON strings of invalid format or with duplicated fields
-
-## input_format_json_infer_incomplete_types_as_strings {#input_format_json_infer_incomplete_types_as_strings}
-
-Type: Bool
-
-Default value: 1
-
-Allow to use String type for JSON keys that contain only `Null`/`{}`/`[]` in data sample during schema inference.
-In JSON formats any value can be read as String, and we can avoid errors like `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference
-by using String type for keys with unknown types.
-
-Example:
-
-```sql
-SET input_format_json_infer_incomplete_types_as_strings = 1, input_format_json_try_infer_named_tuples_from_objects = 1;
-DESCRIBE format(JSONEachRow, '{"obj" : {"a" : [1,2,3], "b" : "hello", "c" : null, "d" : {}, "e" : []}}');
-SELECT * FROM format(JSONEachRow, '{"obj" : {"a" : [1,2,3], "b" : "hello", "c" : null, "d" : {}, "e" : []}}');
-```
-
-Result:
-```
-┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ obj  │ Tuple(a Array(Nullable(Int64)), b Nullable(String), c Nullable(String), d Nullable(String), e Array(Nullable(String))) │              │                    │         │                  │                │
-└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
-
-┌─obj────────────────────────────┐
-│ ([1,2,3],'hello',NULL,'{}',[]) │
-└────────────────────────────────┘
-```
-
-Enabled by default.
-
-## input_format_json_max_depth {#input_format_json_max_depth}
-
-Type: UInt64
-
-Default value: 1000
-
-Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely.
-
-## input_format_json_named_tuples_as_objects {#input_format_json_named_tuples_as_objects}
-
-Type: Bool
-
-Default value: 1
-
-Parse named tuple columns as JSON objects.
-
-Enabled by default.
-
-## input_format_json_read_arrays_as_strings {#input_format_json_read_arrays_as_strings}
-
-Type: Bool
-
-Default value: 1
-
-Allow parsing JSON arrays as strings in JSON input formats.
-
-Example:
-
-```sql
-SET input_format_json_read_arrays_as_strings = 1;
-SELECT arr, toTypeName(arr), JSONExtractArrayRaw(arr)[3] from format(JSONEachRow, 'arr String', '{"arr" : [1, "Hello", [1,2,3]]}');
-```
-
-Result:
-```
-┌─arr───────────────────┬─toTypeName(arr)─┬─arrayElement(JSONExtractArrayRaw(arr), 3)─┐
-│ [1, "Hello", [1,2,3]] │ String          │ [1,2,3]                                   │
-└───────────────────────┴─────────────────┴───────────────────────────────────────────┘
-```
-
-Enabled by default.
-
-## input_format_json_read_bools_as_numbers {#input_format_json_read_bools_as_numbers}
-
-Type: Bool
-
-Default value: 1
-
-Allow parsing bools as numbers in JSON input formats.
-
-Enabled by default.
-
-## input_format_json_read_bools_as_strings {#input_format_json_read_bools_as_strings}
-
-Type: Bool
-
-Default value: 1
-
-Allow parsing bools as strings in JSON input formats.
-
-Enabled by default.
-
-## input_format_json_read_numbers_as_strings {#input_format_json_read_numbers_as_strings}
-
-Type: Bool
-
-Default value: 1
-
-Allow parsing numbers as strings in JSON input formats.
-
-Enabled by default.
-
-## input_format_json_read_objects_as_strings {#input_format_json_read_objects_as_strings}
-
-Type: Bool
-
-Default value: 1
-
-Allow parsing JSON objects as strings in JSON input formats.
-
-Example:
-
-```sql
-SET input_format_json_read_objects_as_strings = 1;
-CREATE TABLE test (id UInt64, obj String, date Date) ENGINE=Memory();
-INSERT INTO test FORMAT JSONEachRow {"id" : 1, "obj" : {"a" : 1, "b" : "Hello"}, "date" : "2020-01-01"};
-SELECT * FROM test;
-```
-
-Result:
-
-```
-┌─id─┬─obj──────────────────────┬───────date─┐
-│  1 │ {"a" : 1, "b" : "Hello"} │ 2020-01-01 │
-└────┴──────────────────────────┴────────────┘
-```
-
-Enabled by default.
-
-## input_format_json_throw_on_bad_escape_sequence {#input_format_json_throw_on_bad_escape_sequence}
-
-Type: Bool
-
-Default value: 1
-
-Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data.
-
-Enabled by default.
-
-## input_format_json_try_infer_named_tuples_from_objects {#input_format_json_try_infer_named_tuples_from_objects}
-
-Type: Bool
-
-Default value: 1
-
-If enabled, during schema inference ClickHouse will try to infer named Tuple from JSON objects.
-The resulting named Tuple will contain all elements from all corresponding JSON objects from sample data.
-
-Example:
-
-```sql
-SET input_format_json_try_infer_named_tuples_from_objects = 1;
-DESC format(JSONEachRow, '{"obj" : {"a" : 42, "b" : "Hello"}}, {"obj" : {"a" : 43, "c" : [1, 2, 3]}}, {"obj" : {"d" : {"e" : 42}}}')
-```
-
-Result:
-
-```
-┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ obj  │ Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Tuple(e Nullable(Int64))) │              │                    │         │                  │                │
-└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
-```
-
-Enabled by default.
-
-## input_format_json_try_infer_numbers_from_strings {#input_format_json_try_infer_numbers_from_strings}
-
-Type: Bool
-
-Default value: 0
-
-If enabled, during schema inference ClickHouse will try to infer numbers from string fields.
-It can be useful if JSON data contains quoted UInt64 numbers.
-
-Disabled by default.
-
-## input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects {#input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects}
-
-Type: Bool
-
-Default value: 0
-
-Use String type instead of an exception in case of ambiguous paths in JSON objects during named tuples inference
-
-## input_format_json_validate_types_from_metadata {#input_format_json_validate_types_from_metadata}
-
-Type: Bool
-
-Default value: 1
-
-For JSON/JSONCompact/JSONColumnsWithMetadata input formats, if this setting is set to 1,
-the types from metadata in input data will be compared with the types of the corresponding columns from the table.
-
-Enabled by default.
-
-## input_format_max_bytes_to_read_for_schema_inference {#input_format_max_bytes_to_read_for_schema_inference}
-
-Type: UInt64
-
-Default value: 33554432
-
-The maximum amount of data in bytes to read for automatic schema inference.
-
-## input_format_max_rows_to_read_for_schema_inference {#input_format_max_rows_to_read_for_schema_inference}
-
-Type: UInt64
-
-Default value: 25000
-
-The maximum number of rows of data to read for automatic schema inference.
-
-## input_format_msgpack_number_of_columns {#input_format_msgpack_number_of_columns}
-
-Type: UInt64
-
-Default value: 0
-
-The number of columns in inserted MsgPack data. Used for automatic schema inference from data.
-
-## input_format_mysql_dump_map_column_names {#input_format_mysql_dump_map_column_names}
-
-Type: Bool
-
-Default value: 1
-
-Match columns from table in MySQL dump and columns from ClickHouse table by names
-
-## input_format_mysql_dump_table_name {#input_format_mysql_dump_table_name}
-
-Type: String
-
-Default value: 
-
-Name of the table in MySQL dump from which to read data
-
-## input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion}
-
-Type: Bool
-
-Default value: 1
-
-Allow data types conversion in Native input format
-
-## input_format_native_decode_types_in_binary_format {#input_format_native_decode_types_in_binary_format}
-
-Type: Bool
-
-Default value: 0
-
-Read data types in binary format instead of type names in Native input format
-
-## input_format_null_as_default {#input_format_null_as_default}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables the initialization of [NULL](../../sql-reference/syntax.md/#null-literal) fields with [default values](../../sql-reference/statements/create/table.md/#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable).
-If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting.
-
-This setting is applicable for most input formats.
-
-For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too.
-
-Possible values:
-
-- 0 — Inserting `NULL` into a not nullable column causes an exception.
-- 1 — `NULL` fields are initialized with default column values.
-
-## input_format_orc_allow_missing_columns {#input_format_orc_allow_missing_columns}
-
-Type: Bool
-
-Default value: 1
-
-Allow missing columns while reading ORC input formats
-
-## input_format_orc_case_insensitive_column_matching {#input_format_orc_case_insensitive_column_matching}
-
-Type: Bool
-
-Default value: 0
-
-Ignore case when matching ORC columns with CH columns.
-
-## input_format_orc_filter_push_down {#input_format_orc_filter_push_down}
-
-Type: Bool
-
-Default value: 1
-
-When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.
-
-## input_format_orc_reader_time_zone_name {#input_format_orc_reader_time_zone_name}
-
-Type: String
-
-Default value: GMT
-
-The time zone name for ORC row reader, the default ORC row reader's time zone is GMT.
-
-## input_format_orc_row_batch_size {#input_format_orc_row_batch_size}
-
-Type: Int64
-
-Default value: 100000
-
-Batch size when reading ORC stripes.
-
-## input_format_orc_skip_columns_with_unsupported_types_in_schema_inference {#input_format_orc_skip_columns_with_unsupported_types_in_schema_inference}
-
-Type: Bool
-
-Default value: 0
-
-Skip columns with unsupported types while schema inference for format ORC
-
-## input_format_orc_use_fast_decoder {#input_format_orc_use_fast_decoder}
-
-Type: Bool
-
-Default value: 1
-
-Use a faster ORC decoder implementation.
-
-## input_format_parquet_allow_missing_columns {#input_format_parquet_allow_missing_columns}
-
-Type: Bool
-
-Default value: 1
-
-Allow missing columns while reading Parquet input formats
-
-## input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching}
-
-Type: Bool
-
-Default value: 0
-
-Ignore case when matching Parquet columns with CH columns.
-
-## input_format_parquet_filter_push_down {#input_format_parquet_filter_push_down}
-
-Type: Bool
-
-Default value: 1
-
-When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.
-
-## input_format_parquet_local_file_min_bytes_for_seek {#input_format_parquet_local_file_min_bytes_for_seek}
-
-Type: UInt64
-
-Default value: 8192
-
-Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format
-
-## input_format_parquet_max_block_size {#input_format_parquet_max_block_size}
-
-Type: UInt64
-
-Default value: 65409
-
-Max block size for parquet reader.
-
-## input_format_parquet_prefer_block_bytes {#input_format_parquet_prefer_block_bytes}
-
-Type: UInt64
-
-Default value: 16744704
-
-Average block bytes output by parquet reader
-
-## input_format_parquet_preserve_order {#input_format_parquet_preserve_order}
-
-Type: Bool
-
-Default value: 0
-
-Avoid reordering rows when reading from Parquet files. Usually makes it much slower.
-
-## input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference {#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference}
-
-Type: Bool
-
-Default value: 0
-
-Skip columns with unsupported types while schema inference for format Parquet
-
-## input_format_parquet_use_native_reader {#input_format_parquet_use_native_reader}
-
-Type: Bool
-
-Default value: 0
-
-When reading Parquet files, use the native reader instead of the Arrow reader.
-
-## input_format_protobuf_flatten_google_wrappers {#input_format_protobuf_flatten_google_wrappers}
-
-Type: Bool
-
-Default value: 0
-
-Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls
-
-## input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference {#input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference}
-
-Type: Bool
-
-Default value: 0
-
-Skip fields with unsupported types while schema inference for format Protobuf
-
-## input_format_record_errors_file_path {#input_format_record_errors_file_path}
-
-Type: String
-
-Default value: 
-
-Path of the file used to record errors while reading text formats (CSV, TSV).
-
-## input_format_skip_unknown_fields {#input_format_skip_unknown_fields}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables skipping insertion of extra data.
-
-When writing data, ClickHouse throws an exception if input data contain columns that do not exist in the target table. If skipping is enabled, ClickHouse does not insert extra data and does not throw an exception.
-
-Supported formats:
-
-- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) (and other JSON formats)
-- [BSONEachRow](../../interfaces/formats.md/#bsoneachrow) (and other JSON formats)
-- [TSKV](../../interfaces/formats.md/#tskv)
-- All formats with suffixes WithNames/WithNamesAndTypes
-- [MySQLDump](../../interfaces/formats.md/#mysqldump)
-- [Native](../../interfaces/formats.md/#native)
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## input_format_try_infer_dates {#input_format_try_infer_dates}
-
-Type: Bool
-
-Default value: 1
-
-If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`.
-
-Enabled by default.
-
-## input_format_try_infer_datetimes {#input_format_try_infer_datetimes}
-
-Type: Bool
-
-Default value: 1
-
-If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`.
-
-Enabled by default.
-
-## input_format_try_infer_datetimes_only_datetime64 {#input_format_try_infer_datetimes_only_datetime64}
-
-Type: Bool
-
-Default value: 0
-
-When input_format_try_infer_datetimes is enabled, infer only DateTime64 but not DateTime types
-
-## input_format_try_infer_exponent_floats {#input_format_try_infer_exponent_floats}
-
-Type: Bool
-
-Default value: 0
-
-Try to infer floats in exponential notation while schema inference in text formats (except JSON, where exponent numbers are always inferred)
-
-## input_format_try_infer_integers {#input_format_try_infer_integers}
-
-Type: Bool
-
-Default value: 1
-
-If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`.
-
-Enabled by default.
-
-## input_format_try_infer_variants {#input_format_try_infer_variants}
-
-Type: Bool
-
-Default value: 0
-
-If enabled, ClickHouse will try to infer type [`Variant`](../../sql-reference/data-types/variant.md) in schema inference for text formats when there is more than one possible type for column/array elements.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## input_format_tsv_allow_variable_number_of_columns {#input_format_tsv_allow_variable_number_of_columns}
-
-Type: Bool
-
-Default value: 0
-
-Ignore extra columns in TSV input (if file has more columns than expected) and treat missing fields in TSV input as default values
-
-## input_format_tsv_crlf_end_of_line {#input_format_tsv_crlf_end_of_line}
-
-Type: Bool
-
-Default value: 0
-
-If it is set true, file function will read TSV format with \\r\\n instead of \\n.
-
-## input_format_tsv_detect_header {#input_format_tsv_detect_header}
-
-Type: Bool
-
-Default value: 1
-
-Automatically detect header with names and types in TSV format
-
-## input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}
-
-Type: Bool
-
-Default value: 0
-
-Treat empty fields in TSV input as default values.
-
-## input_format_tsv_enum_as_number {#input_format_tsv_enum_as_number}
-
-Type: Bool
-
-Default value: 0
-
-Treat inserted enum values in TSV formats as enum indices.
-
-## input_format_tsv_skip_first_lines {#input_format_tsv_skip_first_lines}
-
-Type: UInt64
-
-Default value: 0
-
-Skip specified number of lines at the beginning of data in TSV format
-
-## input_format_tsv_skip_trailing_empty_lines {#input_format_tsv_skip_trailing_empty_lines}
-
-Type: Bool
-
-Default value: 0
-
-Skip trailing empty lines in TSV format
-
-## input_format_tsv_use_best_effort_in_schema_inference {#input_format_tsv_use_best_effort_in_schema_inference}
-
-Type: Bool
-
-Default value: 1
-
-Use some tweaks and heuristics to infer schema in TSV format
-
-## input_format_values_accurate_types_of_literals {#input_format_values_accurate_types_of_literals}
-
-Type: Bool
-
-Default value: 1
-
-For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.
-
-## input_format_values_deduce_templates_of_expressions {#input_format_values_deduce_templates_of_expressions}
-
-Type: Bool
-
-Default value: 1
-
-For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.
-
-## input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
-
-Type: Bool
-
-Default value: 1
-
-For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.
-
-## input_format_with_names_use_header {#input_format_with_names_use_header}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables checking the column order when inserting data.
-
-To improve insert performance, we recommend disabling this check if you are sure that the column order of the input data is the same as in the target table.
-
-Supported formats:
-
-- [CSVWithNames](../../interfaces/formats.md/#csvwithnames)
-- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes)
-- [TabSeparatedWithNames](../../interfaces/formats.md/#tabseparatedwithnames)
-- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes)
-- [JSONCompactEachRowWithNames](../../interfaces/formats.md/#jsoncompacteachrowwithnames)
-- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes)
-- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md/#jsoncompactstringseachrowwithnames)
-- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes)
-- [RowBinaryWithNames](../../interfaces/formats.md/#rowbinarywithnames)
-- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes)
-- [CustomSeparatedWithNames](../../interfaces/formats.md/#customseparatedwithnames)
-- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes)
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## input_format_with_types_use_header {#input_format_with_types_use_header}
-
-Type: Bool
-
-Default value: 1
-
-Controls whether format parser should check if data types from the input data match data types from the target table.
-
-Supported formats:
-
-- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes)
-- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes)
-- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes)
-- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes)
-- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes)
-- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes)
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## insert_distributed_one_random_shard {#insert_distributed_one_random_shard}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table when there is no distributed key.
-
-By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will reject any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards.
-
-Possible values:
-
-- 0 — Insertion is rejected if there are multiple shards and no distributed key is given.
-- 1 — Insertion is done randomly among all available shards when no distributed key is given.
-
-## interval_output_format {#interval_output_format}
-
-Type: IntervalOutputFormat
-
-Default value: numeric
-
-Allows choosing different output formats of the text representation of interval types.
-
-Possible values:
-
--   `kusto` - KQL-style output format.
-
-    ClickHouse outputs intervals in [KQL format](https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-timespan-format-strings#the-constant-c-format-specifier). For example, `toIntervalDay(2)` would be formatted as `2.00:00:00`. Please note that for interval types of varying length (i.e. `IntervalMonth` and `IntervalYear`) the average number of seconds per interval is taken into account.
-
--   `numeric` - Numeric output format.
-
-    ClickHouse outputs intervals as their underlying numeric representation. For example, `toIntervalDay(2)` would be formatted as `2`.
-
-See also:
-
--   [Interval](../../sql-reference/data-types/special-data-types/interval.md)
-
-## output_format_arrow_compression_method {#output_format_arrow_compression_method}
-
-Type: ArrowCompression
-
-Default value: lz4_frame
-
-Compression method for Arrow output format. Supported codecs: lz4_frame, zstd, none (uncompressed)
-
-## output_format_arrow_fixed_string_as_fixed_byte_array {#output_format_arrow_fixed_string_as_fixed_byte_array}
-
-Type: Bool
-
-Default value: 1
-
-Use Arrow FIXED_SIZE_BINARY type instead of Binary for FixedString columns.
-
-## output_format_arrow_low_cardinality_as_dictionary {#output_format_arrow_low_cardinality_as_dictionary}
-
-Type: Bool
-
-Default value: 0
-
-Enable output LowCardinality type as Dictionary Arrow type
-
-## output_format_arrow_string_as_string {#output_format_arrow_string_as_string}
-
-Type: Bool
-
-Default value: 1
-
-Use Arrow String type instead of Binary for String columns
-
-## output_format_arrow_use_64_bit_indexes_for_dictionary {#output_format_arrow_use_64_bit_indexes_for_dictionary}
-
-Type: Bool
-
-Default value: 0
-
-Always use 64 bit integers for dictionary indexes in Arrow format
-
-## output_format_arrow_use_signed_indexes_for_dictionary {#output_format_arrow_use_signed_indexes_for_dictionary}
-
-Type: Bool
-
-Default value: 1
-
-Use signed integers for dictionary indexes in Arrow format
-
-## output_format_avro_codec {#output_format_avro_codec}
-
-Type: String
-
-Default value: 
-
-Compression codec used for output. Possible values: 'null', 'deflate', 'snappy', 'zstd'.
-
-## output_format_avro_rows_in_file {#output_format_avro_rows_in_file}
-
-Type: UInt64
-
-Default value: 1
-
-Max rows in a file (if permitted by storage)
-
-## output_format_avro_string_column_pattern {#output_format_avro_string_column_pattern}
-
-Type: String
-
-Default value: 
-
-For Avro format: regexp of String columns to select as AVRO string.
-
-## output_format_avro_sync_interval {#output_format_avro_sync_interval}
-
-Type: UInt64
-
-Default value: 16384
-
-Sync interval in bytes.
-
-## output_format_binary_encode_types_in_binary_format {#output_format_binary_encode_types_in_binary_format}
-
-Type: Bool
-
-Default value: 0
-
-Write data types in binary format instead of type names in RowBinaryWithNamesAndTypes output format
-
-## output_format_binary_write_json_as_string {#output_format_binary_write_json_as_string}
-
-Type: Bool
-
-Default value: 0
-
-Write values of [JSON](../../sql-reference/data-types/newjson.md) data type as JSON [String](../../sql-reference/data-types/string.md) values in RowBinary output format.
-
-## output_format_bson_string_as_string {#output_format_bson_string_as_string}
-
-Type: Bool
-
-Default value: 0
-
-Use BSON String type instead of Binary for String columns.
-
-## output_format_csv_crlf_end_of_line {#output_format_csv_crlf_end_of_line}
-
-Type: Bool
-
-Default value: 0
-
-If it is set true, end of line in CSV format will be \\r\\n instead of \\n.
-
-## output_format_csv_serialize_tuple_into_separate_columns {#output_format_csv_serialize_tuple_into_separate_columns}
-
-Type: Bool
-
-Default value: 1
-
-If it is set to true, then Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost).
-
-## output_format_decimal_trailing_zeros {#output_format_decimal_trailing_zeros}
-
-Type: Bool
-
-Default value: 0
-
-Output trailing zeros when printing Decimal values. E.g. 1.230000 instead of 1.23.
-
-Disabled by default.
-
-## output_format_enable_streaming {#output_format_enable_streaming}
-
-Type: Bool
-
-Default value: 0
-
-Enable streaming in output formats that support it.
-
-Disabled by default.
-
-## output_format_json_array_of_rows {#output_format_json_array_of_rows}
-
-Type: Bool
-
-Default value: 0
-
-Enables the ability to output all rows as a JSON array in the [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) format.
-
-Possible values:
-
-- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format.
-- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format.
-
-**Example of a query with the enabled setting**
-
-Query:
-
-```sql
-SET output_format_json_array_of_rows = 1;
-SELECT number FROM numbers(3) FORMAT JSONEachRow;
-```
-
-Result:
-
-```text
-[
-{"number":"0"},
-{"number":"1"},
-{"number":"2"}
-]
-```
-
-**Example of a query with the disabled setting**
-
-Query:
-
-```sql
-SET output_format_json_array_of_rows = 0;
-SELECT number FROM numbers(3) FORMAT JSONEachRow;
-```
-
-Result:
-
-```text
-{"number":"0"}
-{"number":"1"}
-{"number":"2"}
-```
-
-## output_format_json_escape_forward_slashes {#output_format_json_escape_forward_slashes}
-
-Type: Bool
-
-Default value: 1
-
-Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.
-
-Enabled by default.
-
-## output_format_json_named_tuples_as_objects {#output_format_json_named_tuples_as_objects}
-
-Type: Bool
-
-Default value: 1
-
-Serialize named tuple columns as JSON objects.
-
-Enabled by default.
-
-## output_format_json_quote_64bit_floats {#output_format_json_quote_64bit_floats}
-
-Type: Bool
-
-Default value: 0
-
-Controls quoting of 64-bit [floats](../../sql-reference/data-types/float.md) when they are output in JSON* formats.
-
-Disabled by default.
-
-## output_format_json_quote_64bit_integers {#output_format_json_quote_64bit_integers}
-
-Type: Bool
-
-Default value: 1
-
-Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md/#json) format.
-Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations.
-
-Possible values:
-
-- 0 — Integers are output without quotes.
-- 1 — Integers are enclosed in quotes.
-
-## output_format_json_quote_decimals {#output_format_json_quote_decimals}
-
-Type: Bool
-
-Default value: 0
-
-Controls quoting of decimals in JSON output formats.
-
-Disabled by default.
-
-## output_format_json_quote_denormals {#output_format_json_quote_denormals}
-
-Type: Bool
-
-Default value: 0
-
-Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/formats.md/#json) output format.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-**Example**
-
-Consider the following table `account_orders`:
-
-```text
-┌─id─┬─name───┬─duration─┬─period─┬─area─┐
-│  1 │ Andrew │       20 │      0 │  400 │
-│  2 │ John   │       40 │      0 │    0 │
-│  3 │ Bob    │       15 │      0 │ -100 │
-└────┴────────┴──────────┴────────┴──────┘
-```
-
-When `output_format_json_quote_denormals = 0`, the query returns `null` values in output:
-
-```sql
-SELECT area/period FROM account_orders FORMAT JSON;
-```
-
-```json
-{
-        "meta":
-        [
-                {
-                        "name": "divide(area, period)",
-                        "type": "Float64"
-                }
-        ],
-
-        "data":
-        [
-                {
-                        "divide(area, period)": null
-                },
-                {
-                        "divide(area, period)": null
-                },
-                {
-                        "divide(area, period)": null
-                }
-        ],
-
-        "rows": 3,
-
-        "statistics":
-        {
-                "elapsed": 0.003648093,
-                "rows_read": 3,
-                "bytes_read": 24
-        }
-}
-```
-
-When `output_format_json_quote_denormals = 1`, the query returns:
-
-```json
-{
-        "meta":
-        [
-                {
-                        "name": "divide(area, period)",
-                        "type": "Float64"
-                }
-        ],
-
-        "data":
-        [
-                {
-                        "divide(area, period)": "inf"
-                },
-                {
-                        "divide(area, period)": "-nan"
-                },
-                {
-                        "divide(area, period)": "-inf"
-                }
-        ],
-
-        "rows": 3,
-
-        "statistics":
-        {
-                "elapsed": 0.000070241,
-                "rows_read": 3,
-                "bytes_read": 24
-        }
-}
-```
-
-## output_format_json_skip_null_value_in_named_tuples {#output_format_json_skip_null_value_in_named_tuples}
-
-Type: Bool
-
-Default value: 0
-
-Skip key-value pairs with null values when serializing named tuple columns as JSON objects. It is only valid when `output_format_json_named_tuples_as_objects` is true.
-
-## output_format_json_validate_utf8 {#output_format_json_validate_utf8}
-
-Type: Bool
-
-Default value: 0
-
-Controls validation of UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate UTF-8.
-
-Disabled by default.
-
-## output_format_markdown_escape_special_characters {#output_format_markdown_escape_special_characters}
-
-Type: Bool
-
-Default value: 0
-
-Escape special characters in Markdown
-
-## output_format_msgpack_uuid_representation {#output_format_msgpack_uuid_representation}
-
-Type: MsgPackUUIDRepresentation
-
-Default value: ext
-
-Controls how UUIDs are output in MsgPack format.
-
-## output_format_native_encode_types_in_binary_format {#output_format_native_encode_types_in_binary_format}
-
-Type: Bool
-
-Default value: 0
-
-Write data types in binary format instead of type names in Native output format
-
-## output_format_native_write_json_as_string {#output_format_native_write_json_as_string}
-
-Type: Bool
-
-Default value: 0
-
-Write data of [JSON](../../sql-reference/data-types/newjson.md) column as [String](../../sql-reference/data-types/string.md) column containing JSON strings instead of default native JSON serialization.
-
-## output_format_orc_compression_method {#output_format_orc_compression_method}
-
-Type: ORCCompression
-
-Default value: zstd
-
-Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)
-
-## output_format_orc_dictionary_key_size_threshold {#output_format_orc_dictionary_key_size_threshold}
-
-Type: Double
-
-Default value: 0
-
-For a string column in ORC output format, if the number of distinct values is greater than this fraction of the total number of non-null rows, turn off dictionary encoding. Otherwise dictionary encoding is enabled
-
-## output_format_orc_row_index_stride {#output_format_orc_row_index_stride}
-
-Type: UInt64
-
-Default value: 10000
-
-Target row index stride in ORC output format
-
-## output_format_orc_string_as_string {#output_format_orc_string_as_string}
-
-Type: Bool
-
-Default value: 1
-
-Use ORC String type instead of Binary for String columns
-
-## output_format_parquet_batch_size {#output_format_parquet_batch_size}
-
-Type: UInt64
-
-Default value: 1024
-
-Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.
-
-## output_format_parquet_compliant_nested_types {#output_format_parquet_compliant_nested_types}
-
-Type: Bool
-
-Default value: 1
-
-In parquet file schema, use name 'element' instead of 'item' for list elements. This is a historical artifact of Arrow library implementation. Generally increases compatibility, except perhaps with some old versions of Arrow.
-
-## output_format_parquet_compression_method {#output_format_parquet_compression_method}
-
-Type: ParquetCompression
-
-Default value: zstd
-
-Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)
-
-## output_format_parquet_data_page_size {#output_format_parquet_data_page_size}
-
-Type: UInt64
-
-Default value: 1048576
-
-Target page size in bytes, before compression.
-
-## output_format_parquet_fixed_string_as_fixed_byte_array {#output_format_parquet_fixed_string_as_fixed_byte_array}
-
-Type: Bool
-
-Default value: 1
-
-Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.
-
-## output_format_parquet_parallel_encoding {#output_format_parquet_parallel_encoding}
-
-Type: Bool
-
-Default value: 1
-
-Do Parquet encoding in multiple threads. Requires output_format_parquet_use_custom_encoder.
-
-## output_format_parquet_row_group_size {#output_format_parquet_row_group_size}
-
-Type: UInt64
-
-Default value: 1000000
-
-Target row group size in rows.
-
-## output_format_parquet_row_group_size_bytes {#output_format_parquet_row_group_size_bytes}
-
-Type: UInt64
-
-Default value: 536870912
-
-Target row group size in bytes, before compression.
-
-## output_format_parquet_string_as_string {#output_format_parquet_string_as_string}
-
-Type: Bool
-
-Default value: 1
-
-Use Parquet String type instead of Binary for String columns.
-
-## output_format_parquet_use_custom_encoder {#output_format_parquet_use_custom_encoder}
-
-Type: Bool
-
-Default value: 1
-
-Use a faster Parquet encoder implementation.
-
-## output_format_parquet_version {#output_format_parquet_version}
-
-Type: ParquetVersion
-
-Default value: 2.latest
-
-Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)
-
-## output_format_parquet_write_page_index {#output_format_parquet_write_page_index}
-
-Type: Bool
-
-Default value: 1
-
-Add a possibility to write page index into parquet files.
-
-## output_format_pretty_color {#output_format_pretty_color}
-
-Type: UInt64Auto
-
-Default value: auto
-
-Use ANSI escape sequences in Pretty formats. 0 - disabled, 1 - enabled, 'auto' - enabled if a terminal.
-
-## output_format_pretty_display_footer_column_names {#output_format_pretty_display_footer_column_names}
-
-Type: UInt64
-
-Default value: 1
-
-Display column names in the footer if there are many table rows.
-
-Possible values:
-
-- 0 — No column names are displayed in the footer.
-- 1 — Column names are displayed in the footer if row count is greater than or equal to the threshold value set by [output_format_pretty_display_footer_column_names_min_rows](#output_format_pretty_display_footer_column_names_min_rows) (50 by default).
-
-**Example**
-
-Query:
-
-```sql
-SELECT *, toTypeName(*) FROM (SELECT * FROM system.numbers LIMIT 1000);
-```
-
-Result:
-
-```response
-      ┌─number─┬─toTypeName(number)─┐
-   1. │      0 │ UInt64             │
-   2. │      1 │ UInt64             │
-   3. │      2 │ UInt64             │
-   ...
- 999. │    998 │ UInt64             │
-1000. │    999 │ UInt64             │
-      └─number─┴─toTypeName(number)─┘
-```
-
-## output_format_pretty_display_footer_column_names_min_rows {#output_format_pretty_display_footer_column_names_min_rows}
-
-Type: UInt64
-
-Default value: 50
-
-Sets the minimum number of rows for which a footer with column names will be displayed if setting [output_format_pretty_display_footer_column_names](#output_format_pretty_display_footer_column_names) is enabled.
-
-## output_format_pretty_grid_charset {#output_format_pretty_grid_charset}
-
-Type: String
-
-Default value: UTF-8
-
-Charset for printing grid borders. Available charsets: ASCII, UTF-8 (default one).
-
-## output_format_pretty_highlight_digit_groups {#output_format_pretty_highlight_digit_groups}
-
-Type: Bool
-
-Default value: 1
-
-If enabled and if output is a terminal, highlight every digit corresponding to the number of thousands, millions, etc. with underline.
-
-## output_format_pretty_max_column_pad_width {#output_format_pretty_max_column_pad_width}
-
-Type: UInt64
-
-Default value: 250
-
-Maximum width to pad all values in a column in Pretty formats.
-
-## output_format_pretty_max_rows {#output_format_pretty_max_rows}
-
-Type: UInt64
-
-Default value: 10000
-
-Rows limit for Pretty formats.
-
-## output_format_pretty_max_value_width {#output_format_pretty_max_value_width}
-
-Type: UInt64
-
-Default value: 10000
-
-Maximum width of value to display in Pretty formats. If greater - it will be cut.
-
-## output_format_pretty_max_value_width_apply_for_single_value {#output_format_pretty_max_value_width_apply_for_single_value}
-
-Type: UInt64
-
-Default value: 0
-
-Only cut values (see the `output_format_pretty_max_value_width` setting) when it is not a single value in a block. Otherwise output it entirely, which is useful for the `SHOW CREATE TABLE` query.
-
-## output_format_pretty_row_numbers {#output_format_pretty_row_numbers}
-
-Type: Bool
-
-Default value: 1
-
-Add row numbers before each row for pretty output format
-
-## output_format_pretty_single_large_number_tip_threshold {#output_format_pretty_single_large_number_tip_threshold}
-
-Type: UInt64
-
-Default value: 1000000
-
-Print a readable number tip on the right side of the table if the block consists of a single number which exceeds this value (except 0)
-
-## output_format_protobuf_nullables_with_google_wrappers {#output_format_protobuf_nullables_with_google_wrappers}
-
-Type: Bool
-
-Default value: 0
-
-When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized
-
-## output_format_schema {#output_format_schema}
-
-Type: String
-
-Default value: 
-
-The path to the file where the automatically generated schema will be saved in [Cap’n Proto](../../interfaces/formats.md#capnproto-capnproto) or [Protobuf](../../interfaces/formats.md#protobuf-protobuf) formats.
-
-## output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names}
-
-Type: Bool
-
-Default value: 1
-
-Include column names in INSERT query
-
-## output_format_sql_insert_max_batch_size {#output_format_sql_insert_max_batch_size}
-
-Type: UInt64
-
-Default value: 65409
-
-The maximum number of rows in one INSERT statement.
-
-## output_format_sql_insert_quote_names {#output_format_sql_insert_quote_names}
-
-Type: Bool
-
-Default value: 1
-
-Quote column names with '`' characters
-
-## output_format_sql_insert_table_name {#output_format_sql_insert_table_name}
-
-Type: String
-
-Default value: table
-
-The name of table in the output INSERT query
-
-## output_format_sql_insert_use_replace {#output_format_sql_insert_use_replace}
-
-Type: Bool
-
-Default value: 0
-
-Use REPLACE statement instead of INSERT
-
-## output_format_tsv_crlf_end_of_line {#output_format_tsv_crlf_end_of_line}
-
-Type: Bool
-
-Default value: 0
-
-If it is set true, end of line in TSV format will be \\r\\n instead of \\n.
-
-## output_format_values_escape_quote_with_quote {#output_format_values_escape_quote_with_quote}
-
-Type: Bool
-
-Default value: 0
-
-If true, escape ' with ''; otherwise escape it with \\'
-
-## output_format_write_statistics {#output_format_write_statistics}
-
-Type: Bool
-
-Default value: 1
-
-Write statistics about read rows, bytes, time elapsed in suitable output formats.
-
-Enabled by default
-
-## precise_float_parsing {#precise_float_parsing}
-
-Type: Bool
-
-Default value: 0
-
-Prefer more precise (but slower) float parsing algorithm
-
-## regexp_dict_allow_hyperscan {#regexp_dict_allow_hyperscan}
-
-Type: Bool
-
-Default value: 1
-
-Allow regexp_tree dictionary using Hyperscan library.
-
-## regexp_dict_flag_case_insensitive {#regexp_dict_flag_case_insensitive}
-
-Type: Bool
-
-Default value: 0
-
-Use case-insensitive matching for a regexp_tree dictionary. Can be overridden in individual expressions with (?i) and (?-i).
-
-## regexp_dict_flag_dotall {#regexp_dict_flag_dotall}
-
-Type: Bool
-
-Default value: 0
-
-Allow '.' to match newline characters for a regexp_tree dictionary.
-
-## rows_before_aggregation {#rows_before_aggregation}
-
-Type: Bool
-
-Default value: 0
-
-When enabled, ClickHouse will provide the exact value for the rows_before_aggregation statistic, which represents the number of rows read before aggregation.
-
-## schema_inference_hints {#schema_inference_hints}
-
-Type: String
-
-Default value: 
-
-The list of column names and types to use as hints in schema inference for formats without schema.
-
-Example:
-
-Query:
-```sql
-desc format(JSONEachRow, '{"x" : 1, "y" : "String", "z" : "0.0.0.0" }') settings schema_inference_hints='x UInt8, z IPv4';
-```
-
-Result:
-```sql
-x	UInt8
-y	Nullable(String)
-z	IPv4
-```
-
-:::note
-If the `schema_inference_hints` is not formatted properly, or if there is a typo or a wrong datatype, etc... the whole schema_inference_hints will be ignored.
-:::
-
-## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
-
-Type: UInt64Auto
-
-Default value: 1
-
-Controls making inferred types `Nullable` in schema inference.
-If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred types will never be `Nullable`; if set to `auto`, an inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or the file metadata contains information about column nullability.
-
-## schema_inference_mode {#schema_inference_mode}
-
-Type: SchemaInferenceMode
-
-Default value: default
-
-Mode of schema inference. 'default' - assume that all files have the same schema and the schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be a union of the schemas of all files.
-
-## show_create_query_identifier_quoting_rule {#show_create_query_identifier_quoting_rule}
-
-Type: IdentifierQuotingRule
-
-Default value: when_necessary
-
-Set the quoting rule for identifiers in SHOW CREATE query
-
-## show_create_query_identifier_quoting_style {#show_create_query_identifier_quoting_style}
-
-Type: IdentifierQuotingStyle
-
-Default value: Backticks
-
-Set the quoting style for identifiers in SHOW CREATE query
-
-## type_json_skip_duplicated_paths {#type_json_skip_duplicated_paths}
-
-Type: Bool
-
-Default value: 0
-
-When enabled, duplicated paths encountered while parsing a JSON object into the JSON type are ignored and only the first one is inserted, instead of throwing an exception.
-
-## validate_experimental_and_suspicious_types_inside_nested_types {#validate_experimental_and_suspicious_types_inside_nested_types}
-
-Type: Bool
-
-Default value: 1
-
-Validate usage of experimental and suspicious types inside nested types like Array/Map/Tuple
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
deleted file mode 100644
index 53727bbc9b0..00000000000
--- a/docs/en/operations/settings/settings.md
+++ /dev/null
@@ -1,9749 +0,0 @@
----
-sidebar_label: Core Settings
-sidebar_position: 2
-slug: /en/operations/settings/settings
-toc_max_heading_level: 2
----
-
-# Core Settings
-
-All below settings are also available in table [system.settings](/docs/en/operations/system-tables/settings).
-
-## add_http_cors_header {#add_http_cors_header}
-
-Type: Bool
-
-Default value: 0
-
-Add the HTTP CORS header.
-
-## additional_result_filter {#additional_result_filter}
-
-Type: String
-
-Default value:
-
-An additional filter expression to apply to the result of `SELECT` query.
-This setting is not applied to any subquery.
-
-**Example**
-
-``` sql
-INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
-SELECT * FROM table_1;
-```
-```response
-┌─x─┬─y────┐
-│ 1 │ a    │
-│ 2 │ bb   │
-│ 3 │ ccc  │
-│ 4 │ dddd │
-└───┴──────┘
-```
-```sql
-SELECT *
-FROM table_1
-SETTINGS additional_result_filter = 'x != 2'
-```
-```response
-┌─x─┬─y────┐
-│ 1 │ a    │
-│ 3 │ ccc  │
-│ 4 │ dddd │
-└───┴──────┘
-```
-
-## additional_table_filters {#additional_table_filters}
-
-Type: Map
-
-Default value: {}
-
-An additional filter expression that is applied after reading
-from the specified table.
-
-**Example**
-
-``` sql
-INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
-SELECT * FROM table_1;
-```
-```response
-┌─x─┬─y────┐
-│ 1 │ a    │
-│ 2 │ bb   │
-│ 3 │ ccc  │
-│ 4 │ dddd │
-└───┴──────┘
-```
-```sql
-SELECT *
-FROM table_1
-SETTINGS additional_table_filters = {'table_1': 'x != 2'}
-```
-```response
-┌─x─┬─y────┐
-│ 1 │ a    │
-│ 3 │ ccc  │
-│ 4 │ dddd │
-└───┴──────┘
-```
-
-## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md/#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
-It is implemented via query rewrite (similar to [count_distinct_implementation](#count_distinct_implementation) setting) to get consistent results for distributed queries.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-**Example**
-
-Consider the following query with aggregate functions:
-```sql
-SELECT SUM(-1), MAX(0) FROM system.one WHERE 0;
-```
-
-With `aggregate_functions_null_for_empty = 0` it would produce:
-```text
-┌─SUM(-1)─┬─MAX(0)─┐
-│       0 │      0 │
-└─────────┴────────┘
-```
-
-With `aggregate_functions_null_for_empty = 1` the result would be:
-```text
-┌─SUMOrNull(-1)─┬─MAXOrNull(0)─┐
-│          NULL │         NULL │
-└───────────────┴──────────────┘
-```
-
-## aggregation_in_order_max_block_bytes {#aggregation_in_order_max_block_bytes}
-
-Type: UInt64
-
-Default value: 50000000
-
-Maximal size of a block in bytes accumulated during aggregation in order of primary key. A smaller block size allows the final merge stage of aggregation to be parallelized more.
-
-## aggregation_memory_efficient_merge_threads {#aggregation_memory_efficient_merge_threads}
-
-Type: UInt64
-
-Default value: 0
-
-Number of threads to use for merging intermediate aggregation results in memory-efficient mode. The larger the value, the more memory is consumed. 0 means the same as 'max_threads'.
-
-## allow_aggregate_partitions_independently {#allow_aggregate_partitions_independently}
-
-Type: Bool
-
-Default value: 0
-
-Enable independent aggregation of partitions on separate threads when the partition key suits the group by key. Beneficial when the number of partitions is close to the number of cores and the partitions have roughly the same size.
-
-## allow_archive_path_syntax {#allow_archive_path_syntax}
-
-Type: Bool
-
-Default value: 1
-
-File/S3 engines/table function will parse paths with '::' as '\\<archive\\> :: \\<file\\>' if archive has correct extension
-
-## allow_asynchronous_read_from_io_pool_for_merge_tree {#allow_asynchronous_read_from_io_pool_for_merge_tree}
-
-Type: Bool
-
-Default value: 0
-
-Use background I/O pool to read from MergeTree tables. This setting may increase performance for I/O bound queries
-
-## allow_changing_replica_until_first_data_packet {#allow_changing_replica_until_first_data_packet}
-
-Type: Bool
-
-Default value: 0
-
-If it's enabled, in hedged requests we can start new connection until receiving first data packet even if we have already made some progress
-(but progress hasn't been updated for the `receive_data_timeout` timeout), otherwise we disable changing replica after the first time we made progress.
-
-## allow_create_index_without_type {#allow_create_index_without_type}
-
-Type: Bool
-
-Default value: 0
-
-Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.
-
-## allow_custom_error_code_in_throwif {#allow_custom_error_code_in_throwif}
-
-Type: Bool
-
-Default value: 0
-
-Enable custom error code in function throwIf(). If true, thrown exceptions may have unexpected error codes.
-
-## allow_ddl {#allow_ddl}
-
-Type: Bool
-
-Default value: 1
-
-If it is set to true, then a user is allowed to execute DDL queries.
-
-## allow_deprecated_database_ordinary {#allow_deprecated_database_ordinary}
-
-Type: Bool
-
-Default value: 0
-
-Allow to create databases with deprecated Ordinary engine
-
-## allow_deprecated_error_prone_window_functions {#allow_deprecated_error_prone_window_functions}
-
-Type: Bool
-
-Default value: 0
-
-Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)
-
-## allow_deprecated_snowflake_conversion_functions {#allow_deprecated_snowflake_conversion_functions}
-
-Type: Bool
-
-Default value: 0
-
-Functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake` are deprecated and disabled by default.
-Please use functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` instead.
-
-To re-enable the deprecated functions (e.g., during a transition period), please set this setting to `true`.
-
-## allow_deprecated_syntax_for_merge_tree {#allow_deprecated_syntax_for_merge_tree}
-
-Type: Bool
-
-Default value: 0
-
-Allow to create *MergeTree tables with deprecated engine definition syntax
-
-## allow_distributed_ddl {#allow_distributed_ddl}
-
-Type: Bool
-
-Default value: 1
-
-If it is set to true, then a user is allowed to execute distributed DDL queries.
-
-## allow_drop_detached {#allow_drop_detached}
-
-Type: Bool
-
-Default value: 0
-
-Allow ALTER TABLE ... DROP DETACHED PART[ITION] ... queries
-
-## allow_execute_multiif_columnar {#allow_execute_multiif_columnar}
-
-Type: Bool
-
-Default value: 1
-
-Allow execute multiIf function columnar
-
-## allow_experimental_analyzer {#allow_experimental_analyzer}
-
-Type: Bool
-
-Default value: 1
-
-Allow new query analyzer.
-
-## allow_experimental_codecs {#allow_experimental_codecs}
-
-Type: Bool
-
-Default value: 0
-
-If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).
-
-## allow_experimental_database_materialized_mysql {#allow_experimental_database_materialized_mysql}
-
-Type: Bool
-
-Default value: 0
-
-Allow to create database with Engine=MaterializedMySQL(...).
-
-## allow_experimental_database_materialized_postgresql {#allow_experimental_database_materialized_postgresql}
-
-Type: Bool
-
-Default value: 0
-
-Allow to create database with Engine=MaterializedPostgreSQL(...).
-
-## allow_experimental_dynamic_type {#allow_experimental_dynamic_type}
-
-Type: Bool
-
-Default value: 0
-
-Allow Dynamic data type
-
-## allow_experimental_full_text_index {#allow_experimental_full_text_index}
-
-Type: Bool
-
-Default value: 0
-
-If it is set to true, allow to use experimental full-text index.
-
-## allow_experimental_funnel_functions {#allow_experimental_funnel_functions}
-
-Type: Bool
-
-Default value: 0
-
-Enable experimental functions for funnel analysis.
-
-## allow_experimental_hash_functions {#allow_experimental_hash_functions}
-
-Type: Bool
-
-Default value: 0
-
-Enable experimental hash functions
-
-## allow_experimental_inverted_index {#allow_experimental_inverted_index}
-
-Type: Bool
-
-Default value: 0
-
-If it is set to true, allow to use experimental inverted index.
-
-## allow_experimental_join_condition {#allow_experimental_join_condition}
-
-Type: Bool
-
-Default value: 0
-
-Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.
-
-## allow_experimental_join_right_table_sorting {#allow_experimental_join_right_table_sorting}
-
-Type: Bool
-
-Default value: 0
-
-If it is set to true, and the conditions of `join_to_sort_minimum_perkey_rows` and `join_to_sort_maximum_table_rows` are met, rerange the right table by key to improve the performance in left or inner hash join.
-
-## allow_experimental_json_type {#allow_experimental_json_type}
-
-Type: Bool
-
-Default value: 0
-
-Allow JSON data type
-
-## allow_experimental_kafka_offsets_storage_in_keeper {#allow_experimental_kafka_offsets_storage_in_keeper}
-
-Type: Bool
-
-Default value: 0
-
-Allow experimental feature to store Kafka related offsets in ClickHouse Keeper. When enabled a ClickHouse Keeper path and replica name can be specified to the Kafka table engine. As a result instead of the regular Kafka engine, a new type of storage engine will be used that stores the committed offsets primarily in ClickHouse Keeper
-
-## allow_experimental_live_view {#allow_experimental_live_view}
-
-Type: Bool
-
-Default value: 0
-
-Allows creation of a deprecated LIVE VIEW.
-
-Possible values:
-
-- 0 — Working with live views is disabled.
-- 1 — Working with live views is enabled.
-
-## allow_experimental_materialized_postgresql_table {#allow_experimental_materialized_postgresql_table}
-
-Type: Bool
-
-Default value: 0
-
-Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental
-
-## allow_experimental_nlp_functions {#allow_experimental_nlp_functions}
-
-Type: Bool
-
-Default value: 0
-
-Enable experimental functions for natural language processing.
-
-## allow_experimental_object_type {#allow_experimental_object_type}
-
-Type: Bool
-
-Default value: 0
-
-Allow Object and JSON data types
-
-## allow_experimental_parallel_reading_from_replicas {#allow_experimental_parallel_reading_from_replicas}
-
-Type: UInt64
-
-Default value: 0
-
-Use up to `max_parallel_replicas` replicas from each shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disabled in case of failure, 2 - enabled, throws an exception in case of failure
-
-## allow_experimental_query_deduplication {#allow_experimental_query_deduplication}
-
-Type: Bool
-
-Default value: 0
-
-Experimental data deduplication for SELECT queries based on part UUIDs
-
-## allow_experimental_shared_set_join {#allow_experimental_shared_set_join}
-
-Type: Bool
-
-Default value: 1
-
-Only in ClickHouse Cloud. Allow to create SharedSet and SharedJoin
-
-## allow_experimental_statistics {#allow_experimental_statistics}
-
-Type: Bool
-
-Default value: 0
-
-Allows defining columns with [statistics](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) and [manipulate statistics](../../engines/table-engines/mergetree-family/mergetree.md#column-statistics).
-
-## allow_experimental_time_series_table {#allow_experimental_time_series_table}
-
-Type: Bool
-
-Default value: 0
-
-Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine.
-
-Possible values:
-
-- 0 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is disabled.
-- 1 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is enabled.
-
-## allow_experimental_variant_type {#allow_experimental_variant_type}
-
-Type: Bool
-
-Default value: 0
-
-Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md).
-
-## allow_experimental_vector_similarity_index {#allow_experimental_vector_similarity_index}
-
-Type: Bool
-
-Default value: 0
-
-Allow experimental vector similarity index
-
-## allow_experimental_window_view {#allow_experimental_window_view}
-
-Type: Bool
-
-Default value: 0
-
-Enable WINDOW VIEW. Not mature enough.
-
-## allow_get_client_http_header {#allow_get_client_http_header}
-
-Type: Bool
-
-Default value: 0
-
-Allow to use the function `getClientHTTPHeader` which lets you obtain the value of a header of the current HTTP request. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.
-
-## allow_hyperscan {#allow_hyperscan}
-
-Type: Bool
-
-Default value: 1
-
-Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.
-
-## allow_introspection_functions {#allow_introspection_functions}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling.
-
-Possible values:
-
-- 1 — Introspection functions enabled.
-- 0 — Introspection functions disabled.
-
-**See Also**
-
-- [Sampling Query Profiler](../../operations/optimizing-performance/sampling-query-profiler.md)
-- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
-
-## allow_materialized_view_with_bad_select {#allow_materialized_view_with_bad_select}
-
-Type: Bool
-
-Default value: 1
-
-Allow CREATE MATERIALIZED VIEW with SELECT query that references nonexistent tables or columns. It must still be syntactically valid. Doesn't apply to refreshable MVs. Doesn't apply if the MV schema needs to be inferred from the SELECT query (i.e. if the CREATE has no column list and no TO table). Can be used for creating MV before its source table.
-
-## allow_named_collection_override_by_default {#allow_named_collection_override_by_default}
-
-Type: Bool
-
-Default value: 1
-
-Allow named collections' fields override by default.
-
-## allow_non_metadata_alters {#allow_non_metadata_alters}
-
-Type: Bool
-
-Default value: 1
-
-Allow to execute alters which affect not only table metadata, but also data on disk
-
-## allow_nonconst_timezone_arguments {#allow_nonconst_timezone_arguments}
-
-Type: Bool
-
-Default value: 0
-
-Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()
-
-## allow_nondeterministic_mutations {#allow_nondeterministic_mutations}
-
-Type: Bool
-
-Default value: 0
-
-User-level setting that allows mutations on replicated tables to make use of non-deterministic functions such as `dictGet`.
-
-Given that, for example, dictionaries can be out of sync across nodes, mutations that pull values from them are disallowed on replicated tables by default. Enabling this setting allows this behavior, making it the user's responsibility to ensure that the data used is in sync across all nodes.
-
-**Example**
-
-``` xml
-<profiles>
-    <default>
-        <allow_nondeterministic_mutations>1</allow_nondeterministic_mutations>
-
-        <!-- ... -->
-    </default>
-
-    <!-- ... -->
-
-</profiles>
-```
-
-## allow_nondeterministic_optimize_skip_unused_shards {#allow_nondeterministic_optimize_skip_unused_shards}
-
-Type: Bool
-
-Default value: 0
-
-Allow nondeterministic (like `rand` or `dictGet`, since the latter has some caveats with updates) functions in sharding key.
-
-Possible values:
-
-- 0 — Disallowed.
-- 1 — Allowed.
-
-## allow_prefetched_read_pool_for_local_filesystem {#allow_prefetched_read_pool_for_local_filesystem}
-
-Type: Bool
-
-Default value: 0
-
-Prefer prefetched threadpool if all parts are on local filesystem
-
-## allow_prefetched_read_pool_for_remote_filesystem {#allow_prefetched_read_pool_for_remote_filesystem}
-
-Type: Bool
-
-Default value: 1
-
-Prefer prefetched threadpool if all parts are on remote filesystem
-
-## allow_push_predicate_when_subquery_contains_with {#allow_push_predicate_when_subquery_contains_with}
-
-Type: Bool
-
-Default value: 1
-
-Allows push predicate when subquery contains WITH clause
-
-## allow_settings_after_format_in_insert {#allow_settings_after_format_in_insert}
-
-Type: Bool
-
-Default value: 0
-
-Control whether `SETTINGS` after `FORMAT` in `INSERT` queries is allowed or not. It is not recommended to use this, since this may interpret part of `SETTINGS` as values.
-
-Example:
-
-```sql
-INSERT INTO FUNCTION null('foo String') SETTINGS max_threads=1 VALUES ('bar');
-```
-
-But the following query will work only with `allow_settings_after_format_in_insert`:
-
-```sql
-SET allow_settings_after_format_in_insert=1;
-INSERT INTO FUNCTION null('foo String') VALUES ('bar') SETTINGS max_threads=1;
-```
-
-Possible values:
-
-- 0 — Disallow.
-- 1 — Allow.
-
-:::note
-Use this setting only for backward compatibility if your use cases depend on old syntax.
-:::
-
-## allow_simdjson {#allow_simdjson}
-
-Type: Bool
-
-Default value: 1
-
-Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.
-
-## allow_statistics_optimize {#allow_statistics_optimize}
-
-Type: Bool
-
-Default value: 0
-
-Allows using statistics to optimize queries
-
-## allow_suspicious_codecs {#allow_suspicious_codecs}
-
-Type: Bool
-
-Default value: 0
-
-If it is set to true, allow to specify meaningless compression codecs.
-
-## allow_suspicious_fixed_string_types {#allow_suspicious_fixed_string_types}
-
-Type: Bool
-
-Default value: 0
-
-In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates a misuse
-
-## allow_suspicious_indices {#allow_suspicious_indices}
-
-Type: Bool
-
-Default value: 0
-
-Reject primary/secondary indexes and sorting keys with identical expressions
-
-## allow_suspicious_low_cardinality_types {#allow_suspicious_low_cardinality_types}
-
-Type: Bool
-
-Default value: 0
-
-Allows or restricts using [LowCardinality](../../sql-reference/data-types/lowcardinality.md) with data types with fixed size of 8 bytes or less: numeric data types and `FixedString(8_bytes_or_less)`.
-
-For small fixed values, using `LowCardinality` is usually inefficient, because ClickHouse stores a numeric index for each row. As a result:
-
-- Disk space usage can rise.
-- RAM consumption can be higher, depending on a dictionary size.
-- Some functions can work slower due to extra coding/encoding operations.
-
-Merge times in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables can grow due to all the reasons described above.
-
-Possible values:
-
-- 1 — Usage of `LowCardinality` is not restricted.
-- 0 — Usage of `LowCardinality` is restricted.
-
-## allow_suspicious_primary_key {#allow_suspicious_primary_key}
-
-Type: Bool
-
-Default value: 0
-
-Allow suspicious `PRIMARY KEY`/`ORDER BY` for MergeTree (i.e. SimpleAggregateFunction).
-
-## allow_suspicious_ttl_expressions {#allow_suspicious_ttl_expressions}
-
-Type: Bool
-
-Default value: 0
-
-Reject TTL expressions that don't depend on any of table's columns. It indicates a user error most of the time.
-
-## allow_suspicious_variant_types {#allow_suspicious_variant_types}
-
-Type: Bool
-
-Default value: 0
-
-In CREATE TABLE statement allows specifying Variant type with similar variant types (for example, with different numeric or date types). Enabling this setting may introduce some ambiguity when working with values with similar types.
-
-## allow_unrestricted_reads_from_keeper {#allow_unrestricted_reads_from_keeper}
-
-Type: Bool
-
-Default value: 0
-
-Allow unrestricted (without condition on path) reads from system.zookeeper table, can be handy, but is not safe for zookeeper
-
-## alter_move_to_space_execute_async {#alter_move_to_space_execute_async}
-
-Type: Bool
-
-Default value: 0
-
-Execute ALTER TABLE MOVE ... TO [DISK|VOLUME] asynchronously
-
-## alter_partition_verbose_result {#alter_partition_verbose_result}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables the display of information about the parts to which the manipulation operations with partitions and parts have been successfully applied.
-Applicable to [ATTACH PARTITION|PART](../../sql-reference/statements/alter/partition.md/#alter_attach-partition) and to [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md/#alter_freeze-partition).
-
-Possible values:
-
-- 0 — disable verbosity.
-- 1 — enable verbosity.
-
-**Example**
-
-```sql
-CREATE TABLE test(a Int64, d Date, s String) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY a;
-INSERT INTO test VALUES(1, '2021-01-01', '');
-INSERT INTO test VALUES(1, '2021-01-01', '');
-ALTER TABLE test DETACH PARTITION ID '202101';
-
-ALTER TABLE test ATTACH PARTITION ID '202101' SETTINGS alter_partition_verbose_result = 1;
-
-┌─command_type─────┬─partition_id─┬─part_name────┬─old_part_name─┐
-│ ATTACH PARTITION │ 202101       │ 202101_7_7_0 │ 202101_5_5_0  │
-│ ATTACH PARTITION │ 202101       │ 202101_8_8_0 │ 202101_6_6_0  │
-└──────────────────┴──────────────┴──────────────┴───────────────┘
-
-ALTER TABLE test FREEZE SETTINGS alter_partition_verbose_result = 1;
-
-┌─command_type─┬─partition_id─┬─part_name────┬─backup_name─┬─backup_path───────────────────┬─part_backup_path────────────────────────────────────────────┐
-│ FREEZE ALL   │ 202101       │ 202101_7_7_0 │ 8           │ /var/lib/clickhouse/shadow/8/ │ /var/lib/clickhouse/shadow/8/data/default/test/202101_7_7_0 │
-│ FREEZE ALL   │ 202101       │ 202101_8_8_0 │ 8           │ /var/lib/clickhouse/shadow/8/ │ /var/lib/clickhouse/shadow/8/data/default/test/202101_8_8_0 │
-└──────────────┴──────────────┴──────────────┴─────────────┴───────────────────────────────┴─────────────────────────────────────────────────────────────┘
-```
-
-## alter_sync {#alter_sync}
-
-Type: UInt64
-
-Default value: 1
-
-Allows to set up waiting for actions to be executed on replicas by [ALTER](../../sql-reference/statements/alter/index.md), [OPTIMIZE](../../sql-reference/statements/optimize.md) or [TRUNCATE](../../sql-reference/statements/truncate.md) queries.
-
-Possible values:
-
-- 0 — Do not wait.
-- 1 — Wait for own execution.
-- 2 — Wait for everyone.
-
-Cloud default value: `0`.
-
-:::note
-`alter_sync` is applicable to `Replicated` tables only; it has no effect on alters of non-`Replicated` tables.
-:::
-
-## analyze_index_with_space_filling_curves {#analyze_index_with_space_filling_curves}
-
-Type: Bool
-
-Default value: 1
-
-If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)` or `ORDER BY hilbertEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis.
-
-## analyzer_compatibility_join_using_top_level_identifier {#analyzer_compatibility_join_using_top_level_identifier}
-
-Type: Bool
-
-Default value: 0
-
-Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather than `t1.b = t2.b`).
-
-## any_join_distinct_right_table_keys {#any_join_distinct_right_table_keys}
-
-Type: Bool
-
-Default value: 0
-
-Enables legacy ClickHouse server behaviour in `ANY INNER|LEFT JOIN` operations.
-
-:::note
-Use this setting only for backward compatibility if your use cases depend on legacy `JOIN` behaviour.
-:::
-
-When the legacy behaviour is enabled:
-
-- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping.
-- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do.
-
-When the legacy behaviour is disabled:
-
-- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations.
-- Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables.
-
-Possible values:
-
-- 0 — Legacy behaviour is disabled.
-- 1 — Legacy behaviour is enabled.
-
-See also:
-
-- [JOIN strictness](../../sql-reference/statements/select/join.md/#join-settings)
-
-## apply_deleted_mask {#apply_deleted_mask}
-
-Type: Bool
-
-Default value: 1
-
-Enables filtering out rows deleted with lightweight DELETE. If disabled, a query will be able to read those rows. This is useful for debugging and "undelete" scenarios
-
-## apply_mutations_on_fly {#apply_mutations_on_fly}
-
-Type: Bool
-
-Default value: 0
-
-If true, mutations (UPDATEs and DELETEs) which are not materialized in data part will be applied on SELECTs. Only available in ClickHouse Cloud.
-
-## asterisk_include_alias_columns {#asterisk_include_alias_columns}
-
-Type: Bool
-
-Default value: 0
-
-Include [ALIAS](../../sql-reference/statements/create/table.md#alias) columns for wildcard query (`SELECT *`).
-
-Possible values:
-
-- 0 - disabled
-- 1 - enabled
-
-## asterisk_include_materialized_columns {#asterisk_include_materialized_columns}
-
-Type: Bool
-
-Default value: 0
-
-Include [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) columns for wildcard query (`SELECT *`).
-
-Possible values:
-
-- 0 - disabled
-- 1 - enabled
-
-## async_insert {#async_insert}
-
-Type: Bool
-
-Default value: 0
-
-If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data is flushed to table
-
-## async_insert_busy_timeout_decrease_rate {#async_insert_busy_timeout_decrease_rate}
-
-Type: Double
-
-Default value: 0.2
-
-The exponential growth rate at which the adaptive asynchronous insert timeout decreases
-
-## async_insert_busy_timeout_increase_rate {#async_insert_busy_timeout_increase_rate}
-
-Type: Double
-
-Default value: 0.2
-
-The exponential growth rate at which the adaptive asynchronous insert timeout increases
-
-## async_insert_busy_timeout_max_ms {#async_insert_busy_timeout_max_ms}
-
-Type: Milliseconds
-
-Default value: 200
-
-Maximum time to wait before dumping collected data per query since the first data appeared.
-
-## async_insert_busy_timeout_min_ms {#async_insert_busy_timeout_min_ms}
-
-Type: Milliseconds
-
-Default value: 50
-
-If auto-adjusting is enabled through async_insert_use_adaptive_busy_timeout, minimum time to wait before dumping collected data per query since the first data appeared. It also serves as the initial value for the adaptive algorithm
-
-## async_insert_deduplicate {#async_insert_deduplicate}
-
-Type: Bool
-
-Default value: 0
-
-For async INSERT queries in the replicated table, specifies that deduplication of inserting blocks should be performed
-
-## async_insert_max_data_size {#async_insert_max_data_size}
-
-Type: UInt64
-
-Default value: 10485760
-
-Maximum size in bytes of unparsed data collected per query before being inserted
-
-## async_insert_max_query_number {#async_insert_max_query_number}
-
-Type: UInt64
-
-Default value: 450
-
-Maximum number of insert queries before being inserted
-
-## async_insert_poll_timeout_ms {#async_insert_poll_timeout_ms}
-
-Type: Milliseconds
-
-Default value: 10
-
-Timeout for polling data from asynchronous insert queue
-
-## async_insert_use_adaptive_busy_timeout {#async_insert_use_adaptive_busy_timeout}
-
-Type: Bool
-
-Default value: 1
-
-If it is set to true, use adaptive busy timeout for asynchronous inserts
-
-## async_query_sending_for_remote {#async_query_sending_for_remote}
-
-Type: Bool
-
-Default value: 1
-
-Enables asynchronous connection creation and query sending while executing remote query.
-
-Enabled by default.
-
-## async_socket_for_remote {#async_socket_for_remote}
-
-Type: Bool
-
-Default value: 1
-
-Enables asynchronous read from socket while executing remote query.
-
-Enabled by default.
-
-## azure_allow_parallel_part_upload {#azure_allow_parallel_part_upload}
-
-Type: Bool
-
-Default value: 1
-
-Use multiple threads for azure multipart upload.
-
-## azure_create_new_file_on_insert {#azure_create_new_file_on_insert}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables creating a new file on each insert in azure engine tables
-
-## azure_ignore_file_doesnt_exist {#azure_ignore_file_doesnt_exist}
-
-Type: Bool
-
-Default value: 0
-
-Ignore absence of file if it does not exist when reading certain keys.
-
-Possible values:
-- 1 — `SELECT` returns empty result.
-- 0 — `SELECT` throws an exception.
-
-## azure_list_object_keys_size {#azure_list_object_keys_size}
-
-Type: UInt64
-
-Default value: 1000
-
-Maximum number of files that could be returned in batch by ListObject request
-
-## azure_max_blocks_in_multipart_upload {#azure_max_blocks_in_multipart_upload}
-
-Type: UInt64
-
-Default value: 50000
-
-Maximum number of blocks in multipart upload for Azure.
-
-## azure_max_inflight_parts_for_one_file {#azure_max_inflight_parts_for_one_file}
-
-Type: UInt64
-
-Default value: 20
-
-The maximum number of concurrently loaded parts in multipart upload request. 0 means unlimited.
-
-## azure_max_single_part_copy_size {#azure_max_single_part_copy_size}
-
-Type: UInt64
-
-Default value: 268435456
-
-The maximum size of object to copy using single part copy to Azure blob storage.
-
-## azure_max_single_part_upload_size {#azure_max_single_part_upload_size}
-
-Type: UInt64
-
-Default value: 104857600
-
-The maximum size of object to upload using singlepart upload to Azure blob storage.
-
-## azure_max_single_read_retries {#azure_max_single_read_retries}
-
-Type: UInt64
-
-Default value: 4
-
-The maximum number of retries during single Azure blob storage read.
-
-## azure_max_unexpected_write_error_retries {#azure_max_unexpected_write_error_retries}
-
-Type: UInt64
-
-Default value: 4
-
-The maximum number of retries in case of unexpected errors during Azure blob storage write
-
-## azure_max_upload_part_size {#azure_max_upload_part_size}
-
-Type: UInt64
-
-Default value: 5368709120
-
-The maximum size of part to upload during multipart upload to Azure blob storage.
-
-## azure_min_upload_part_size {#azure_min_upload_part_size}
-
-Type: UInt64
-
-Default value: 16777216
-
-The minimum size of part to upload during multipart upload to Azure blob storage.
-
-## azure_sdk_max_retries {#azure_sdk_max_retries}
-
-Type: UInt64
-
-Default value: 10
-
-Maximum number of retries in azure sdk
-
-## azure_sdk_retry_initial_backoff_ms {#azure_sdk_retry_initial_backoff_ms}
-
-Type: UInt64
-
-Default value: 10
-
-Minimal backoff between retries in azure sdk
-
-## azure_sdk_retry_max_backoff_ms {#azure_sdk_retry_max_backoff_ms}
-
-Type: UInt64
-
-Default value: 1000
-
-Maximal backoff between retries in azure sdk
-
-## azure_skip_empty_files {#azure_skip_empty_files}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables skipping empty files in Azure engine.
-
-Possible values:
-- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
-- 1 — `SELECT` returns empty result for empty file.
-
-## azure_strict_upload_part_size {#azure_strict_upload_part_size}
-
-Type: UInt64
-
-Default value: 0
-
-The exact size of part to upload during multipart upload to Azure blob storage.
-
-## azure_throw_on_zero_files_match {#azure_throw_on_zero_files_match}
-
-Type: Bool
-
-Default value: 0
-
-Throw an error if matched zero files according to glob expansion rules.
-
-Possible values:
-- 1 — `SELECT` throws an exception.
-- 0 — `SELECT` returns empty result.
-
-## azure_truncate_on_insert {#azure_truncate_on_insert}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables truncate before insert in azure engine tables.
-
-## azure_upload_part_size_multiply_factor {#azure_upload_part_size_multiply_factor}
-
-Type: UInt64
-
-Default value: 2
-
-Multiply azure_min_upload_part_size by this factor each time azure_upload_part_size_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage.
-
-## azure_upload_part_size_multiply_parts_count_threshold {#azure_upload_part_size_multiply_parts_count_threshold}
-
-Type: UInt64
-
-Default value: 500
-
-Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor.
-
-## backup_restore_batch_size_for_keeper_multi {#backup_restore_batch_size_for_keeper_multi}
-
-Type: UInt64
-
-Default value: 1000
-
-Maximum size of batch for multi request to [Zoo]Keeper during backup or restore
-
-## backup_restore_batch_size_for_keeper_multiread {#backup_restore_batch_size_for_keeper_multiread}
-
-Type: UInt64
-
-Default value: 10000
-
-Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore
-
-## backup_restore_keeper_fault_injection_probability {#backup_restore_keeper_fault_injection_probability}
-
-Type: Float
-
-Default value: 0
-
-Approximate probability of failure for a keeper request during backup or restore. Valid value is in interval [0.0f, 1.0f]
-
-## backup_restore_keeper_fault_injection_seed {#backup_restore_keeper_fault_injection_seed}
-
-Type: UInt64
-
-Default value: 0
-
-0 - random seed, otherwise the setting value
-
-## backup_restore_keeper_max_retries {#backup_restore_keeper_max_retries}
-
-Type: UInt64
-
-Default value: 20
-
-Max retries for keeper operations during backup or restore
-
-## backup_restore_keeper_retry_initial_backoff_ms {#backup_restore_keeper_retry_initial_backoff_ms}
-
-Type: UInt64
-
-Default value: 100
-
-Initial backoff timeout for [Zoo]Keeper operations during backup or restore
-
-## backup_restore_keeper_retry_max_backoff_ms {#backup_restore_keeper_retry_max_backoff_ms}
-
-Type: UInt64
-
-Default value: 5000
-
-Max backoff timeout for [Zoo]Keeper operations during backup or restore
-
-## backup_restore_keeper_value_max_size {#backup_restore_keeper_value_max_size}
-
-Type: UInt64
-
-Default value: 1048576
-
-Maximum size of data of a [Zoo]Keeper's node during backup
-
-## backup_restore_s3_retry_attempts {#backup_restore_s3_retry_attempts}
-
-Type: UInt64
-
-Default value: 1000
-
-Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore.
-
-## cache_warmer_threads {#cache_warmer_threads}
-
-Type: UInt64
-
-Default value: 4
-
-Only available in ClickHouse Cloud. Number of background threads for speculatively downloading new data parts into file cache, when cache_populated_by_fetch is enabled. Zero to disable.
-
-## calculate_text_stack_trace {#calculate_text_stack_trace}
-
-Type: Bool
-
-Default value: 1
-
-Calculate text stack trace in case of exceptions during query execution. This is the default. It requires symbol lookups that may slow down fuzzing tests when a huge amount of wrong queries are executed. In normal cases, you should not disable this option.
-
-## cancel_http_readonly_queries_on_client_close {#cancel_http_readonly_queries_on_client_close}
-
-Type: Bool
-
-Default value: 0
-
-Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response.
-
-Cloud default value: `1`.
-
-## cast_ipv4_ipv6_default_on_conversion_error {#cast_ipv4_ipv6_default_on_conversion_error}
-
-Type: Bool
-
-Default value: 0
-
-CAST operator into IPv4, CAST operator into IPv6 type, toIPv4, toIPv6 functions will return default value instead of throwing exception on conversion error.
-
-## cast_keep_nullable {#cast_keep_nullable}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables keeping of the `Nullable` data type in [CAST](../../sql-reference/functions/type-conversion-functions.md/#castx-t) operations.
-
-When the setting is enabled and the argument of `CAST` function is `Nullable`, the result is also transformed to `Nullable` type. When the setting is disabled, the result always has the destination type exactly.
-
-Possible values:
-
-- 0 — The `CAST` result has exactly the destination type specified.
-- 1 — If the argument type is `Nullable`, the `CAST` result is transformed to `Nullable(DestinationDataType)`.
-
-**Examples**
-
-The following query results in the destination data type exactly:
-
-```sql
-SET cast_keep_nullable = 0;
-SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x);
-```
-
-Result:
-
-```text
-┌─x─┬─toTypeName(CAST(toNullable(toInt32(0)), 'Int32'))─┐
-│ 0 │ Int32                                             │
-└───┴───────────────────────────────────────────────────┘
-```
-
-The following query results in the `Nullable` modification on the destination data type:
-
-```sql
-SET cast_keep_nullable = 1;
-SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x);
-```
-
-Result:
-
-```text
-┌─x─┬─toTypeName(CAST(toNullable(toInt32(0)), 'Int32'))─┐
-│ 0 │ Nullable(Int32)                                   │
-└───┴───────────────────────────────────────────────────┘
-```
-
-**See Also**
-
-- [CAST](../../sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) function
-
-## cast_string_to_dynamic_use_inference {#cast_string_to_dynamic_use_inference}
-
-Type: Bool
-
-Default value: 0
-
-Use type inference during String to Dynamic conversion.
-
-## check_query_single_value_result {#check_query_single_value_result}
-
-Type: Bool
-
-Default value: 1
-
-Defines the level of detail for the [CHECK TABLE](../../sql-reference/statements/check-table.md/#checking-mergetree-tables) query result for `MergeTree` family engines.
-
-Possible values:
-
-- 0 — the query shows a check status for every individual data part of a table.
-- 1 — the query shows the general table check status.
-
-## check_referential_table_dependencies {#check_referential_table_dependencies}
-
-Type: Bool
-
-Default value: 0
-
-Check that DDL query (such as DROP TABLE or RENAME) will not break referential dependencies
-
-## check_table_dependencies {#check_table_dependencies}
-
-Type: Bool
-
-Default value: 1
-
-Check that DDL query (such as DROP TABLE or RENAME) will not break dependencies
-
-## checksum_on_read {#checksum_on_read}
-
-Type: Bool
-
-Default value: 1
-
-Validate checksums on reading. It is enabled by default and should be always enabled in production. Please do not expect any benefits in disabling this setting. It may only be used for experiments and benchmarks. The setting is only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over the network.
-
-## cloud_mode {#cloud_mode}
-
-Type: Bool
-
-Default value: 0
-
-Cloud mode
-
-## cloud_mode_database_engine {#cloud_mode_database_engine}
-
-Type: UInt64
-
-Default value: 1
-
-The database engine allowed in Cloud. 1 - rewrite DDLs to use Replicated database, 2 - rewrite DDLs to use Shared database
-
-## cloud_mode_engine {#cloud_mode_engine}
-
-Type: UInt64
-
-Default value: 1
-
-The engine family allowed in Cloud. 0 - allow everything, 1 - rewrite DDLs to use *ReplicatedMergeTree, 2 - rewrite DDLs to use SharedMergeTree. UInt64 to minimize public part
-
-## cluster_for_parallel_replicas {#cluster_for_parallel_replicas}
-
-Type: String
-
-Default value:
-
-Cluster for a shard in which current server is located
-
-## collect_hash_table_stats_during_aggregation {#collect_hash_table_stats_during_aggregation}
-
-Type: Bool
-
-Default value: 1
-
-Enable collecting hash table statistics to optimize memory allocation
-
-## collect_hash_table_stats_during_joins {#collect_hash_table_stats_during_joins}
-
-Type: Bool
-
-Default value: 1
-
-Enable collecting hash table statistics to optimize memory allocation
-
-## compatibility {#compatibility}
-
-Type: String
-
-Default value:
-
-The `compatibility` setting causes ClickHouse to use the default settings of a previous version of ClickHouse, where the previous version is provided as the setting.
-
-If settings are set to non-default values, then those settings are honored (only settings that have not been modified are affected by the `compatibility` setting).
-
-This setting takes a ClickHouse version number as a string, like `22.3`, `22.8`. An empty value means that this setting is disabled.
-
-Disabled by default.
-
-:::note
-In ClickHouse Cloud the compatibility setting must be set by ClickHouse Cloud support.  Please [open a case](https://clickhouse.cloud/support) to have it set.
-:::
-
-## compatibility_ignore_auto_increment_in_create_table {#compatibility_ignore_auto_increment_in_create_table}
-
-Type: Bool
-
-Default value: 0
-
-Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL
-
-## compatibility_ignore_collation_in_create_table {#compatibility_ignore_collation_in_create_table}
-
-Type: Bool
-
-Default value: 1
-
-Compatibility ignore collation in create table
-
-## compile_aggregate_expressions {#compile_aggregate_expressions}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables JIT-compilation of aggregate functions to native code. Enabling this setting can improve the performance.
-
-Possible values:
-
-- 0 — Aggregation is done without JIT compilation.
-- 1 — Aggregation is done using JIT compilation.
-
-**See Also**
-
-- [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression)
-
-## compile_expressions {#compile_expressions}
-
-Type: Bool
-
-Default value: 0
-
-Compile some scalar functions and operators to native code. Due to a bug in the LLVM compiler infrastructure, on AArch64 machines, it is known to lead to a nullptr dereference and, consequently, server crash. Do not enable this setting.
-
-## compile_sort_description {#compile_sort_description}
-
-Type: Bool
-
-Default value: 1
-
-Compile sort description to native code.
-
-## connect_timeout {#connect_timeout}
-
-Type: Seconds
-
-Default value: 10
-
-Connection timeout if there are no replicas.
-
-## connect_timeout_with_failover_ms {#connect_timeout_with_failover_ms}
-
-Type: Milliseconds
-
-Default value: 1000
-
-The timeout in milliseconds for connecting to a remote server for a Distributed table engine, if the ‘shard’ and ‘replica’ sections are used in the cluster definition.
-If unsuccessful, several attempts are made to connect to various replicas.
-
-## connect_timeout_with_failover_secure_ms {#connect_timeout_with_failover_secure_ms}
-
-Type: Milliseconds
-
-Default value: 1000
-
-Connection timeout for selecting first healthy replica (for secure connections).
-
-## connection_pool_max_wait_ms {#connection_pool_max_wait_ms}
-
-Type: Milliseconds
-
-Default value: 0
-
-The wait time in milliseconds for a connection when the connection pool is full.
-
-Possible values:
-
-- Positive integer.
-- 0 — Infinite timeout.
-
-## connections_with_failover_max_tries {#connections_with_failover_max_tries}
-
-Type: UInt64
-
-Default value: 3
-
-The maximum number of connection attempts with each replica for the Distributed table engine.
-
-## convert_query_to_cnf {#convert_query_to_cnf}
-
-Type: Bool
-
-Default value: 0
-
-When set to `true`, a `SELECT` query will be converted to conjunctive normal form (CNF). There are scenarios where rewriting a query in CNF may execute faster (view this [GitHub issue](https://github.com/ClickHouse/ClickHouse/issues/11749) for an explanation).
-
-For example, notice how the following `SELECT` query is not modified (the default behavior):
-
-```sql
-EXPLAIN SYNTAX
-SELECT *
-FROM
-(
-    SELECT number AS x
-    FROM numbers(20)
-) AS a
-WHERE ((x >= 1) AND (x <= 5)) OR ((x >= 10) AND (x <= 15))
-SETTINGS convert_query_to_cnf = false;
-```
-
-The result is:
-
-```response
-┌─explain────────────────────────────────────────────────────────┐
-│ SELECT x                                                       │
-│ FROM                                                           │
-│ (                                                              │
-│     SELECT number AS x                                         │
-│     FROM numbers(20)                                           │
-│     WHERE ((x >= 1) AND (x <= 5)) OR ((x >= 10) AND (x <= 15)) │
-│ ) AS a                                                         │
-│ WHERE ((x >= 1) AND (x <= 5)) OR ((x >= 10) AND (x <= 15))     │
-│ SETTINGS convert_query_to_cnf = 0                              │
-└────────────────────────────────────────────────────────────────┘
-```
-
-Let's set `convert_query_to_cnf` to `true` and see what changes:
-
-```sql
-EXPLAIN SYNTAX
-SELECT *
-FROM
-(
-    SELECT number AS x
-    FROM numbers(20)
-) AS a
-WHERE ((x >= 1) AND (x <= 5)) OR ((x >= 10) AND (x <= 15))
-SETTINGS convert_query_to_cnf = true;
-```
-
-Notice the `WHERE` clause is rewritten in CNF, but the result set is identical - the Boolean logic is unchanged:
-
-```response
-┌─explain───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
-│ SELECT x                                                                                                              │
-│ FROM                                                                                                                  │
-│ (                                                                                                                     │
-│     SELECT number AS x                                                                                                │
-│     FROM numbers(20)                                                                                                  │
-│     WHERE ((x <= 15) OR (x <= 5)) AND ((x <= 15) OR (x >= 1)) AND ((x >= 10) OR (x <= 5)) AND ((x >= 10) OR (x >= 1)) │
-│ ) AS a                                                                                                                │
-│ WHERE ((x >= 10) OR (x >= 1)) AND ((x >= 10) OR (x <= 5)) AND ((x <= 15) OR (x >= 1)) AND ((x <= 15) OR (x <= 5))     │
-│ SETTINGS convert_query_to_cnf = 1                                                                                     │
-└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
-```
-
-Possible values: true, false
-
-## count_distinct_implementation {#count_distinct_implementation}
-
-Type: String
-
-Default value: uniqExact
-
-Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT ...)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction.
-
-Possible values:
-
-- [uniq](../../sql-reference/aggregate-functions/reference/uniq.md/#agg_function-uniq)
-- [uniqCombined](../../sql-reference/aggregate-functions/reference/uniqcombined.md/#agg_function-uniqcombined)
-- [uniqCombined64](../../sql-reference/aggregate-functions/reference/uniqcombined64.md/#agg_function-uniqcombined64)
-- [uniqHLL12](../../sql-reference/aggregate-functions/reference/uniqhll12.md/#agg_function-uniqhll12)
-- [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md/#agg_function-uniqexact)
-
-## count_distinct_optimization {#count_distinct_optimization}
-
-Type: Bool
-
-Default value: 0
-
-Rewrite count distinct to subquery of group by
-
-## create_if_not_exists {#create_if_not_exists}
-
-Type: Bool
-
-Default value: 0
-
-Enable `IF NOT EXISTS` for `CREATE` statement by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown.
-
-## create_index_ignore_unique {#create_index_ignore_unique}
-
-Type: Bool
-
-Default value: 0
-
-Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.
-
-## create_replicated_merge_tree_fault_injection_probability {#create_replicated_merge_tree_fault_injection_probability}
-
-Type: Float
-
-Default value: 0
-
-The probability of a fault injection during table creation after creating metadata in ZooKeeper
-
-## create_table_empty_primary_key_by_default {#create_table_empty_primary_key_by_default}
-
-Type: Bool
-
-Default value: 0
-
-Allow creating *MergeTree tables with an empty primary key when ORDER BY and PRIMARY KEY are not specified.
-
-## cross_join_min_bytes_to_compress {#cross_join_min_bytes_to_compress}
-
-Type: UInt64
-
-Default value: 1073741824
-
-Minimal size of block to compress in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.
-
-## cross_join_min_rows_to_compress {#cross_join_min_rows_to_compress}
-
-Type: UInt64
-
-Default value: 10000000
-
-Minimal count of rows to compress block in CROSS JOIN. Zero value means - disable this threshold. This block is compressed when any of the two thresholds (by rows or by bytes) are reached.
-
-## data_type_default_nullable {#data_type_default_nullable}
-
-Type: Bool
-
-Default value: 0
-
-Specifies whether data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md/#null-modifiers) in a column definition will be [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable).
-
-Possible values:
-
-- 1 — The data types in column definitions are set to `Nullable` by default.
-- 0 — The data types in column definitions are set to not `Nullable` by default.
-
-## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
-
-Type: Bool
-
-Default value: 0
-
-Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.
-
-Possible values:
-
-- 0 — Queries will be executed with delay.
-- 1 — Queries will be executed without delay.
-
-## database_replicated_allow_explicit_uuid {#database_replicated_allow_explicit_uuid}
-
-Type: UInt64
-
-Default value: 0
-
-0 - Don't allow to explicitly specify UUIDs for tables in Replicated databases. 1 - Allow. 2 - Allow, but ignore the specified UUID and generate a random one instead.
-
-## database_replicated_allow_heavy_create {#database_replicated_allow_heavy_create}
-
-Type: Bool
-
-Default value: 0
-
-Allow long-running DDL queries (CREATE AS SELECT and POPULATE) in Replicated database engine. Note that it can block DDL queue for a long time.
-
-## database_replicated_allow_only_replicated_engine {#database_replicated_allow_only_replicated_engine}
-
-Type: Bool
-
-Default value: 0
-
-Allow to create only Replicated tables in database with engine Replicated
-
-## database_replicated_allow_replicated_engine_arguments {#database_replicated_allow_replicated_engine_arguments}
-
-Type: UInt64
-
-Default value: 0
-
-0 - Don't allow to explicitly specify ZooKeeper path and replica name for *MergeTree tables in Replicated databases. 1 - Allow. 2 - Allow, but ignore the specified path and use default one instead. 3 - Allow and don't log a warning.
-
-## database_replicated_always_detach_permanently {#database_replicated_always_detach_permanently}
-
-Type: Bool
-
-Default value: 0
-
-Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated
-
-## database_replicated_enforce_synchronous_settings {#database_replicated_enforce_synchronous_settings}
-
-Type: Bool
-
-Default value: 0
-
-Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.
-
-## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec}
-
-Type: UInt64
-
-Default value: 300
-
-Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds.
-
-Possible values:
-
-- Positive integer.
-- 0 — Unlimited.
-
-## decimal_check_overflow {#decimal_check_overflow}
-
-Type: Bool
-
-Default value: 1
-
-Check overflow of decimal arithmetic/comparison operations
-
-## deduplicate_blocks_in_dependent_materialized_views {#deduplicate_blocks_in_dependent_materialized_views}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables the deduplication check for materialized views that receive data from Replicated\* tables.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-Usage
-
-By default, deduplication is not performed for materialized views but is done upstream, in the source table.
-If an INSERTed block is skipped due to deduplication in the source table, there will be no insertion into attached materialized views. This behaviour exists to enable the insertion of highly aggregated data into materialized views, for cases where inserted blocks are the same after materialized view aggregation but derived from different INSERTs into the source table.
-At the same time, this behaviour “breaks” `INSERT` idempotency. If an `INSERT` into the main table was successful and `INSERT` into a materialized view failed (e.g. because of communication failure with ClickHouse Keeper) a client will get an error and can retry the operation. However, the materialized view won’t receive the second insert because it will be discarded by deduplication in the main (source) table. The setting `deduplicate_blocks_in_dependent_materialized_views` allows for changing this behaviour. On retry, a materialized view will receive the repeat insert and will perform a deduplication check by itself,
-ignoring check result for the source table, and will insert rows lost because of the first failure.
-
-## default_materialized_view_sql_security {#default_materialized_view_sql_security}
-
-Type: SQLSecurityType
-
-Default value: DEFINER
-
-Allows to set a default value for SQL SECURITY option when creating a materialized view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security).
-
-The default value is `DEFINER`.
-
-## default_max_bytes_in_join {#default_max_bytes_in_join}
-
-Type: UInt64
-
-Default value: 1000000000
-
-Maximum size of right-side table if limit is required but max_bytes_in_join is not set.
-
-## default_normal_view_sql_security {#default_normal_view_sql_security}
-
-Type: SQLSecurityType
-
-Default value: INVOKER
-
-Allows to set default `SQL SECURITY` option while creating a normal view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security).
-
-The default value is `INVOKER`.
-
-## default_table_engine {#default_table_engine}
-
-Type: DefaultTableEngine
-
-Default value: MergeTree
-
-Default table engine to use when `ENGINE` is not set in a `CREATE` statement.
-
-Possible values:
-
-- a string representing any valid table engine name
-
-Cloud default value: `SharedMergeTree`.
-
-**Example**
-
-Query:
-
-```sql
-SET default_table_engine = 'Log';
-
-SELECT name, value, changed FROM system.settings WHERE name = 'default_table_engine';
-```
-
-Result:
-
-```response
-┌─name─────────────────┬─value─┬─changed─┐
-│ default_table_engine │ Log   │       1 │
-└──────────────────────┴───────┴─────────┘
-```
-
-In this example, any new table that does not specify an `Engine` will use the `Log` table engine:
-
-Query:
-
-```sql
-CREATE TABLE my_table (
-    x UInt32,
-    y UInt32
-);
-
-SHOW CREATE TABLE my_table;
-```
-
-Result:
-
-```response
-┌─statement────────────────────────────────────────────────────────────────┐
-│ CREATE TABLE default.my_table
-(
-    `x` UInt32,
-    `y` UInt32
-)
-ENGINE = Log
-└──────────────────────────────────────────────────────────────────────────┘
-```
-
-## default_temporary_table_engine {#default_temporary_table_engine}
-
-Type: DefaultTableEngine
-
-Default value: Memory
-
-Same as [default_table_engine](#default_table_engine) but for temporary tables.
-
-In this example, any new temporary table that does not specify an `Engine` will use the `Log` table engine:
-
-Query:
-
-```sql
-SET default_temporary_table_engine = 'Log';
-
-CREATE TEMPORARY TABLE my_table (
-    x UInt32,
-    y UInt32
-);
-
-SHOW CREATE TEMPORARY TABLE my_table;
-```
-
-Result:
-
-```response
-┌─statement────────────────────────────────────────────────────────────────┐
-│ CREATE TEMPORARY TABLE default.my_table
-(
-    `x` UInt32,
-    `y` UInt32
-)
-ENGINE = Log
-└──────────────────────────────────────────────────────────────────────────┘
-```
-
-## default_view_definer {#default_view_definer}
-
-Type: String
-
-Default value: CURRENT_USER
-
-Allows to set default `DEFINER` option while creating a view. [More about SQL security](../../sql-reference/statements/create/view.md#sql_security).
-
-The default value is `CURRENT_USER`.
-
-## describe_compact_output {#describe_compact_output}
-
-Type: Bool
-
-Default value: 0
-
-If true, include only column names and types into result of DESCRIBE query
-
-## describe_extend_object_types {#describe_extend_object_types}
-
-Type: Bool
-
-Default value: 0
-
-Deduce concrete type of columns of type Object in DESCRIBE query
-
-## describe_include_subcolumns {#describe_include_subcolumns}
-
-Type: Bool
-
-Default value: 0
-
-Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md/#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md/#finding-null) or an [Array](../../sql-reference/data-types/array.md/#array-size) data type.
-
-Possible values:
-
-- 0 — Subcolumns are not included in `DESCRIBE` queries.
-- 1 — Subcolumns are included in `DESCRIBE` queries.
-
-**Example**
-
-See an example for the [DESCRIBE](../../sql-reference/statements/describe-table.md) statement.
-
-## describe_include_virtual_columns {#describe_include_virtual_columns}
-
-Type: Bool
-
-Default value: 0
-
-If true, virtual columns of table will be included into result of DESCRIBE query
-
-## dialect {#dialect}
-
-Type: Dialect
-
-Default value: clickhouse
-
-Which dialect will be used to parse query
-
-## dictionary_validate_primary_key_type {#dictionary_validate_primary_key_type}
-
-Type: Bool
-
-Default value: 0
-
-Validate primary key type for dictionaries. By default id type for simple layouts will be implicitly converted to UInt64.
-
-## distinct_overflow_mode {#distinct_overflow_mode}
-
-Type: OverflowMode
-
-Default value: throw
-
-What to do when the limit is exceeded.
-
-## distributed_aggregation_memory_efficient {#distributed_aggregation_memory_efficient}
-
-Type: Bool
-
-Default value: 1
-
-Is the memory-saving mode of distributed aggregation enabled.
-
-## distributed_background_insert_batch {#distributed_background_insert_batch}
-
-Type: Bool
-
-Default value: 0
-
-Enables/disables inserted data sending in batches.
-
-When batch sending is enabled, the [Distributed](../../engines/table-engines/special/distributed.md) table engine tries to send multiple files of inserted data in one operation instead of sending them separately. Batch sending improves cluster performance by better-utilizing server and network resources.
-
-Possible values:
-
-- 1 — Enabled.
-- 0 — Disabled.
-
-## distributed_background_insert_max_sleep_time_ms {#distributed_background_insert_max_sleep_time_ms}
-
-Type: Milliseconds
-
-Default value: 30000
-
-Maximum interval for the [Distributed](../../engines/table-engines/special/distributed.md) table engine to send data. Limits exponential growth of the interval set in the [distributed_background_insert_sleep_time_ms](#distributed_background_insert_sleep_time_ms) setting.
-
-Possible values:
-
-- A positive integer number of milliseconds.
-
-## distributed_background_insert_sleep_time_ms {#distributed_background_insert_sleep_time_ms}
-
-Type: Milliseconds
-
-Default value: 100
-
-Base interval for the [Distributed](../../engines/table-engines/special/distributed.md) table engine to send data. The actual interval grows exponentially in the event of errors.
-
-Possible values:
-
-- A positive integer number of milliseconds.
-
-## distributed_background_insert_split_batch_on_failure {#distributed_background_insert_split_batch_on_failure}
-
-Type: Bool
-
-Default value: 0
-
-Enables/disables splitting batches on failures.
-
-Sometimes sending a particular batch to the remote shard may fail because of a complex pipeline behind it (e.g. a `MATERIALIZED VIEW` with `GROUP BY`), due to `Memory limit exceeded` or similar errors. In this case, retrying will not help (and it will block distributed sends for the table), but sending the files from that batch one by one may allow the INSERT to succeed.
-
-So setting this to `1` will disable batching for such batches (i.e. it temporarily disables `distributed_background_insert_batch` for failed batches).
-
-Possible values:
-
-- 1 — Enabled.
-- 0 — Disabled.
-
-:::note
-This setting also affects broken batches (that may appear because of abnormal server (machine) termination and no `fsync_after_insert`/`fsync_directories` for the [Distributed](../../engines/table-engines/special/distributed.md) table engine).
-:::
-
-:::note
-You should not rely on automatic batch splitting, since this may hurt performance.
-:::
-
-## distributed_background_insert_timeout {#distributed_background_insert_timeout}
-
-Type: UInt64
-
-Default value: 0
-
-Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.
-
-## distributed_cache_bypass_connection_pool {#distributed_cache_bypass_connection_pool}
-
-Type: Bool
-
-Default value: 0
-
-Only in ClickHouse Cloud. Allow to bypass distributed cache connection pool
-
-## distributed_cache_connect_max_tries {#distributed_cache_connect_max_tries}
-
-Type: UInt64
-
-Default value: 100
-
-Only in ClickHouse Cloud. Number of tries to connect to distributed cache if unsuccessful
-
-## distributed_cache_data_packet_ack_window {#distributed_cache_data_packet_ack_window}
-
-Type: UInt64
-
-Default value: 5
-
-Only in ClickHouse Cloud. A window for sending ACK for DataPacket sequence in a single distributed cache read request
-
-## distributed_cache_fetch_metrics_only_from_current_az {#distributed_cache_fetch_metrics_only_from_current_az}
-
-Type: Bool
-
-Default value: 1
-
-Only in ClickHouse Cloud. Fetch metrics only from current availability zone in system.distributed_cache_metrics, system.distributed_cache_events
-
-## distributed_cache_log_mode {#distributed_cache_log_mode}
-
-Type: DistributedCacheLogMode
-
-Default value: on_error
-
-Only in ClickHouse Cloud. Mode for writing to system.distributed_cache_log
-
-## distributed_cache_max_unacked_inflight_packets {#distributed_cache_max_unacked_inflight_packets}
-
-Type: UInt64
-
-Default value: 10
-
-Only in ClickHouse Cloud. A maximum number of unacknowledged in-flight packets in a single distributed cache read request
-
-## distributed_cache_pool_behaviour_on_limit {#distributed_cache_pool_behaviour_on_limit}
-
-Type: DistributedCachePoolBehaviourOnLimit
-
-Default value: allocate_bypassing_pool
-
-Only in ClickHouse Cloud. Identifies behaviour of distributed cache connection on pool limit reached
-
-## distributed_cache_read_alignment {#distributed_cache_read_alignment}
-
-Type: UInt64
-
-Default value: 0
-
-Only in ClickHouse Cloud. A setting for testing purposes, do not change it
-
-## distributed_cache_receive_response_wait_milliseconds {#distributed_cache_receive_response_wait_milliseconds}
-
-Type: UInt64
-
-Default value: 60000
-
-Only in ClickHouse Cloud. Wait time in milliseconds to receive data for request from distributed cache
-
-## distributed_cache_receive_timeout_milliseconds {#distributed_cache_receive_timeout_milliseconds}
-
-Type: UInt64
-
-Default value: 10000
-
-Only in ClickHouse Cloud. Wait time in milliseconds to receive any kind of response from distributed cache
-
-## distributed_cache_throw_on_error {#distributed_cache_throw_on_error}
-
-Type: Bool
-
-Default value: 0
-
-Only in ClickHouse Cloud. Rethrow an exception that happened during communication with the distributed cache or an exception received from the distributed cache. Otherwise, fall back to skipping the distributed cache on error.
-
-## distributed_cache_wait_connection_from_pool_milliseconds {#distributed_cache_wait_connection_from_pool_milliseconds}
-
-Type: UInt64
-
-Default value: 100
-
-Only in ClickHouse Cloud. Wait time in milliseconds to receive connection from connection pool if distributed_cache_pool_behaviour_on_limit is wait
-
-## distributed_connections_pool_size {#distributed_connections_pool_size}
-
-Type: UInt64
-
-Default value: 1024
-
-The maximum number of simultaneous connections with remote servers for distributed processing of all queries to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster.
-
-## distributed_ddl_entry_format_version {#distributed_ddl_entry_format_version}
-
-Type: UInt64
-
-Default value: 5
-
-Compatibility version of distributed DDL (ON CLUSTER) queries
-
-## distributed_ddl_output_mode {#distributed_ddl_output_mode}
-
-Type: DistributedDDLOutputMode
-
-Default value: throw
-
-Sets format of distributed DDL query result.
-
-Possible values:
-
-- `throw` — Returns result set with query execution status for all hosts where query is finished. If query has failed on some hosts, then it will rethrow the first exception. If query is not finished yet on some hosts and [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) exceeded, then it throws `TIMEOUT_EXCEEDED` exception.
-- `none` — Is similar to throw, but distributed DDL query returns no result set.
-- `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts.
-- `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts.
-- `none_only_active` - similar to `none`, but doesn't wait for inactive replicas of the `Replicated` database. Note: with this mode it's impossible to figure out that the query was not executed on some replica and will be executed in background.
-- `null_status_on_timeout_only_active` — similar to `null_status_on_timeout`, but doesn't wait for inactive replicas of the `Replicated` database
-- `throw_only_active` — similar to `throw`, but doesn't wait for inactive replicas of the `Replicated` database
-
-Cloud default value: `none`.
-
-## distributed_ddl_task_timeout {#distributed_ddl_task_timeout}
-
-Type: Int64
-
-Default value: 180
-
-Sets timeout for DDL query responses from all hosts in cluster. If a DDL request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite.
-
-Possible values:
-
-- Positive integer.
-- 0 — Async mode.
-- Negative integer — infinite timeout.
-
-## distributed_foreground_insert {#distributed_foreground_insert}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables synchronous data insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table.
-
-By default, when inserting data into a `Distributed` table, the ClickHouse server sends data to cluster nodes in background mode. When `distributed_foreground_insert=1`, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true).
-
-Possible values:
-
-- 0 — Data is inserted in background mode.
-- 1 — Data is inserted in synchronous mode.
-
-Cloud default value: `1`.
-
-**See Also**
-
-- [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed)
-- [Managing Distributed Tables](../../sql-reference/statements/system.md/#query-language-system-distributed)
-
-## distributed_group_by_no_merge {#distributed_group_by_no_merge}
-
-Type: UInt64
-
-Default value: 0
-
-Do not merge aggregation states from different servers for distributed query processing. You can use this when it is certain that there are different keys on different shards.
-
-Possible values:
-
-- `0` — Disabled (final query processing is done on the initiator node).
-- `1` - Do not merge aggregation states from different servers for distributed query processing (the query is completely processed on the shard, and the initiator only proxies the data). Can be used when it is certain that there are different keys on different shards.
-- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query processed completely on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
-
-**Example**
-
-```sql
-SELECT *
-FROM remote('127.0.0.{2,3}', system.one)
-GROUP BY dummy
-LIMIT 1
-SETTINGS distributed_group_by_no_merge = 1
-FORMAT PrettyCompactMonoBlock
-
-┌─dummy─┐
-│     0 │
-│     0 │
-└───────┘
-```
-
-```sql
-SELECT *
-FROM remote('127.0.0.{2,3}', system.one)
-GROUP BY dummy
-LIMIT 1
-SETTINGS distributed_group_by_no_merge = 2
-FORMAT PrettyCompactMonoBlock
-
-┌─dummy─┐
-│     0 │
-└───────┘
-```
-
-## distributed_insert_skip_read_only_replicas {#distributed_insert_skip_read_only_replicas}
-
-Type: Bool
-
-Default value: 0
-
-Enables skipping read-only replicas for INSERT queries into Distributed.
-
-Possible values:
-
-- 0 — INSERT works as usual; if it goes to a read-only replica, it fails.
-- 1 — Initiator will skip read-only replicas before sending data to shards.
-
-## distributed_product_mode {#distributed_product_mode}
-
-Type: DistributedProductMode
-
-Default value: deny
-
-Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md).
-
-ClickHouse applies this setting when the query contains the product of distributed tables, i.e. when the query for a distributed table contains a non-GLOBAL subquery for the distributed table.
-
-Restrictions:
-
-- Only applied for IN and JOIN subqueries.
-- Only if the FROM section uses a distributed table containing more than one shard.
-- If the subquery concerns a distributed table containing more than one shard.
-- Not used for a table-valued [remote](../../sql-reference/table-functions/remote.md) function.
-
-Possible values:
-
-- `deny` — Default value. Prohibits using these types of subqueries (returns the “Double-distributed in/JOIN subqueries is denied” exception).
-- `local` — Replaces the database and table in the subquery with local ones for the destination server (shard), leaving the normal `IN`/`JOIN`.
-- `global` — Replaces the `IN`/`JOIN` query with `GLOBAL IN`/`GLOBAL JOIN`.
-- `allow` — Allows the use of these types of subqueries.
-
-## distributed_push_down_limit {#distributed_push_down_limit}
-
-Type: UInt64
-
-Default value: 1
-
-Enables or disables [LIMIT](#limit) applying on each shard separately.
-
-This will allow to avoid:
-- Sending extra rows over network;
-- Processing rows behind the limit on the initiator.
-
-Starting from version 21.9 you cannot get inaccurate results anymore, since `distributed_push_down_limit` changes query execution only if at least one of the following conditions is met:
-- [distributed_group_by_no_merge](#distributed_group_by_no_merge) > 0.
-- Query **does not have** `GROUP BY`/`DISTINCT`/`LIMIT BY`, but it has `ORDER BY`/`LIMIT`.
-- Query **has** `GROUP BY`/`DISTINCT`/`LIMIT BY` with `ORDER BY`/`LIMIT` and:
-    - [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled.
-    - [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) is enabled.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-See also:
-
-- [distributed_group_by_no_merge](#distributed_group_by_no_merge)
-- [optimize_skip_unused_shards](#optimize-skip-unused-shards)
-- [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key)
-
-## distributed_replica_error_cap {#distributed_replica_error_cap}
-
-Type: UInt64
-
-Default value: 1000
-
-- Type: unsigned int
-- Default value: 1000
-
-The error count of each replica is capped at this value, preventing a single replica from accumulating too many errors.
-
-See also:
-
-- [load_balancing](#load_balancing-round_robin)
-- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
-- [distributed_replica_error_half_life](#distributed_replica_error_half_life)
-- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors)
-
-## distributed_replica_error_half_life {#distributed_replica_error_half_life}
-
-Type: Seconds
-
-Default value: 60
-
-- Type: seconds
-- Default value: 60 seconds
-
-Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed_replica_error_half_life is set to 1 second, then the replica is considered normal 3 seconds after the last error.
-
-See also:
-
-- [load_balancing](#load_balancing-round_robin)
-- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
-- [distributed_replica_error_cap](#distributed_replica_error_cap)
-- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors)
-
-## distributed_replica_max_ignored_errors {#distributed_replica_max_ignored_errors}
-
-Type: UInt64
-
-Default value: 0
-
-- Type: unsigned int
-- Default value: 0
-
-The number of errors that will be ignored while choosing replicas (according to `load_balancing` algorithm).
-
-See also:
-
-- [load_balancing](#load_balancing-round_robin)
-- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
-- [distributed_replica_error_cap](#distributed_replica_error_cap)
-- [distributed_replica_error_half_life](#distributed_replica_error_half_life)
-
-## do_not_merge_across_partitions_select_final {#do_not_merge_across_partitions_select_final}
-
-Type: Bool
-
-Default value: 0
-
-Merge parts only in one partition in select final
-
-## empty_result_for_aggregation_by_constant_keys_on_empty_set {#empty_result_for_aggregation_by_constant_keys_on_empty_set}
-
-Type: Bool
-
-Default value: 1
-
-Return empty result when aggregating by constant keys on empty set.
-
-## empty_result_for_aggregation_by_empty_set {#empty_result_for_aggregation_by_empty_set}
-
-Type: Bool
-
-Default value: 0
-
-Return empty result when aggregating without keys on empty set.
-
-## enable_blob_storage_log {#enable_blob_storage_log}
-
-Type: Bool
-
-Default value: 1
-
-Write information about blob storage operations to system.blob_storage_log table
-
-## enable_deflate_qpl_codec {#enable_deflate_qpl_codec}
-
-Type: Bool
-
-Default value: 0
-
-If turned on, the DEFLATE_QPL codec may be used to compress columns.
-
-Possible values:
-
-- 0 - Disabled
-- 1 - Enabled
-
-## enable_early_constant_folding {#enable_early_constant_folding}
-
-Type: Bool
-
-Default value: 1
-
-Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there
-
-## enable_extended_results_for_datetime_functions {#enable_extended_results_for_datetime_functions}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables returning results of type:
-- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toLastDayOfWeek](../../sql-reference/functions/date-time-functions.md#tolastdayofweek) and [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday).
-- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot).
-
-Possible values:
-
-- 0 — Functions return `Date` or `DateTime` for all types of arguments.
-- 1 — Functions return `Date32` or `DateTime64` for `Date32` or `DateTime64` arguments and `Date` or `DateTime` otherwise.
-
-## enable_filesystem_cache {#enable_filesystem_cache}
-
-Type: Bool
-
-Default value: 1
-
-Use cache for remote filesystem. This setting does not turn on/off cache for disks (must be done via disk config), but allows to bypass cache for some queries if intended
-
-## enable_filesystem_cache_log {#enable_filesystem_cache_log}
-
-Type: Bool
-
-Default value: 0
-
-Allows to record the filesystem caching log for each query
-
-## enable_filesystem_cache_on_write_operations {#enable_filesystem_cache_on_write_operations}
-
-Type: Bool
-
-Default value: 0
-
-Write into cache on write operations. To actually work, this setting also needs to be added to the disk config.
-
-## enable_filesystem_read_prefetches_log {#enable_filesystem_read_prefetches_log}
-
-Type: Bool
-
-Default value: 0
-
-Log to system.filesystem_read_prefetches_log during query. Should be used only for testing or debugging, not recommended to be turned on by default.
-
-## enable_global_with_statement {#enable_global_with_statement}
-
-Type: Bool
-
-Default value: 1
-
-Propagate WITH statements to UNION queries and all subqueries
-
-## enable_http_compression {#enable_http_compression}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables data compression in the response to an HTTP request.
-
-For more information, read the [HTTP interface description](../../interfaces/http.md).
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## enable_job_stack_trace {#enable_job_stack_trace}
-
-Type: Bool
-
-Default value: 0
-
-Output stack trace of a job creator when job results in exception
-
-## enable_lightweight_delete {#enable_lightweight_delete}
-
-Type: Bool
-
-Default value: 1
-
-Enable lightweight DELETE mutations for mergetree tables.
-
-## enable_memory_bound_merging_of_aggregation_results {#enable_memory_bound_merging_of_aggregation_results}
-
-Type: Bool
-
-Default value: 1
-
-Enable memory bound merging strategy for aggregation.
-
-## enable_multiple_prewhere_read_steps {#enable_multiple_prewhere_read_steps}
-
-Type: Bool
-
-Default value: 1
-
-Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND
-
-## enable_named_columns_in_function_tuple {#enable_named_columns_in_function_tuple}
-
-Type: Bool
-
-Default value: 1
-
-Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers.
-
-## enable_optimize_predicate_expression {#enable_optimize_predicate_expression}
-
-Type: Bool
-
-Default value: 1
-
-Turns on predicate pushdown in `SELECT` queries.
-
-Predicate pushdown may significantly reduce network traffic for distributed queries.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-Usage
-
-Consider the following queries:
-
-1.  `SELECT count() FROM test_table WHERE date = '2018-10-10'`
-2.  `SELECT count() FROM (SELECT * FROM test_table) WHERE date = '2018-10-10'`
-
-If `enable_optimize_predicate_expression = 1`, then the execution time of these queries is equal because ClickHouse applies `WHERE` to the subquery when processing it.
-
-If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer because the `WHERE` clause applies to all the data after the subquery finishes.
-
-## enable_optimize_predicate_expression_to_final_subquery {#enable_optimize_predicate_expression_to_final_subquery}
-
-Type: Bool
-
-Default value: 1
-
-Allow push predicate to final subquery.
-
-## enable_order_by_all {#enable_order_by_all}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables sorting with `ORDER BY ALL` syntax, see [ORDER BY](../../sql-reference/statements/select/order-by.md).
-
-Possible values:
-
-- 0 — Disable ORDER BY ALL.
-- 1 — Enable ORDER BY ALL.
-
-**Example**
-
-Query:
-
-```sql
-CREATE TABLE TAB(C1 Int, C2 Int, ALL Int) ENGINE=Memory();
-
-INSERT INTO TAB VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20);
-
-SELECT * FROM TAB ORDER BY ALL; -- returns an error that ALL is ambiguous
-
-SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all = 0;
-```
-
-Result:
-
-```text
-┌─C1─┬─C2─┬─ALL─┐
-│ 20 │ 20 │  10 │
-│ 30 │ 10 │  20 │
-│ 10 │ 20 │  30 │
-└────┴────┴─────┘
-```
-
-## enable_parsing_to_custom_serialization {#enable_parsing_to_custom_serialization}
-
-Type: Bool
-
-Default value: 1
-
-If true then data can be parsed directly to columns with custom serialization (e.g. Sparse) according to hints for serialization got from the table.
-
-## enable_positional_arguments {#enable_positional_arguments}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables supporting positional arguments for [GROUP BY](../../sql-reference/statements/select/group-by.md), [LIMIT BY](../../sql-reference/statements/select/limit-by.md), [ORDER BY](../../sql-reference/statements/select/order-by.md) statements.
-
-Possible values:
-
-- 0 — Positional arguments aren't supported.
-- 1 — Positional arguments are supported: column numbers can be used instead of column names.
-
-**Example**
-
-Query:
-
-```sql
-CREATE TABLE positional_arguments(one Int, two Int, three Int) ENGINE=Memory();
-
-INSERT INTO positional_arguments VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20);
-
-SELECT * FROM positional_arguments ORDER BY 2,3;
-```
-
-Result:
-
-```text
-┌─one─┬─two─┬─three─┐
-│  30 │  10 │   20  │
-│  20 │  20 │   10  │
-│  10 │  20 │   30  │
-└─────┴─────┴───────┘
-```
-
-## enable_reads_from_query_cache {#enable_reads_from_query_cache}
-
-Type: Bool
-
-Default value: 1
-
-If turned on, results of `SELECT` queries are retrieved from the [query cache](../query-cache.md).
-
-Possible values:
-
-- 0 - Disabled
-- 1 - Enabled
-
-## enable_s3_requests_logging {#enable_s3_requests_logging}
-
-Type: Bool
-
-Default value: 0
-
-Enable very explicit logging of S3 requests. Makes sense for debug only.
-
-## enable_scalar_subquery_optimization {#enable_scalar_subquery_optimization}
-
-Type: Bool
-
-Default value: 1
-
-If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.
-
-## enable_secure_identifiers {#enable_secure_identifiers}
-
-Type: Bool
-
-Default value: 0
-
-If enabled, only allow secure identifiers which contain only underscore and alphanumeric characters
-
-## enable_sharing_sets_for_mutations {#enable_sharing_sets_for_mutations}
-
-Type: Bool
-
-Default value: 1
-
-Allow sharing set objects built for IN subqueries between different tasks of the same mutation. This reduces memory usage and CPU consumption.
-
-## enable_software_prefetch_in_aggregation {#enable_software_prefetch_in_aggregation}
-
-Type: Bool
-
-Default value: 1
-
-Enable use of software prefetch in aggregation
-
-## enable_unaligned_array_join {#enable_unaligned_array_join}
-
-Type: Bool
-
-Default value: 0
-
-Allow ARRAY JOIN with multiple arrays that have different sizes. When this setting is enabled, arrays will be resized to the longest one.
-
-## enable_url_encoding {#enable_url_encoding}
-
-Type: Bool
-
-Default value: 1
-
-Allows to enable/disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables.
-
-Enabled by default.
-
-## enable_vertical_final {#enable_vertical_final}
-
-Type: Bool
-
-Default value: 1
-
-If enabled, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows.
-
-## enable_writes_to_query_cache {#enable_writes_to_query_cache}
-
-Type: Bool
-
-Default value: 1
-
-If turned on, results of `SELECT` queries are stored in the [query cache](../query-cache.md).
-
-Possible values:
-
-- 0 - Disabled
-- 1 - Enabled
-
-## enable_zstd_qat_codec {#enable_zstd_qat_codec}
-
-Type: Bool
-
-Default value: 0
-
-If turned on, the ZSTD_QAT codec may be used to compress columns.
-
-Possible values:
-
-- 0 - Disabled
-- 1 - Enabled
-
-## engine_file_allow_create_multiple_files {#engine_file_allow_create_multiple_files}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:
-
-`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.
-
-Possible values:
-- 0 — `INSERT` query appends new data to the end of the file.
-- 1 — `INSERT` query creates a new file.
-
-## engine_file_empty_if_not_exists {#engine_file_empty_if_not_exists}
-
-Type: Bool
-
-Default value: 0
-
-Allows to select data from a file engine table without file.
-
-Possible values:
-- 0 — `SELECT` throws exception.
-- 1 — `SELECT` returns empty result.
-
-## engine_file_skip_empty_files {#engine_file_skip_empty_files}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables skipping empty files in [File](../../engines/table-engines/special/file.md) engine tables.
-
-Possible values:
-- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
-- 1 — `SELECT` returns empty result for empty file.
-
-## engine_file_truncate_on_insert {#engine_file_truncate_on_insert}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables truncate before insert in [File](../../engines/table-engines/special/file.md) engine tables.
-
-Possible values:
-- 0 — `INSERT` query appends new data to the end of the file.
-- 1 — `INSERT` query replaces existing content of the file with the new data.
-
-## engine_url_skip_empty_files {#engine_url_skip_empty_files}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables.
-
-Possible values:
-- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
-- 1 — `SELECT` returns empty result for empty file.
-
-## except_default_mode {#except_default_mode}
-
-Type: SetOperationMode
-
-Default value: ALL
-
-Set default mode in EXCEPT query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without mode will throw exception.
-
-## external_storage_connect_timeout_sec {#external_storage_connect_timeout_sec}
-
-Type: UInt64
-
-Default value: 10
-
-Connect timeout in seconds. Now supported only for MySQL
-
-## external_storage_max_read_bytes {#external_storage_max_read_bytes}
-
-Type: UInt64
-
-Default value: 0
-
-Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled
-
-## external_storage_max_read_rows {#external_storage_max_read_rows}
-
-Type: UInt64
-
-Default value: 0
-
-Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled
-
-## external_storage_rw_timeout_sec {#external_storage_rw_timeout_sec}
-
-Type: UInt64
-
-Default value: 300
-
-Read/write timeout in seconds. Now supported only for MySQL
-
-## external_table_functions_use_nulls {#external_table_functions_use_nulls}
-
-Type: Bool
-
-Default value: 1
-
-Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql](../../sql-reference/table-functions/postgresql.md) and [odbc](../../sql-reference/table-functions/odbc.md) table functions use Nullable columns.
-
-Possible values:
-
-- 0 — The table function explicitly uses Nullable columns.
-- 1 — The table function implicitly uses Nullable columns.
-
-**Usage**
-
-If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays.
-
-## external_table_strict_query {#external_table_strict_query}
-
-Type: Bool
-
-Default value: 0
-
-If it is set to true, transforming expression to local filter is forbidden for queries to external tables.
-
-## extract_key_value_pairs_max_pairs_per_row {#extract_key_value_pairs_max_pairs_per_row}
-
-Type: UInt64
-
-Default value: 1000
-
-Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.
-
-## extremes {#extremes}
-
-Type: Bool
-
-Default value: 0
-
-Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled).
-For more information, see the section “Extreme values”.
-
-## fallback_to_stale_replicas_for_distributed_queries {#fallback_to_stale_replicas_for_distributed_queries}
-
-Type: Bool
-
-Default value: 1
-
-Forces a query to an out-of-date replica if updated data is not available. See [Replication](../../engines/table-engines/mergetree-family/replication.md).
-
-ClickHouse selects the most relevant from the outdated replicas of the table.
-
-Used when performing `SELECT` from a distributed table that points to replicated tables.
-
-By default, 1 (enabled).
-
-## filesystem_cache_max_download_size {#filesystem_cache_max_download_size}
-
-Type: UInt64
-
-Default value: 137438953472
-
-Max remote filesystem cache size that can be downloaded by a single query
-
-## filesystem_cache_reserve_space_wait_lock_timeout_milliseconds {#filesystem_cache_reserve_space_wait_lock_timeout_milliseconds}
-
-Type: UInt64
-
-Default value: 1000
-
-Wait time to lock cache for space reservation in filesystem cache
-
-## filesystem_cache_segments_batch_size {#filesystem_cache_segments_batch_size}
-
-Type: UInt64
-
-Default value: 20
-
-Limit on size of a single batch of file segments that a read buffer can request from cache. Too low value will lead to excessive requests to cache, too large may slow down eviction from cache
-
-## filesystem_prefetch_max_memory_usage {#filesystem_prefetch_max_memory_usage}
-
-Type: UInt64
-
-Default value: 1073741824
-
-Maximum memory usage for prefetches.
-
-## filesystem_prefetch_step_bytes {#filesystem_prefetch_step_bytes}
-
-Type: UInt64
-
-Default value: 0
-
-Prefetch step in bytes. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task
-
-## filesystem_prefetch_step_marks {#filesystem_prefetch_step_marks}
-
-Type: UInt64
-
-Default value: 0
-
-Prefetch step in marks. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task
-
-## filesystem_prefetches_limit {#filesystem_prefetches_limit}
-
-Type: UInt64
-
-Default value: 200
-
-Maximum number of prefetches. Zero means unlimited. The setting `filesystem_prefetch_max_memory_usage` is recommended instead if you want to limit the number of prefetches.
-
-## final {#final}
-
-Type: Bool
-
-Default value: 0
-
-Automatically applies [FINAL](../../sql-reference/statements/select/from.md#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from.md#final-modifier) is applicable, including joined tables and tables in sub-queries, and
-distributed tables.
-
-Possible values:
-
-- 0 - disabled
-- 1 - enabled
-
-Example:
-
-```sql
-CREATE TABLE test
-(
-    key Int64,
-    some String
-)
-ENGINE = ReplacingMergeTree
-ORDER BY key;
-
-INSERT INTO test FORMAT Values (1, 'first');
-INSERT INTO test FORMAT Values (1, 'second');
-
-SELECT * FROM test;
-┌─key─┬─some───┐
-│   1 │ second │
-└─────┴────────┘
-┌─key─┬─some──┐
-│   1 │ first │
-└─────┴───────┘
-
-SELECT * FROM test SETTINGS final = 1;
-┌─key─┬─some───┐
-│   1 │ second │
-└─────┴────────┘
-
-SET final = 1;
-SELECT * FROM test;
-┌─key─┬─some───┐
-│   1 │ second │
-└─────┴────────┘
-```
-
-## flatten_nested {#flatten_nested}
-
-Type: Bool
-
-Default value: 1
-
-Sets the data format of [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns.
-
-Possible values:
-
-- 1 — Nested column is flattened to separate arrays.
-- 0 — Nested column stays a single array of tuples.
-
-**Usage**
-
-If the setting is set to `0`, it is possible to use an arbitrary level of nesting.
-
-**Examples**
-
-Query:
-
-``` sql
-SET flatten_nested = 1;
-CREATE TABLE t_nest (`n` Nested(a UInt32, b UInt32)) ENGINE = MergeTree ORDER BY tuple();
-
-SHOW CREATE TABLE t_nest;
-```
-
-Result:
-
-``` text
-┌─statement───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
-│ CREATE TABLE default.t_nest
-(
-    `n.a` Array(UInt32),
-    `n.b` Array(UInt32)
-)
-ENGINE = MergeTree
-ORDER BY tuple()
-SETTINGS index_granularity = 8192 │
-└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
-```
-
-Query:
-
-``` sql
-SET flatten_nested = 0;
-
-CREATE TABLE t_nest (`n` Nested(a UInt32, b UInt32)) ENGINE = MergeTree ORDER BY tuple();
-
-SHOW CREATE TABLE t_nest;
-```
-
-Result:
-
-``` text
-┌─statement──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
-│ CREATE TABLE default.t_nest
-(
-    `n` Nested(a UInt32, b UInt32)
-)
-ENGINE = MergeTree
-ORDER BY tuple()
-SETTINGS index_granularity = 8192 │
-└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
-```
-
-## force_aggregate_partitions_independently {#force_aggregate_partitions_independently}
-
-Type: Bool
-
-Default value: 0
-
-Force the use of optimization when it is applicable, but heuristics decided not to use it
-
-## force_aggregation_in_order {#force_aggregation_in_order}
-
-Type: Bool
-
-Default value: 0
-
-The setting is used by the server itself to support distributed queries. Do not change it manually, because it will break normal operations. (Forces use of aggregation in order on remote nodes during distributed aggregation).
-
-## force_data_skipping_indices {#force_data_skipping_indices}
-
-Type: String
-
-Default value:
-
-Disables query execution if the passed data skipping indices weren't used.
-
-Consider the following example:
-
-```sql
-CREATE TABLE data_01515
-(
-    key Int,
-    d1 Int,
-    d1_null Nullable(Int),
-    INDEX d1_idx d1 TYPE minmax GRANULARITY 1,
-    INDEX d1_null_idx assumeNotNull(d1_null) TYPE minmax GRANULARITY 1
-)
-Engine=MergeTree()
-ORDER BY key;
-
-SELECT * FROM data_01515;
-SELECT * FROM data_01515 SETTINGS force_data_skipping_indices=''; -- query will produce CANNOT_PARSE_TEXT error.
-SELECT * FROM data_01515 SETTINGS force_data_skipping_indices='d1_idx'; -- query will produce INDEX_NOT_USED error.
-SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='d1_idx'; -- Ok.
-SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_idx`'; -- Ok (example of full featured parser).
-SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- query will produce INDEX_NOT_USED error, since d1_null_idx is not used.
-SELECT * FROM data_01515 WHERE d1 = 0 AND assumeNotNull(d1_null) = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- Ok.
-```
-
-## force_grouping_standard_compatibility {#force_grouping_standard_compatibility}
-
-Type: Bool
-
-Default value: 1
-
-Make GROUPING function to return 1 when argument is not used as an aggregation key
-
-## force_index_by_date {#force_index_by_date}
-
-Type: Bool
-
-Default value: 0
-
-Disables query execution if the index can’t be used by date.
-
-Works with tables in the MergeTree family.
-
-If `force_index_by_date=1`, ClickHouse checks whether the query has a date key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For example, the condition `Date != ' 2000-01-01 '` is acceptable even when it matches all the data in the table (i.e., running the query requires a full scan). For more information about ranges of data in MergeTree tables, see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
-
-## force_optimize_projection {#force_optimize_projection}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [optimize_use_projections](#optimize_use_projections) setting).
-
-Possible values:
-
-- 0 — Projection optimization is not obligatory.
-- 1 — Projection optimization is obligatory.
-
-## force_optimize_projection_name {#force_optimize_projection_name}
-
-Type: String
-
-Default value:
-
-If it is set to a non-empty string, check that this projection is used in the query at least once.
-
-Possible values:
-
-- string: name of the projection that is used in a query
-
-## force_optimize_skip_unused_shards {#force_optimize_skip_unused_shards}
-
-Type: UInt64
-
-Default value: 0
-
-Enables or disables query execution if [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown.
-
-Possible values:
-
-- 0 — Disabled. ClickHouse does not throw an exception.
-- 1 — Enabled. Query execution is disabled only if the table has a sharding key.
-- 2 — Enabled. Query execution is disabled regardless of whether a sharding key is defined for the table.
-
-## force_optimize_skip_unused_shards_nesting {#force_optimize_skip_unused_shards_nesting}
-
-Type: UInt64
-
-Default value: 0
-
-Controls [`force_optimize_skip_unused_shards`](#force_optimize_skip_unused_shards) (hence still requires [`force_optimize_skip_unused_shards`](#force_optimize_skip_unused_shards)) depending on the nesting level of the distributed query (the case when you have a `Distributed` table that looks into another `Distributed` table).
-
-Possible values:
-
-- 0 - Disabled, `force_optimize_skip_unused_shards` works always.
-- 1 — Enables `force_optimize_skip_unused_shards` only for the first level.
-- 2 — Enables `force_optimize_skip_unused_shards` up to the second level.
-
-## force_primary_key {#force_primary_key}
-
-Type: Bool
-
-Default value: 0
-
-Disables query execution if indexing by the primary key is not possible.
-
-Works with tables in the MergeTree family.
-
-If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
-
-## force_remove_data_recursively_on_drop {#force_remove_data_recursively_on_drop}
-
-Type: Bool
-
-Default value: 0
-
-Recursively remove data on DROP query. Avoids 'Directory not empty' error, but may silently remove detached data
-
-## formatdatetime_f_prints_single_zero {#formatdatetime_f_prints_single_zero}
-
-Type: Bool
-
-Default value: 0
-
-Formatter '%f' in function 'formatDateTime()' prints a single zero instead of six zeros if the formatted value has no fractional seconds.
-
-## formatdatetime_format_without_leading_zeros {#formatdatetime_format_without_leading_zeros}
-
-Type: Bool
-
-Default value: 0
-
-Formatters '%c', '%l' and '%k' in function 'formatDateTime()' print months and hours without leading zeros.
-
-## formatdatetime_parsedatetime_m_is_month_name {#formatdatetime_parsedatetime_m_is_month_name}
-
-Type: Bool
-
-Default value: 1
-
-Formatter '%M' in functions 'formatDateTime()' and 'parseDateTime()' print/parse the month name instead of minutes.
-
-## fsync_metadata {#fsync_metadata}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables [fsync](http://pubs.opengroup.org/onlinepubs/9699919799/functions/fsync.html) when writing `.sql` files. Enabled by default.
-
-It makes sense to disable it if the server has millions of tiny tables that are constantly being created and destroyed.
-
-## function_implementation {#function_implementation}
-
-Type: String
-
-Default value:
-
-Choose function implementation for specific target or variant (experimental). If empty enable all of them.
-
-## function_json_value_return_type_allow_complex {#function_json_value_return_type_allow_complex}
-
-Type: Bool
-
-Default value: 0
-
-Controls whether the json_value function is allowed to return complex types (such as struct, array, map).
-
-```sql
-SELECT JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') settings function_json_value_return_type_allow_complex=true
-
-┌─JSON_VALUE('{"hello":{"world":"!"}}', '$.hello')─┐
-│ {"world":"!"}                                    │
-└──────────────────────────────────────────────────┘
-
-1 row in set. Elapsed: 0.001 sec.
-```
-
-Possible values:
-
-- true — Allow.
-- false — Disallow.
-
-## function_json_value_return_type_allow_nullable {#function_json_value_return_type_allow_nullable}
-
-Type: Bool
-
-Default value: 0
-
-Controls whether the JSON_VALUE function is allowed to return `NULL` when the value does not exist.
-
-```sql
-SELECT JSON_VALUE('{"hello":"world"}', '$.b') settings function_json_value_return_type_allow_nullable=true;
-
-┌─JSON_VALUE('{"hello":"world"}', '$.b')─┐
-│ ᴺᵁᴸᴸ                                   │
-└────────────────────────────────────────┘
-
-1 row in set. Elapsed: 0.001 sec.
-```
-
-Possible values:
-
-- true — Allow.
-- false — Disallow.
-
-## function_locate_has_mysql_compatible_argument_order {#function_locate_has_mysql_compatible_argument_order}
-
-Type: Bool
-
-Default value: 1
-
-Controls the order of arguments in function [locate](../../sql-reference/functions/string-search-functions.md#locate).
-
-Possible values:
-
-- 0 — Function `locate` accepts arguments `(haystack, needle[, start_pos])`.
-- 1 — Function `locate` accepts arguments `(needle, haystack[, start_pos])` (MySQL-compatible behavior).
-
-## function_range_max_elements_in_block {#function_range_max_elements_in_block}
-
-Type: UInt64
-
-Default value: 500000000
-
-Sets the safety threshold for data volume generated by function [range](../../sql-reference/functions/array-functions.md/#range). Defines the maximum number of values generated by function per block of data (sum of array sizes for every row in a block).
-
-Possible values:
-
-- Positive integer.
-
-**See Also**
-
-- [max_block_size](#setting-max_block_size)
-- [min_insert_block_size_rows](#min-insert-block-size-rows)
-
-## function_sleep_max_microseconds_per_block {#function_sleep_max_microseconds_per_block}
-
-Type: UInt64
-
-Default value: 3000000
-
-Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.
-
-## function_visible_width_behavior {#function_visible_width_behavior}
-
-Type: UInt64
-
-Default value: 1
-
-The version of `visibleWidth` behavior. 0 - only count the number of code points; 1 - correctly count zero-width and combining characters, count full-width characters as two, estimate the tab width, count delete characters.
-
-## geo_distance_returns_float64_on_float64_arguments {#geo_distance_returns_float64_on_float64_arguments}
-
-Type: Bool
-
-Default value: 1
-
-If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.
-
-## glob_expansion_max_elements {#glob_expansion_max_elements}
-
-Type: UInt64
-
-Default value: 1000
-
-Maximum number of allowed addresses (For external storages, table functions, etc).
-
-## grace_hash_join_initial_buckets {#grace_hash_join_initial_buckets}
-
-Type: UInt64
-
-Default value: 1
-
-Initial number of grace hash join buckets
-
-## grace_hash_join_max_buckets {#grace_hash_join_max_buckets}
-
-Type: UInt64
-
-Default value: 1024
-
-Limit on the number of grace hash join buckets
-
-## group_by_overflow_mode {#group_by_overflow_mode}
-
-Type: OverflowModeGroupBy
-
-Default value: throw
-
-What to do when the limit is exceeded.
-
-## group_by_two_level_threshold {#group_by_two_level_threshold}
-
-Type: UInt64
-
-Default value: 100000
-
-From what number of keys, a two-level aggregation starts. 0 - the threshold is not set.
-
-## group_by_two_level_threshold_bytes {#group_by_two_level_threshold_bytes}
-
-Type: UInt64
-
-Default value: 50000000
-
-From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.
-
-## group_by_use_nulls {#group_by_use_nulls}
-
-Type: Bool
-
-Default value: 0
-
-Changes the way the [GROUP BY clause](/docs/en/sql-reference/statements/select/group-by.md) treats the types of aggregation keys.
-When the `ROLLUP`, `CUBE`, or `GROUPING SETS` specifiers are used, some aggregation keys may not be used to produce some result rows.
-Columns for these keys are filled with either default value or `NULL` in corresponding rows depending on this setting.
-
-Possible values:
-
-- 0 — The default value for the aggregation key type is used to produce missing values.
-- 1 — ClickHouse executes `GROUP BY` the same way as the SQL standard says. The types of aggregation keys are converted to [Nullable](/docs/en/sql-reference/data-types/nullable.md/#data_type-nullable). Columns for corresponding aggregation keys are filled with [NULL](/docs/en/sql-reference/syntax.md) for rows that didn't use it.
-
-See also:
-
-- [GROUP BY clause](/docs/en/sql-reference/statements/select/group-by.md)
-
-## handshake_timeout_ms {#handshake_timeout_ms}
-
-Type: Milliseconds
-
-Default value: 10000
-
-Timeout in milliseconds for receiving Hello packet from replicas during handshake.
-
-## hdfs_create_new_file_on_insert {#hdfs_create_new_file_on_insert}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern:
-
-initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
-
-Possible values:
-- 0 — `INSERT` query appends new data to the end of the file.
-- 1 — `INSERT` query creates a new file.
-
-## hdfs_ignore_file_doesnt_exist {#hdfs_ignore_file_doesnt_exist}
-
-Type: Bool
-
-Default value: 0
-
-Ignore absence of file if it does not exist when reading certain keys.
-
-Possible values:
-- 1 — `SELECT` returns empty result.
-- 0 — `SELECT` throws an exception.
-
-## hdfs_replication {#hdfs_replication}
-
-Type: UInt64
-
-Default value: 0
-
-The actual number of replications can be specified when the hdfs file is created.
-
-## hdfs_skip_empty_files {#hdfs_skip_empty_files}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables skipping empty files in [HDFS](../../engines/table-engines/integrations/hdfs.md) engine tables.
-
-Possible values:
-- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
-- 1 — `SELECT` returns empty result for empty file.
-
-## hdfs_throw_on_zero_files_match {#hdfs_throw_on_zero_files_match}
-
-Type: Bool
-
-Default value: 0
-
-Throw an error if matched zero files according to glob expansion rules.
-
-Possible values:
-- 1 — `SELECT` throws an exception.
-- 0 — `SELECT` returns empty result.
-
-## hdfs_truncate_on_insert {#hdfs_truncate_on_insert}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists.
-
-Possible values:
-- 0 — `INSERT` query appends new data to the end of the file.
-- 1 — `INSERT` query replaces existing content of the file with the new data.
-
-## hedged_connection_timeout_ms {#hedged_connection_timeout_ms}
-
-Type: Milliseconds
-
-Default value: 50
-
-Connection timeout for establishing connection with replica for Hedged requests
-
-## hsts_max_age {#hsts_max_age}
-
-Type: UInt64
-
-Default value: 0
-
-Expiration time for HSTS. 0 means HSTS is disabled.
-
-## http_connection_timeout {#http_connection_timeout}
-
-Type: Seconds
-
-Default value: 1
-
-HTTP connection timeout (in seconds).
-
-Possible values:
-
-- Any positive integer.
-- 0 - Disabled (infinite timeout).
-
-## http_headers_progress_interval_ms {#http_headers_progress_interval_ms}
-
-Type: UInt64
-
-Default value: 100
-
-Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval.
-
-## http_make_head_request {#http_make_head_request}
-
-Type: Bool
-
-Default value: 1
-
-The `http_make_head_request` setting allows the execution of a `HEAD` request while reading data from HTTP to retrieve information about the file to be read, such as its size. Since it's enabled by default, it may be desirable to disable this setting in cases where the server does not support `HEAD` requests.
-
-## http_max_field_name_size {#http_max_field_name_size}
-
-Type: UInt64
-
-Default value: 131072
-
-Maximum length of field name in HTTP header
-
-## http_max_field_value_size {#http_max_field_value_size}
-
-Type: UInt64
-
-Default value: 131072
-
-Maximum length of field value in HTTP header
-
-## http_max_fields {#http_max_fields}
-
-Type: UInt64
-
-Default value: 1000000
-
-Maximum number of fields in HTTP header
-
-## http_max_multipart_form_data_size {#http_max_multipart_form_data_size}
-
-Type: UInt64
-
-Default value: 1073741824
-
-Limit on size of multipart/form-data content. This setting cannot be parsed from URL parameters and should be set in a user profile. Note that content is parsed and external tables are created in memory before the start of query execution. And this is the only limit that has an effect on that stage (limits on max memory usage and max execution time have no effect while reading HTTP form data).
-
-## http_max_request_param_data_size {#http_max_request_param_data_size}
-
-Type: UInt64
-
-Default value: 10485760
-
-Limit on size of request data used as a query parameter in predefined HTTP requests.
-
-## http_max_tries {#http_max_tries}
-
-Type: UInt64
-
-Default value: 10
-
-Max attempts to read via http.
-
-## http_max_uri_size {#http_max_uri_size}
-
-Type: UInt64
-
-Default value: 1048576
-
-Sets the maximum URI length of an HTTP request.
-
-Possible values:
-
-- Positive integer.
-
-## http_native_compression_disable_checksumming_on_decompress {#http_native_compression_disable_checksumming_on_decompress}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables checksum verification when decompressing the HTTP POST data from the client. Used only for ClickHouse native compression format (not used with `gzip` or `deflate`).
-
-For more information, read the [HTTP interface description](../../interfaces/http.md).
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## http_receive_timeout {#http_receive_timeout}
-
-Type: Seconds
-
-Default value: 30
-
-HTTP receive timeout (in seconds).
-
-Possible values:
-
-- Any positive integer.
-- 0 - Disabled (infinite timeout).
-
-## http_response_buffer_size {#http_response_buffer_size}
-
-Type: UInt64
-
-Default value: 0
-
-The number of bytes to buffer in the server memory before sending an HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).
-
-## http_retry_initial_backoff_ms {#http_retry_initial_backoff_ms}
-
-Type: UInt64
-
-Default value: 100
-
-Min milliseconds for backoff, when retrying read via http
-
-## http_retry_max_backoff_ms {#http_retry_max_backoff_ms}
-
-Type: UInt64
-
-Default value: 10000
-
-Max milliseconds for backoff, when retrying read via http
-
-## http_send_timeout {#http_send_timeout}
-
-Type: Seconds
-
-Default value: 30
-
-HTTP send timeout (in seconds).
-
-Possible values:
-
-- Any positive integer.
-- 0 - Disabled (infinite timeout).
-
-:::note
-It's applicable only to the default profile. A server reboot is required for the changes to take effect.
-:::
-
-## http_skip_not_found_url_for_globs {#http_skip_not_found_url_for_globs}
-
-Type: Bool
-
-Default value: 1
-
-Skip URLs for globs with HTTP_NOT_FOUND error
-
-## http_wait_end_of_query {#http_wait_end_of_query}
-
-Type: Bool
-
-Default value: 0
-
-Enable HTTP response buffering on the server-side.
-
-## http_write_exception_in_output_format {#http_write_exception_in_output_format}
-
-Type: Bool
-
-Default value: 1
-
-Write exception in output format to produce valid output. Works with JSON and XML formats.
-
-## http_zlib_compression_level {#http_zlib_compression_level}
-
-Type: Int64
-
-Default value: 3
-
-Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#enable_http_compression).
-
-Possible values: Numbers from 1 to 9.
-
-## iceberg_engine_ignore_schema_evolution {#iceberg_engine_ignore_schema_evolution}
-
-Type: Bool
-
-Default value: 0
-
-Allow to ignore schema evolution in Iceberg table engine and read all data using schema specified by the user on table creation or latest schema parsed from metadata on table creation.
-
-:::note
-Enabling this setting can lead to incorrect results, because with an evolved schema all data files will be read using the same schema.
-:::
-
-## idle_connection_timeout {#idle_connection_timeout}
-
-Type: UInt64
-
-Default value: 3600
-
-Timeout to close idle TCP connections after specified number of seconds.
-
-Possible values:
-
-- Positive integer (0 - close immediately, after 0 seconds).
-
-## ignore_cold_parts_seconds {#ignore_cold_parts_seconds}
-
-Type: Int64
-
-Default value: 0
-
-Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.
-
-## ignore_data_skipping_indices {#ignore_data_skipping_indices}
-
-Type: String
-
-Default value:
-
-Ignores the skipping indexes specified if used by the query.
-
-Consider the following example:
-
-```sql
-CREATE TABLE data
-(
-    key Int,
-    x Int,
-    y Int,
-    INDEX x_idx x TYPE minmax GRANULARITY 1,
-    INDEX y_idx y TYPE minmax GRANULARITY 1,
-    INDEX xy_idx (x,y) TYPE minmax GRANULARITY 1
-)
-Engine=MergeTree()
-ORDER BY key;
-
-INSERT INTO data VALUES (1, 2, 3);
-
-SELECT * FROM data;
-SELECT * FROM data SETTINGS ignore_data_skipping_indices=''; -- query will produce CANNOT_PARSE_TEXT error.
-SELECT * FROM data SETTINGS ignore_data_skipping_indices='x_idx'; -- Ok.
-SELECT * FROM data SETTINGS ignore_data_skipping_indices='na_idx'; -- Ok.
-
-SELECT * FROM data WHERE x = 1 AND y = 1 SETTINGS ignore_data_skipping_indices='xy_idx',force_data_skipping_indices='xy_idx' ; -- query will produce INDEX_NOT_USED error, since xy_idx is explicitly ignored.
-SELECT * FROM data WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';
-```
-
-The query without ignoring any indexes:
-```sql
-EXPLAIN indexes = 1 SELECT * FROM data WHERE x = 1 AND y = 2;
-
-Expression ((Projection + Before ORDER BY))
-  Filter (WHERE)
-    ReadFromMergeTree (default.data)
-    Indexes:
-      PrimaryKey
-        Condition: true
-        Parts: 1/1
-        Granules: 1/1
-      Skip
-        Name: x_idx
-        Description: minmax GRANULARITY 1
-        Parts: 0/1
-        Granules: 0/1
-      Skip
-        Name: y_idx
-        Description: minmax GRANULARITY 1
-        Parts: 0/0
-        Granules: 0/0
-      Skip
-        Name: xy_idx
-        Description: minmax GRANULARITY 1
-        Parts: 0/0
-        Granules: 0/0
-```
-
-Ignoring the `xy_idx` index:
-```sql
-EXPLAIN indexes = 1 SELECT * FROM data WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';
-
-Expression ((Projection + Before ORDER BY))
-  Filter (WHERE)
-    ReadFromMergeTree (default.data)
-    Indexes:
-      PrimaryKey
-        Condition: true
-        Parts: 1/1
-        Granules: 1/1
-      Skip
-        Name: x_idx
-        Description: minmax GRANULARITY 1
-        Parts: 0/1
-        Granules: 0/1
-      Skip
-        Name: y_idx
-        Description: minmax GRANULARITY 1
-        Parts: 0/0
-        Granules: 0/0
-```
-
-Works with tables in the MergeTree family.
-
-## ignore_drop_queries_probability {#ignore_drop_queries_probability}
-
-Type: Float
-
-Default value: 0
-
-If enabled, server will ignore all DROP table queries with specified probability (for Memory and JOIN engines it will replace DROP with TRUNCATE). Used for testing purposes.
-
-## ignore_materialized_views_with_dropped_target_table {#ignore_materialized_views_with_dropped_target_table}
-
-Type: Bool
-
-Default value: 0
-
-Ignore MVs with dropped target table during pushing to views
-
-## ignore_on_cluster_for_replicated_access_entities_queries {#ignore_on_cluster_for_replicated_access_entities_queries}
-
-Type: Bool
-
-Default value: 0
-
-Ignore ON CLUSTER clause for replicated access entities management queries.
-
-## ignore_on_cluster_for_replicated_named_collections_queries {#ignore_on_cluster_for_replicated_named_collections_queries}
-
-Type: Bool
-
-Default value: 0
-
-Ignore ON CLUSTER clause for replicated named collections management queries.
-
-## ignore_on_cluster_for_replicated_udf_queries {#ignore_on_cluster_for_replicated_udf_queries}
-
-Type: Bool
-
-Default value: 0
-
-Ignore ON CLUSTER clause for replicated UDF management queries.
-
-## implicit_transaction {#implicit_transaction}
-
-Type: Bool
-
-Default value: 0
-
-If enabled and not already inside a transaction, wraps the query inside a full transaction (begin + commit or rollback)
-
-## input_format_parallel_parsing {#input_format_parallel_parsing}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables order-preserving parallel parsing of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats.
-
-Possible values:
-
-- 1 — Enabled.
-- 0 — Disabled.
-
-## insert_allow_materialized_columns {#insert_allow_materialized_columns}
-
-Type: Bool
-
-Default value: 0
-
-If setting is enabled, allow materialized columns in INSERT.
-
-## insert_deduplicate {#insert_deduplicate}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables block deduplication of `INSERT` (for Replicated\* tables).
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
-For replicated tables, by default only the 100 most recent blocks for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md/#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)).
-For not replicated tables see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window).
-
-## insert_deduplication_token {#insert_deduplication_token}
-
-Type: String
-
-Default value:
-
-The setting allows a user to provide their own deduplication semantics in MergeTree/ReplicatedMergeTree.
-For example, by providing a unique value for the setting in each INSERT statement,
-user can avoid the same inserted data being deduplicated.
-
-Possible values:
-
-- Any string
-
-`insert_deduplication_token` is used for deduplication _only_ when not empty.
-
-For replicated tables, by default only the 100 most recent inserts for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md/#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)).
-For not replicated tables see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window).
-
-:::note
-`insert_deduplication_token` works on a partition level (the same as `insert_deduplication` checksum). Multiple partitions can have the same `insert_deduplication_token`.
-:::
-
-Example:
-
-```sql
-CREATE TABLE test_table
-( A Int64 )
-ENGINE = MergeTree
-ORDER BY A
-SETTINGS non_replicated_deduplication_window = 100;
-
-INSERT INTO test_table SETTINGS insert_deduplication_token = 'test' VALUES (1);
-
--- the next insert won't be deduplicated because insert_deduplication_token is different
-INSERT INTO test_table SETTINGS insert_deduplication_token = 'test1' VALUES (1);
-
--- the next insert will be deduplicated because insert_deduplication_token
--- is the same as one of the previous
-INSERT INTO test_table SETTINGS insert_deduplication_token = 'test' VALUES (2);
-
-SELECT * FROM test_table
-
-┌─A─┐
-│ 1 │
-└───┘
-┌─A─┐
-│ 1 │
-└───┘
-```
-
-## insert_keeper_fault_injection_probability {#insert_keeper_fault_injection_probability}
-
-Type: Float
-
-Default value: 0
-
-Approximate probability of failure for a keeper request during insert. Valid value is in interval [0.0f, 1.0f]
-
-## insert_keeper_fault_injection_seed {#insert_keeper_fault_injection_seed}
-
-Type: UInt64
-
-Default value: 0
-
-0 - random seed, otherwise the setting value
-
-## insert_keeper_max_retries {#insert_keeper_max_retries}
-
-Type: UInt64
-
-Default value: 20
-
-The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeeper) requests during insert into replicated MergeTree. Only Keeper requests which failed due to network error, Keeper session timeout, or request timeout are considered for retries.
-
-Possible values:
-
-- Positive integer.
-- 0 — Retries are disabled
-
-Cloud default value: `20`.
-
-Keeper request retries are done after some timeout. The timeout is controlled by the following settings: `insert_keeper_retry_initial_backoff_ms`, `insert_keeper_retry_max_backoff_ms`.
-The first retry is done after `insert_keeper_retry_initial_backoff_ms` timeout. The subsequent timeouts will be calculated as follows:
-```
-timeout = min(insert_keeper_retry_max_backoff_ms, latest_timeout * 2)
-```
-
-For example, if `insert_keeper_retry_initial_backoff_ms=100`, `insert_keeper_retry_max_backoff_ms=10000` and `insert_keeper_max_retries=8` then timeouts will be `100, 200, 400, 800, 1600, 3200, 6400, 10000`.
-
-Apart from fault tolerance, the retries aim to provide a better user experience - they make it possible to avoid returning an error during INSERT execution if Keeper is restarted, for example, due to an upgrade.
-
-## insert_keeper_retry_initial_backoff_ms {#insert_keeper_retry_initial_backoff_ms}
-
-Type: UInt64
-
-Default value: 100
-
-Initial timeout (in milliseconds) to retry a failed Keeper request during INSERT query execution
-
-Possible values:
-
-- Positive integer.
-- 0 — No timeout
-
-## insert_keeper_retry_max_backoff_ms {#insert_keeper_retry_max_backoff_ms}
-
-Type: UInt64
-
-Default value: 10000
-
-Maximum timeout (in milliseconds) to retry a failed Keeper request during INSERT query execution
-
-Possible values:
-
-- Positive integer.
-- 0 — Maximum timeout is not limited
-
-## insert_null_as_default {#insert_null_as_default}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md/#create-default-values) instead of [NULL](../../sql-reference/syntax.md/#null-literal) into columns with not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable) data type.
-If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting.
-
-This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md/#inserting-the-results-of-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause.
-
-Possible values:
-
-- 0 — Inserting `NULL` into a not nullable column causes an exception.
-- 1 — Default column value is inserted instead of `NULL`.
-
-## insert_quorum {#insert_quorum}
-
-Type: UInt64Auto
-
-Default value: 0
-
-:::note
-This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information.
-:::
-
-Enables the quorum writes.
-
-- If `insert_quorum < 2`, the quorum writes are disabled.
-- If `insert_quorum >= 2`, the quorum writes are enabled.
-- If `insert_quorum = 'auto'`, use majority number (`number_of_replicas / 2 + 1`) as quorum number.
-
-Quorum writes
-
-`INSERT` succeeds only when ClickHouse manages to correctly write data to the `insert_quorum` of replicas during the `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach the `insert_quorum`, the write is considered failed and ClickHouse will delete the inserted block from all the replicas where data has already been written.
-
-When `insert_quorum_parallel` is disabled, all replicas in the quorum are consistent, i.e. they contain data from all previous `INSERT` queries (the `INSERT` sequence is linearized). When reading data written using `insert_quorum` and `insert_quorum_parallel` is disabled, you can turn on sequential consistency for `SELECT` queries using [select_sequential_consistency](#select_sequential_consistency).
-
-ClickHouse generates an exception:
-
-- If the number of available replicas at the time of the query is less than the `insert_quorum`.
-- When `insert_quorum_parallel` is disabled and an attempt to write data is made when the previous block has not yet been inserted in `insert_quorum` of replicas. This situation may occur if the user tries to perform another `INSERT` query to the same table before the previous one with `insert_quorum` is completed.
-
-See also:
-
-- [insert_quorum_timeout](#insert_quorum_timeout)
-- [insert_quorum_parallel](#insert_quorum_parallel)
-- [select_sequential_consistency](#select_sequential_consistency)
-
-## insert_quorum_parallel {#insert_quorum_parallel}
-
-Type: Bool
-
-Default value: 1
-
-:::note
-This setting is not applicable to SharedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information.
-:::
-
-Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-See also:
-
-- [insert_quorum](#insert_quorum)
-- [insert_quorum_timeout](#insert_quorum_timeout)
-- [select_sequential_consistency](#select_sequential_consistency)
-
-## insert_quorum_timeout {#insert_quorum_timeout}
-
-Type: Milliseconds
-
-Default value: 600000
-
-Write to a quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
-
-See also:
-
-- [insert_quorum](#insert_quorum)
-- [insert_quorum_parallel](#insert_quorum_parallel)
-- [select_sequential_consistency](#select_sequential_consistency)
-
-## insert_shard_id {#insert_shard_id}
-
-Type: UInt64
-
-Default value: 0
-
-If not `0`, specifies the shard of [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table into which the data will be inserted synchronously.
-
-If `insert_shard_id` value is incorrect, the server will throw an exception.
-
-To get the number of shards on `requested_cluster`, you can check server config or use this query:
-
-``` sql
-SELECT uniq(shard_num) FROM system.clusters WHERE cluster = 'requested_cluster';
-```
-
-Possible values:
-
-- 0 — Disabled.
-- Any number from `1` to `shards_num` of corresponding [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table.
-
-**Example**
-
-Query:
-
-```sql
-CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number;
-CREATE TABLE x_dist AS x ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), x);
-INSERT INTO x_dist SELECT * FROM numbers(5) SETTINGS insert_shard_id = 1;
-SELECT * FROM x_dist ORDER BY number ASC;
-```
-
-Result:
-
-``` text
-┌─number─┐
-│      0 │
-│      0 │
-│      1 │
-│      1 │
-│      2 │
-│      2 │
-│      3 │
-│      3 │
-│      4 │
-│      4 │
-└────────┘
-```
-
-## interactive_delay {#interactive_delay}
-
-Type: UInt64
-
-Default value: 100000
-
-The interval in microseconds for checking whether request execution has been canceled and sending the progress.
-
-## intersect_default_mode {#intersect_default_mode}
-
-Type: SetOperationMode
-
-Default value: ALL
-
-Set default mode in INTERSECT query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without mode will throw exception.
-
-## join_algorithm {#join_algorithm}
-
-Type: JoinAlgorithm
-
-Default value: default
-
-Specifies which [JOIN](../../sql-reference/statements/select/join.md) algorithm is used.
-
-Several algorithms can be specified, and an available one would be chosen for a particular query based on kind/strictness and table engine.
-
-Possible values:
-
-- default
-
- This is the equivalent of `hash` or `direct`, if possible (same as `direct,hash`)
-
-- grace_hash
-
- [Grace hash join](https://en.wikipedia.org/wiki/Hash_join#Grace_hash_join) is used.  Grace hash provides an algorithm option that provides performant complex joins while limiting memory use.
-
- The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#max_bytes_in_join)), the number of buckets is increased and the assigned bucket is recomputed for each row. Any rows which don’t belong to the current bucket are flushed and reassigned.
-
- Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.
-
-- hash
-
- [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
-
-- parallel_hash
-
- A variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process.
-
- When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM.
-
-- partial_merge
-
- A variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted.
-
- The `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported).
-
- When using the `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by the `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks.
-
-- direct
-
- This algorithm can be applied when the storage for the right table supports key-value requests.
-
- The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md/#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only the `LEFT` and `INNER` JOINs.
-
-- auto
-
- When set to `auto`, `hash` join is tried first, and the algorithm is switched on the fly to another algorithm if the memory limit is violated.
-
-- full_sorting_merge
-
- [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting of the joined tables before joining.
-
-- prefer_partial_merge
-
- ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`.
-
-## join_any_take_last_row {#join_any_take_last_row}
-
-Type: Bool
-
-Default value: 0
-
-Changes the behaviour of join operations with `ANY` strictness.
-
-:::note
-This setting applies only for `JOIN` operations with [Join](../../engines/table-engines/special/join.md) engine tables.
-:::
-
-Possible values:
-
-- 0 — If the right table has more than one matching row, only the first one found is joined.
-- 1 — If the right table has more than one matching row, only the last one found is joined.
-
-See also:
-
-- [JOIN clause](../../sql-reference/statements/select/join.md/#select-join)
-- [Join table engine](../../engines/table-engines/special/join.md)
-- [join_default_strictness](#join_default_strictness)
-
-## join_default_strictness {#join_default_strictness}
-
-Type: JoinStrictness
-
-Default value: ALL
-
-Sets default strictness for [JOIN clauses](../../sql-reference/statements/select/join.md/#select-join).
-
-Possible values:
-
-- `ALL` — If the right table has several matching rows, ClickHouse creates a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from matching rows. This is the normal `JOIN` behaviour from standard SQL.
-- `ANY` — If the right table has several matching rows, only the first one found is joined. If the right table has only one matching row, the results of `ANY` and `ALL` are the same.
-- `ASOF` — For joining sequences with an uncertain match.
-- `Empty string` — If `ALL` or `ANY` is not specified in the query, ClickHouse throws an exception.
-
-## join_on_disk_max_files_to_merge {#join_on_disk_max_files_to_merge}
-
-Type: UInt64
-
-Default value: 64
-
-Limits the number of files allowed for parallel sorting in MergeJoin operations when they are executed on disk.
-
-The bigger the value of the setting, the more RAM is used and the less disk I/O is needed.
-
-Possible values:
-
-- Any positive integer, starting from 2.
-
-## join_output_by_rowlist_perkey_rows_threshold {#join_output_by_rowlist_perkey_rows_threshold}
-
-Type: UInt64
-
-Default value: 5
-
-The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join.
-
-## join_overflow_mode {#join_overflow_mode}
-
-Type: OverflowMode
-
-Default value: throw
-
-What to do when the limit is exceeded.
-
-## join_to_sort_maximum_table_rows {#join_to_sort_maximum_table_rows}
-
-Type: UInt64
-
-Default value: 10000
-
-The maximum number of rows in the right table to determine whether to rerange the right table by key in left or inner join.
-
-## join_to_sort_minimum_perkey_rows {#join_to_sort_minimum_perkey_rows}
-
-Type: UInt64
-
-Default value: 40
-
-The lower limit of per-key average rows in the right table to determine whether to rerange the right table by key in left or inner join. This setting ensures that the optimization is not applied for sparse table keys.
-
-## join_use_nulls {#join_use_nulls}
-
-Type: Bool
-
-Default value: 0
-
-Sets the type of [JOIN](../../sql-reference/statements/select/join.md) behaviour. When merging tables, empty cells may appear. ClickHouse fills them differently based on this setting.
-
-Possible values:
-
-- 0 — The empty cells are filled with the default value of the corresponding field type.
-- 1 — `JOIN` behaves the same way as in standard SQL. The type of the corresponding field is converted to [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable), and empty cells are filled with [NULL](../../sql-reference/syntax.md).
-
-## joined_subquery_requires_alias {#joined_subquery_requires_alias}
-
-Type: Bool
-
-Default value: 1
-
-Force joined subqueries and table functions to have aliases for correct name qualification.
-
-## kafka_disable_num_consumers_limit {#kafka_disable_num_consumers_limit}
-
-Type: Bool
-
-Default value: 0
-
-Disable limit on kafka_num_consumers that depends on the number of available CPU cores.
-
-## kafka_max_wait_ms {#kafka_max_wait_ms}
-
-Type: Milliseconds
-
-Default value: 5000
-
-The wait time in milliseconds for reading messages from [Kafka](../../engines/table-engines/integrations/kafka.md/#kafka) before retry.
-
-Possible values:
-
-- Positive integer.
-- 0 — Infinite timeout.
-
-See also:
-
-- [Apache Kafka](https://kafka.apache.org/)
-
-## keeper_map_strict_mode {#keeper_map_strict_mode}
-
-Type: Bool
-
-Default value: 0
-
-Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for an already existing key.
-
-## keeper_max_retries {#keeper_max_retries}
-
-Type: UInt64
-
-Default value: 10
-
-Max retries for general keeper operations
-
-## keeper_retry_initial_backoff_ms {#keeper_retry_initial_backoff_ms}
-
-Type: UInt64
-
-Default value: 100
-
-Initial backoff timeout for general keeper operations
-
-## keeper_retry_max_backoff_ms {#keeper_retry_max_backoff_ms}
-
-Type: UInt64
-
-Default value: 5000
-
-Max backoff timeout for general keeper operations
-
-## legacy_column_name_of_tuple_literal {#legacy_column_name_of_tuple_literal}
-
-Type: Bool
-
-Default value: 0
-
-List all names of elements of large tuple literals in their column names instead of hash. This setting exists only for compatibility reasons. It makes sense to set it to 'true' while doing a rolling update of a cluster from a version lower than 21.7 to a higher one.
-
-## lightweight_deletes_sync {#lightweight_deletes_sync}
-
-Type: UInt64
-
-Default value: 2
-
-The same as [`mutations_sync`](#mutations_sync), but controls only execution of lightweight deletes.
-
-Possible values:
-
-- 0 - Mutations execute asynchronously.
-- 1 - The query waits for the lightweight deletes to complete on the current server.
-- 2 - The query waits for the lightweight deletes to complete on all replicas (if they exist).
-
-**See Also**
-
-- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries)
-- [Mutations](../../sql-reference/statements/alter/index.md#mutations)
-
-## limit {#limit}
-
-Type: UInt64
-
-Default value: 0
-
-Sets the maximum number of rows to get from the query result. It adjusts the value set by the [LIMIT](../../sql-reference/statements/select/limit.md/#limit-clause) clause, so that the limit, specified in the query, cannot exceed the limit, set by this setting.
-
-Possible values:
-
-- 0 — The number of rows is not limited.
-- Positive integer.
-
-## live_view_heartbeat_interval {#live_view_heartbeat_interval}
-
-Type: Seconds
-
-Default value: 15
-
-The heartbeat interval in seconds to indicate live query is alive.
-
-## load_balancing {#load_balancing}
-
-Type: LoadBalancing
-
-Default value: random
-
-Specifies the algorithm of replicas selection that is used for distributed query processing.
-
-ClickHouse supports the following algorithms of choosing replicas:
-
-- [Random](#load_balancing-random) (by default)
-- [Nearest hostname](#load_balancing-nearest_hostname)
-- [Hostname levenshtein distance](#load_balancing-hostname_levenshtein_distance)
-- [In order](#load_balancing-in_order)
-- [First or random](#load_balancing-first_or_random)
-- [Round robin](#load_balancing-round_robin)
-
-See also:
-
-- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors)
-
-### Random (by Default) {#load_balancing-random}
-
-``` sql
-load_balancing = random
-```
-
-The number of errors is counted for each replica. The query is sent to the replica with the fewest errors, and if there are several of these, to any one of them.
-Disadvantages: Server proximity is not accounted for; if the replicas have different data, you will also get different data.
-
-### Nearest Hostname {#load_balancing-nearest_hostname}
-
-``` sql
-load_balancing = nearest_hostname
-```
-
-The number of errors is counted for each replica. Every 5 minutes, the number of errors is integrally divided by 2. Thus, the number of errors is calculated for a recent time with exponential smoothing. If there is one replica with a minimal number of errors (i.e. errors occurred recently on the other replicas), the query is sent to it. If there are multiple replicas with the same minimal number of errors, the query is sent to the replica with a hostname that is most similar to the server’s hostname in the config file (for the number of different characters in identical positions, up to the minimum length of both hostnames).
-
-For instance, example01-01-1 and example01-01-2 are different in one position, while example01-01-1 and example01-02-2 differ in two places.
-This method might seem primitive, but it does not require external data about network topology, and it does not compare IP addresses, which would be complicated for our IPv6 addresses.
-
-Thus, if there are equivalent replicas, the closest one by name is preferred.
-We can also assume that when sending a query to the same server, in the absence of failures, a distributed query will also go to the same servers. So even if different data is placed on the replicas, the query will return mostly the same results.
-
-### Hostname levenshtein distance {#load_balancing-hostname_levenshtein_distance}
-
-``` sql
-load_balancing = hostname_levenshtein_distance
-```
-
-Just like `nearest_hostname`, but it compares hostname in a [levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) manner. For example:
-
-``` text
-example-clickhouse-0-0 ample-clickhouse-0-0
-1
-
-example-clickhouse-0-0 example-clickhouse-1-10
-2
-
-example-clickhouse-0-0 example-clickhouse-12-0
-3
-```
-
-### In Order {#load_balancing-in_order}
-
-``` sql
-load_balancing = in_order
-```
-
-Replicas with the same number of errors are accessed in the same order as they are specified in the configuration.
-This method is appropriate when you know exactly which replica is preferable.
-
-### First or Random {#load_balancing-first_or_random}
-
-``` sql
-load_balancing = first_or_random
-```
-
-This algorithm chooses the first replica in the set or a random replica if the first is unavailable. It’s effective in cross-replication topology setups, but useless in other configurations.
-
-The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, the load is evenly distributed among replicas that are still available.
-
-It's possible to explicitly define what the first replica is by using the setting `load_balancing_first_offset`. This gives more control to rebalance query workloads among replicas.
-
-### Round Robin {#load_balancing-round_robin}
-
-``` sql
-load_balancing = round_robin
-```
-
-This algorithm uses a round-robin policy across replicas with the same number of errors (only the queries with the `round_robin` policy are accounted for).
-
-## load_balancing_first_offset {#load_balancing_first_offset}
-
-Type: UInt64
-
-Default value: 0
-
-Which replica to preferably send a query to when the FIRST_OR_RANDOM load balancing strategy is used.
-
-## load_marks_asynchronously {#load_marks_asynchronously}
-
-Type: Bool
-
-Default value: 0
-
-Load MergeTree marks asynchronously
-
-## local_filesystem_read_method {#local_filesystem_read_method}
-
-Type: String
-
-Default value: pread_threadpool
-
-Method of reading data from local filesystem, one of: read, pread, mmap, io_uring, pread_threadpool. The 'io_uring' method is experimental and does not work for Log, TinyLog, StripeLog, File, Set and Join, and other tables with append-able files in presence of concurrent reads and writes.
-
-## local_filesystem_read_prefetch {#local_filesystem_read_prefetch}
-
-Type: Bool
-
-Default value: 0
-
-Should use prefetching when reading data from local filesystem.
-
-## lock_acquire_timeout {#lock_acquire_timeout}
-
-Type: Seconds
-
-Default value: 120
-
-Defines how many seconds a locking request waits before failing.
-
-Locking timeout is used to protect from deadlocks while executing read/write operations with tables. When the timeout expires and the locking request fails, the ClickHouse server throws an exception "Locking attempt timed out! Possible deadlock avoided. Client should retry." with error code `DEADLOCK_AVOIDED`.
-
-Possible values:
-
-- Positive integer (in seconds).
-- 0 — No locking timeout.
-
-## log_comment {#log_comment}
-
-Type: String
-
-Default value:
-
-Specifies the value for the `log_comment` field of the [system.query_log](../system-tables/query_log.md) table and comment text for the server log.
-
-It can be used to improve the readability of server logs. Additionally, it helps to select queries related to the test from the `system.query_log` after running [clickhouse-test](../../development/tests.md).
-
-Possible values:
-
-- Any string no longer than [max_query_size](#max_query_size). If the max_query_size is exceeded, the server throws an exception.
-
-**Example**
-
-Query:
-
-``` sql
-SET log_comment = 'log_comment test', log_queries = 1;
-SELECT 1;
-SYSTEM FLUSH LOGS;
-SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test' AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 2;
-```
-
-Result:
-
-``` text
-┌─type────────┬─query─────┐
-│ QueryStart  │ SELECT 1; │
-│ QueryFinish │ SELECT 1; │
-└─────────────┴───────────┘
-```
-
-## log_formatted_queries {#log_formatted_queries}
-
-Type: Bool
-
-Default value: 0
-
-Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates `formatted_query` column in the [system.query_log](../../operations/system-tables/query_log.md)).
-
-Possible values:
-
-- 0 — Formatted queries are not logged in the system table.
-- 1 — Formatted queries are logged in the system table.
-
-## log_processors_profiles {#log_processors_profiles}
-
-Type: Bool
-
-Default value: 1
-
-Write time that processor spent during execution/waiting for data to `system.processors_profile_log` table.
-
-See also:
-
-- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md)
-- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)
-
-## log_profile_events {#log_profile_events}
-
-Type: Bool
-
-Default value: 1
-
-Log query performance statistics into the query_log, query_thread_log and query_views_log.
-
-## log_queries {#log_queries}
-
-Type: Bool
-
-Default value: 1
-
-Setting up query logging.
-
-Queries sent to ClickHouse with this setup are logged according to the rules in the [query_log](../../operations/server-configuration-parameters/settings.md/#query-log) server configuration parameter.
-
-Example:
-
-``` text
-log_queries=1
-```
-
-## log_queries_cut_to_length {#log_queries_cut_to_length}
-
-Type: UInt64
-
-Default value: 100000
-
-If query length is greater than a specified threshold (in bytes), then cut query when writing to query log. Also limit the length of printed query in ordinary text log.
-
-## log_queries_min_query_duration_ms {#log_queries_min_query_duration_ms}
-
-Type: Milliseconds
-
-Default value: 0
-
-If enabled (non-zero), queries faster than the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables:
-
-- `system.query_log`
-- `system.query_thread_log`
-
-Only the queries with the following type will get to the log:
-
-- `QUERY_FINISH`
-- `EXCEPTION_WHILE_PROCESSING`
-
-- Type: milliseconds
-- Default value: 0 (any query)
-
-## log_queries_min_type {#log_queries_min_type}
-
-Type: LogQueriesType
-
-Default value: QUERY_START
-
-`query_log` minimal type to log.
-
-Possible values:
-- `QUERY_START` (`=1`)
-- `QUERY_FINISH` (`=2`)
-- `EXCEPTION_BEFORE_START` (`=3`)
-- `EXCEPTION_WHILE_PROCESSING` (`=4`)
-
-Can be used to limit which entities will go to `query_log`, say you are interested only in errors, then you can use `EXCEPTION_WHILE_PROCESSING`:
-
-``` text
-log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
-```
-
-## log_queries_probability {#log_queries_probability}
-
-Type: Float
-
-Default value: 1
-
-Allows a user to write to [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), and [query_views_log](../../operations/system-tables/query_views_log.md) system tables only a sample of queries selected randomly with the specified probability. It helps to reduce the load with a large volume of queries in a second.
-
-Possible values:
-
-- 0 — Queries are not logged in the system tables.
-- Positive floating-point number in the range [0..1]. For example, if the setting value is `0.5`, about half of the queries are logged in the system tables.
-- 1 — All queries are logged in the system tables.
-
-## log_query_settings {#log_query_settings}
-
-Type: Bool
-
-Default value: 1
-
-Log query settings into the query_log.
-
-## log_query_threads {#log_query_threads}
-
-Type: Bool
-
-Default value: 0
-
-Setting up query threads logging.
-
-Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#log_queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#query_thread_log) server configuration parameter.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-**Example**
-
-``` text
-log_query_threads=1
-```
-
-## log_query_views {#log_query_views}
-
-Type: Bool
-
-Default value: 1
-
-Setting up query views logging.
-
-When a query run by ClickHouse with this setting enabled has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md/#query_views_log) server configuration parameter.
-
-Example:
-
-``` text
-log_query_views=1
-```
-
-## low_cardinality_allow_in_native_format {#low_cardinality_allow_in_native_format}
-
-Type: Bool
-
-Default value: 1
-
-Allows or restricts using the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type with the [Native](../../interfaces/formats.md/#native) format.
-
-If usage of `LowCardinality` is restricted, ClickHouse server converts `LowCardinality`-columns to ordinary ones for `SELECT` queries, and converts ordinary columns to `LowCardinality`-columns for `INSERT` queries.
-
-This setting is required mainly for third-party clients which do not support `LowCardinality` data type.
-
-Possible values:
-
-- 1 — Usage of `LowCardinality` is not restricted.
-- 0 — Usage of `LowCardinality` is restricted.
-
-## low_cardinality_max_dictionary_size {#low_cardinality_max_dictionary_size}
-
-Type: UInt64
-
-Default value: 8192
-
-Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. All the data that can’t be encoded due to maximum dictionary size limitation ClickHouse writes in an ordinary method.
-
-Possible values:
-
-- Any positive integer.
-
-## low_cardinality_use_single_dictionary_for_part {#low_cardinality_use_single_dictionary_for_part}
-
-Type: Bool
-
-Default value: 0
-
-Turns on or off the use of a single dictionary for the data part.
-
-By default, the ClickHouse server monitors the size of dictionaries and if a dictionary overflows then the server starts to write the next one. To prohibit creating several dictionaries set `low_cardinality_use_single_dictionary_for_part = 1`.
-
-Possible values:
-
-- 1 — Creating several dictionaries for the data part is prohibited.
-- 0 — Creating several dictionaries for the data part is not prohibited.
-
-## materialize_skip_indexes_on_insert {#materialize_skip_indexes_on_insert}
-
-Type: Bool
-
-Default value: 1
-
-If true, skip indexes are calculated on inserts; otherwise, skip indexes will be calculated only during merges.
-
-## materialize_statistics_on_insert {#materialize_statistics_on_insert}
-
-Type: Bool
-
-Default value: 1
-
-If true, statistics are calculated on inserts; otherwise, statistics will be calculated only during merges.
-
-## materialize_ttl_after_modify {#materialize_ttl_after_modify}
-
-Type: Bool
-
-Default value: 1
-
-Apply TTL for old data, after ALTER MODIFY TTL query
-
-## materialized_views_ignore_errors {#materialized_views_ignore_errors}
-
-Type: Bool
-
-Default value: 0
-
-Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs
-
-## max_analyze_depth {#max_analyze_depth}
-
-Type: UInt64
-
-Default value: 5000
-
-Maximum number of analyses performed by interpreter.
-
-## max_ast_depth {#max_ast_depth}
-
-Type: UInt64
-
-Default value: 1000
-
-Maximum depth of query syntax tree. Checked after parsing.
-
-## max_ast_elements {#max_ast_elements}
-
-Type: UInt64
-
-Default value: 50000
-
-Maximum size of query syntax tree in number of nodes. Checked after parsing.
-
-## max_backup_bandwidth {#max_backup_bandwidth}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum read speed in bytes per second for particular backup on server. Zero means unlimited.
-
-## max_block_size {#max_block_size}
-
-Type: UInt64
-
-Default value: 65409
-
-In ClickHouse, data is processed by blocks, which are sets of column parts. The internal processing cycles for a single block are efficient but there are noticeable costs when processing each block.
-
-The `max_block_size` setting indicates the recommended maximum number of rows to include in a single block when loading data from tables. Blocks the size of `max_block_size` are not always loaded from the table: if ClickHouse determines that less data needs to be retrieved, a smaller block is processed.
-
-The block size should not be too small to avoid noticeable costs when processing each block. It should also not be too large to ensure that queries with a LIMIT clause execute quickly after processing the first block. When setting `max_block_size`, the goal should be to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality.
-
-## max_bytes_before_external_group_by {#max_bytes_before_external_group_by}
-
-Type: UInt64
-
-Default value: 0
-
-If memory usage during GROUP BY operation is exceeding this threshold in bytes, activate the 'external aggregation' mode (spill data to disk). Recommended value is half of the available system memory.
-
-## max_bytes_before_external_sort {#max_bytes_before_external_sort}
-
-Type: UInt64
-
-Default value: 0
-
-If memory usage during ORDER BY operation is exceeding this threshold in bytes, activate the 'external sorting' mode (spill data to disk). Recommended value is half of the available system memory.
-
-## max_bytes_before_remerge_sort {#max_bytes_before_remerge_sort}
-
-Type: UInt64
-
-Default value: 1000000000
-
-In case of ORDER BY with LIMIT, when memory usage is higher than specified threshold, perform additional steps of merging blocks before final merge to keep just top LIMIT rows.
-
-## max_bytes_in_distinct {#max_bytes_in_distinct}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum total size of the state (in uncompressed bytes) in memory for the execution of DISTINCT.
-
-## max_bytes_in_join {#max_bytes_in_join}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum size of the hash table for JOIN (in number of bytes in memory).
-
-## max_bytes_in_set {#max_bytes_in_set}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum size of the set (in bytes in memory) resulting from the execution of the IN section.
-
-## max_bytes_to_read {#max_bytes_to_read}
-
-Type: UInt64
-
-Default value: 0
-
-Limit on read bytes (after decompression) from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server.
-
-## max_bytes_to_read_leaf {#max_bytes_to_read_leaf}
-
-Type: UInt64
-
-Default value: 0
-
-Limit on read bytes (after decompression) on the leaf nodes for distributed queries. Limit is applied for local reads only, excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1.
-
-## max_bytes_to_sort {#max_bytes_to_sort}
-
-Type: UInt64
-
-Default value: 0
-
-If more than the specified amount of (uncompressed) bytes have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception
-
-## max_bytes_to_transfer {#max_bytes_to_transfer}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum size (in uncompressed bytes) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.
-
-## max_columns_to_read {#max_columns_to_read}
-
-Type: UInt64
-
-Default value: 0
-
-If a query requires reading more than specified number of columns, exception is thrown. Zero value means unlimited. This setting is useful to prevent too complex queries.
-
-## max_compress_block_size {#max_compress_block_size}
-
-Type: UInt64
-
-Default value: 1048576
-
-The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying a smaller block size generally leads to slightly reduced compression ratio, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced.
-
-:::note
-This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
-:::
-
-Don’t confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table).
-
-## max_concurrent_queries_for_all_users {#max_concurrent_queries_for_all_users}
-
-Type: UInt64
-
-Default value: 0
-
-Throw exception if the value of this setting is less than or equal to the current number of simultaneously processed queries.
-
-Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded.
-
-Modifying the setting for one query or user does not affect other queries.
-
-Possible values:
-
-- Positive integer.
-- 0 — No limit.
-
-**Example**
-
-``` xml
-<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users>
-```
-
-**See Also**
-
-- [max_concurrent_queries](/docs/en/operations/server-configuration-parameters/settings.md/#max_concurrent_queries)
-
-## max_concurrent_queries_for_user {#max_concurrent_queries_for_user}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum number of simultaneously processed queries per user.
-
-Possible values:
-
-- Positive integer.
-- 0 — No limit.
-
-**Example**
-
-``` xml
-<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
-```
-
-## max_distributed_connections {#max_distributed_connections}
-
-Type: UInt64
-
-Default value: 1024
-
-The maximum number of simultaneous connections with remote servers for distributed processing of a single query to a single Distributed table. We recommend setting a value no less than the number of servers in the cluster.
-
-The following parameters are only used when creating Distributed tables (and when launching a server), so there is no reason to change them at runtime.
-
-## max_distributed_depth {#max_distributed_depth}
-
-Type: UInt64
-
-Default value: 5
-
-Limits the maximum depth of recursive queries for [Distributed](../../engines/table-engines/special/distributed.md) tables.
-
-If the value is exceeded, the server throws an exception.
-
-Possible values:
-
-- Positive integer.
-- 0 — Unlimited depth.
-
-## max_download_buffer_size {#max_download_buffer_size}
-
-Type: UInt64
-
-Default value: 10485760
-
-The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.
-
-## max_download_threads {#max_download_threads}
-
-Type: MaxThreads
-
-Default value: 4
-
-The maximum number of threads to download data (e.g. for URL engine).
-
-## max_estimated_execution_time {#max_estimated_execution_time}
-
-Type: Seconds
-
-Default value: 0
-
-Maximum query estimate execution time in seconds.
-
-## max_execution_speed {#max_execution_speed}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum number of execution rows per second.
-
-## max_execution_speed_bytes {#max_execution_speed_bytes}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum number of execution bytes per second.
-
-## max_execution_time {#max_execution_time}
-
-Type: Seconds
-
-Default value: 0
-
-If query runtime exceeds the specified number of seconds, the behavior will be determined by the 'timeout_overflow_mode', which by default is - throw an exception. Note that the timeout is checked and the query can stop only in designated places during data processing. It currently cannot stop during merging of aggregation states or during query analysis, and the actual run time will be higher than the value of this setting.
-
-## max_execution_time_leaf {#max_execution_time_leaf}
-
-Type: Seconds
-
-Default value: 0
-
-Similar semantics to max_execution_time, but only applies on leaf nodes for distributed queries. The timeout behavior will be determined by 'timeout_overflow_mode_leaf', which by default is - throw an exception.
-
-## max_expanded_ast_elements {#max_expanded_ast_elements}
-
-Type: UInt64
-
-Default value: 500000
-
-Maximum size of query syntax tree in number of nodes after expansion of aliases and the asterisk.
-
-## max_fetch_partition_retries_count {#max_fetch_partition_retries_count}
-
-Type: UInt64
-
-Default value: 5
-
-Amount of retries while fetching partition from another host.
-
-## max_final_threads {#max_final_threads}
-
-Type: MaxThreads
-
-Default value: 'auto(16)'
-
-Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier.
-
-Possible values:
-
-- Positive integer.
-- 0 or 1 — Disabled. `SELECT` queries are executed in a single thread.
-
-## max_http_get_redirects {#max_http_get_redirects}
-
-Type: UInt64
-
-Default value: 0
-
-Max number of HTTP GET redirect hops allowed. Ensures additional security measures are in place to prevent a malicious server from redirecting your requests to unexpected services. It is the case when an external server redirects to another address, but that address appears to be internal to the company's infrastructure, and by sending an HTTP request to an internal server, you could request an internal API from the internal network, bypassing the auth, or even query other services, such as Redis or Memcached. When you don't have an internal infrastructure (including something running on your localhost), or you trust the server, it is safe to allow redirects. Keep in mind, though, that if the URL uses HTTP instead of HTTPS, you will have to trust not only the remote server but also your ISP and every network in the middle.
-
-## max_hyperscan_regexp_length {#max_hyperscan_regexp_length}
-
-Type: UInt64
-
-Default value: 0
-
-Defines the maximum length for each regular expression in the [hyperscan multi-match functions](../../sql-reference/functions/string-search-functions.md/#multimatchanyhaystack-pattern1-pattern2-patternn).
-
-Possible values:
-
-- Positive integer.
-- 0 - The length is not limited.
-
-**Example**
-
-Query:
-
-```sql
-SELECT multiMatchAny('abcd', ['ab','bcd','c','d']) SETTINGS max_hyperscan_regexp_length = 3;
-```
-
-Result:
-
-```text
-┌─multiMatchAny('abcd', ['ab', 'bcd', 'c', 'd'])─┐
-│                                              1 │
-└────────────────────────────────────────────────┘
-```
-
-Query:
-
-```sql
-SELECT multiMatchAny('abcd', ['ab','bcd','c','d']) SETTINGS max_hyperscan_regexp_length = 2;
-```
-
-Result:
-
-```text
-Exception: Regexp length too large.
-```
-
-**See Also**
-
-- [max_hyperscan_regexp_total_length](#max_hyperscan_regexp_total_length)
-
-## max_hyperscan_regexp_total_length {#max_hyperscan_regexp_total_length}
-
-Type: UInt64
-
-Default value: 0
-
-Sets the maximum total length of all regular expressions in each [hyperscan multi-match function](../../sql-reference/functions/string-search-functions.md/#multimatchanyhaystack-pattern1-pattern2-patternn).
-
-Possible values:
-
-- Positive integer.
-- 0 - The length is not limited.
-
-**Example**
-
-Query:
-
-```sql
-SELECT multiMatchAny('abcd', ['a','b','c','d']) SETTINGS max_hyperscan_regexp_total_length = 5;
-```
-
-Result:
-
-```text
-┌─multiMatchAny('abcd', ['a', 'b', 'c', 'd'])─┐
-│                                           1 │
-└─────────────────────────────────────────────┘
-```
-
-Query:
-
-```sql
-SELECT multiMatchAny('abcd', ['ab','bc','c','d']) SETTINGS max_hyperscan_regexp_total_length = 5;
-```
-
-Result:
-
-```text
-Exception: Total regexp lengths too large.
-```
-
-**See Also**
-
-- [max_hyperscan_regexp_length](#max_hyperscan_regexp_length)
-
-## max_insert_block_size {#max_insert_block_size}
-
-Type: UInt64
-
-Default value: 1048449
-
-The size of blocks (in a count of rows) to form for insertion into a table.
-This setting only applies in cases when the server forms the blocks.
-For example, for an INSERT via the HTTP interface, the server parses the data format and forms blocks of the specified size.
-But when using clickhouse-client, the client parses the data itself, and the ‘max_insert_block_size’ setting on the server does not affect the size of the inserted blocks.
-The setting also does not have a purpose when using INSERT SELECT, since data is inserted using the same blocks that are formed after SELECT.
-
-The default is slightly more than `max_block_size`. The reason for this is that certain table engines (`*MergeTree`) form a data part on the disk for each inserted block, which is a fairly large entity. Similarly, `*MergeTree` tables sort data during insertion, and a large enough block size allows sorting more data in RAM.
-
-## max_insert_delayed_streams_for_parallel_write {#max_insert_delayed_streams_for_parallel_write}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum number of streams (columns) to delay final part flush. Default - auto (1000 if the underlying storage supports parallel write, for example S3, and disabled otherwise).
-
-## max_insert_threads {#max_insert_threads}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum number of threads to execute the `INSERT SELECT` query.
-
-Possible values:
-
-- 0 (or 1) — `INSERT SELECT` no parallel execution.
-- Positive integer. Bigger than 1.
-
-Cloud default value: from `2` to `4`, depending on the service size.
-
-Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#max_threads) setting.
-Higher values will lead to higher memory usage.
-
-## max_joined_block_size_rows {#max_joined_block_size_rows}
-
-Type: UInt64
-
-Default value: 65409
-
-Maximum block size for JOIN result (if join algorithm supports it). 0 means unlimited.
-
-## max_limit_for_ann_queries {#max_limit_for_ann_queries}
-
-Type: UInt64
-
-Default value: 1000000
-
-SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. Helps to prevent memory overflows in ANN search indexes.
-
-## max_live_view_insert_blocks_before_refresh {#max_live_view_insert_blocks_before_refresh}
-
-Type: UInt64
-
-Default value: 64
-
-Limit maximum number of inserted blocks after which mergeable blocks are dropped and query is re-executed.
-
-## max_local_read_bandwidth {#max_local_read_bandwidth}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum speed of local reads in bytes per second.
-
-## max_local_write_bandwidth {#max_local_write_bandwidth}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum speed of local writes in bytes per second.
-
-## max_memory_usage {#max_memory_usage}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum memory usage for processing of single query. Zero means unlimited.
-
-## max_memory_usage_for_user {#max_memory_usage_for_user}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum memory usage for processing all concurrently running queries for the user. Zero means unlimited.
-
-## max_network_bandwidth {#max_network_bandwidth}
-
-Type: UInt64
-
-Default value: 0
-
-Limits the speed of the data exchange over the network in bytes per second. This setting applies to every query.
-
-Possible values:
-
-- Positive integer.
-- 0 — Bandwidth control is disabled.
-
-## max_network_bandwidth_for_all_users {#max_network_bandwidth_for_all_users}
-
-Type: UInt64
-
-Default value: 0
-
-Limits the speed that data is exchanged at over the network in bytes per second. This setting applies to all concurrently running queries on the server.
-
-Possible values:
-
-- Positive integer.
-- 0 — Control of the data speed is disabled.
-
-## max_network_bandwidth_for_user {#max_network_bandwidth_for_user}
-
-Type: UInt64
-
-Default value: 0
-
-Limits the speed of the data exchange over the network in bytes per second. This setting applies to all concurrently running queries performed by a single user.
-
-Possible values:
-
-- Positive integer.
-- 0 — Control of the data speed is disabled.
-
-## max_network_bytes {#max_network_bytes}
-
-Type: UInt64
-
-Default value: 0
-
-Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query.
-
-Possible values:
-
-- Positive integer.
-- 0 — Data volume control is disabled.
-
-## max_number_of_partitions_for_independent_aggregation {#max_number_of_partitions_for_independent_aggregation}
-
-Type: UInt64
-
-Default value: 128
-
-Maximal number of partitions in table to apply optimization
-
-## max_parallel_replicas {#max_parallel_replicas}
-
-Type: NonZeroUInt64
-
-Default value: 1
-
-The maximum number of replicas for each shard when executing a query.
-
-Possible values:
-
-- Positive integer.
-
-**Additional Info**
-
-This option will produce different results depending on the settings used.
-
-:::note
-This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
-:::
-
-### Parallel processing using `SAMPLE` key
-
-A query may be processed faster if it is executed on several servers in parallel. But the query performance may degrade in the following cases:
-
-- The position of the sampling key in the partitioning key does not allow efficient range scans.
-- Adding a sampling key to the table makes filtering by other columns less efficient.
-- The sampling key is an expression that is expensive to calculate.
-- The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency.
-
-### Parallel processing using [parallel_replicas_custom_key](#parallel_replicas_custom_key)
-
-This setting is useful for any replicated table.
-
-## max_parser_backtracks {#max_parser_backtracks}
-
-Type: UInt64
-
-Default value: 1000000
-
-Maximum parser backtracking (how many times it tries different alternatives in the recursive descent parsing process).
-
-## max_parser_depth {#max_parser_depth}
-
-Type: UInt64
-
-Default value: 1000
-
-Limits maximum recursion depth in the recursive descent parser. Allows controlling the stack size.
-
-Possible values:
-
-- Positive integer.
-- 0 — Recursion depth is unlimited.
-
-## max_parsing_threads {#max_parsing_threads}
-
-Type: MaxThreads
-
-Default value: 'auto(16)'
-
-The maximum number of threads to parse data in input formats that support parallel parsing. By default, it is determined automatically
-
-## max_partition_size_to_drop {#max_partition_size_to_drop}
-
-Type: UInt64
-
-Default value: 50000000000
-
-Restriction on dropping partitions in query time. The value 0 means that you can drop partitions without any restrictions.
-
-Cloud default value: 1 TB.
-
-:::note
-This query setting overwrites its server setting equivalent, see [max_partition_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-partition-size-to-drop)
-:::
-
-## max_partitions_per_insert_block {#max_partitions_per_insert_block}
-
-Type: UInt64
-
-Default value: 100
-
-Limit maximum number of partitions in the single INSERTed block. Zero means unlimited. Throw an exception if the block contains too many partitions. This setting is a safety threshold because using a large number of partitions is a common misconception.
-
-## max_partitions_to_read {#max_partitions_to_read}
-
-Type: Int64
-
-Default value: -1
-
-Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited.
-
-## max_parts_to_move {#max_parts_to_move}
-
-Limit the number of parts that can be moved in one query. Zero means unlimited.
-
-Default value: `1000`.
-
-## max_query_size {#max_query_size}
-
-Type: UInt64
-
-Default value: 262144
-
-The maximum number of bytes of a query string parsed by the SQL parser.
-Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction.
-
-:::note
-`max_query_size` cannot be set within an SQL query (e.g., `SELECT now() SETTINGS max_query_size=10000`) because ClickHouse needs to allocate a buffer to parse the query, and this buffer size is determined by the `max_query_size` setting, which must be configured before the query is executed.
-:::
-
-## max_read_buffer_size {#max_read_buffer_size}
-
-Type: UInt64
-
-Default value: 1048576
-
-The maximum size of the buffer to read from the filesystem.
-
-## max_read_buffer_size_local_fs {#max_read_buffer_size_local_fs}
-
-Type: UInt64
-
-Default value: 131072
-
-The maximum size of the buffer to read from local filesystem. If set to 0 then max_read_buffer_size will be used.
-
-## max_read_buffer_size_remote_fs {#max_read_buffer_size_remote_fs}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum size of the buffer to read from remote filesystem. If set to 0 then max_read_buffer_size will be used.
-
-## max_recursive_cte_evaluation_depth {#max_recursive_cte_evaluation_depth}
-
-Type: UInt64
-
-Default value: 1000
-
-Maximum limit on recursive CTE evaluation depth
-
-## max_remote_read_network_bandwidth {#max_remote_read_network_bandwidth}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum speed of data exchange over the network in bytes per second for read.
-
-## max_remote_write_network_bandwidth {#max_remote_write_network_bandwidth}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum speed of data exchange over the network in bytes per second for write.
-
-## max_replica_delay_for_distributed_queries {#max_replica_delay_for_distributed_queries}
-
-Type: UInt64
-
-Default value: 300
-
-Disables lagging replicas for distributed queries. See [Replication](../../engines/table-engines/mergetree-family/replication.md).
-
-Sets the time in seconds. If a replica's lag is greater than or equal to the set value, this replica is not used.
-
-Possible values:
-
-- Positive integer.
-- 0 — Replica lags are not checked.
-
-To prevent the use of any replica with a non-zero lag, set this parameter to 1.
-
-Used when performing `SELECT` from a distributed table that points to replicated tables.
-
-## max_result_bytes {#max_result_bytes}
-
-Type: UInt64
-
-Default value: 0
-
-Limit on result size in bytes (uncompressed).  The query will stop after processing a block of data if the threshold is met, but it will not cut the last block of the result, therefore the result size can be larger than the threshold. Caveats: the result size in memory is taken into account for this threshold. Even if the result size is small, it can reference larger data structures in memory, representing dictionaries of LowCardinality columns, and Arenas of AggregateFunction columns, so the threshold can be exceeded despite the small result size. The setting is fairly low level and should be used with caution.
-
-## max_result_rows {#max_result_rows}
-
-Type: UInt64
-
-Default value: 0
-
-Limit on result size in rows. The query will stop after processing a block of data if the threshold is met, but it will not cut the last block of the result, therefore the result size can be larger than the threshold.
-
-## max_rows_in_distinct {#max_rows_in_distinct}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum number of elements during execution of DISTINCT.
-
-## max_rows_in_join {#max_rows_in_join}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum size of the hash table for JOIN (in number of rows).
-
-## max_rows_in_set {#max_rows_in_set}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum size of the set (in number of elements) resulting from the execution of the IN section.
-
-## max_rows_in_set_to_optimize_join {#max_rows_in_set_to_optimize_join}
-
-Type: UInt64
-
-Default value: 0
-
-Maximal size of the set to filter joined tables by each other's row sets before joining.
-
-Possible values:
-
-- 0 — Disable.
-- Any positive integer.
-
-## max_rows_to_group_by {#max_rows_to_group_by}
-
-Type: UInt64
-
-Default value: 0
-
-If aggregation during GROUP BY is generating more than the specified number of rows (unique GROUP BY keys), the behavior will be determined by the 'group_by_overflow_mode' which by default is - throw an exception, but can be also switched to an approximate GROUP BY mode.
-
-## max_rows_to_read {#max_rows_to_read}
-
-Type: UInt64
-
-Default value: 0
-
-Limit on read rows from the most 'deep' sources. That is, only in the deepest subquery. When reading from a remote server, it is only checked on a remote server.
-
-## max_rows_to_read_leaf {#max_rows_to_read_leaf}
-
-Type: UInt64
-
-Default value: 0
-
-Limit on read rows on the leaf nodes for distributed queries. Limit is applied for local reads only, excluding the final merge stage on the root node. Note, the setting is unstable with prefer_localhost_replica=1.
-
-## max_rows_to_sort {#max_rows_to_sort}
-
-Type: UInt64
-
-Default value: 0
-
-If more than the specified amount of records have to be processed for ORDER BY operation, the behavior will be determined by the 'sort_overflow_mode' which by default is - throw an exception
-
-## max_rows_to_transfer {#max_rows_to_transfer}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.
-
-## max_sessions_for_user {#max_sessions_for_user}
-
-Type: UInt64
-
-Default value: 0
-
-Maximum number of simultaneous sessions for a user.
-
-## max_size_to_preallocate_for_aggregation {#max_size_to_preallocate_for_aggregation}
-
-Type: UInt64
-
-Default value: 100000000
-
-For how many elements it is allowed to preallocate space in all hash tables in total before aggregation
-
-## max_size_to_preallocate_for_joins {#max_size_to_preallocate_for_joins}
-
-Type: UInt64
-
-Default value: 100000000
-
-For how many elements it is allowed to preallocate space in all hash tables in total before join
-
-## max_streams_for_merge_tree_reading {#max_streams_for_merge_tree_reading}
-
-Type: UInt64
-
-Default value: 0
-
-If it is not zero, limit the number of reading streams for MergeTree table.
-
-## max_streams_multiplier_for_merge_tables {#max_streams_multiplier_for_merge_tables}
-
-Type: Float
-
-Default value: 5
-
-Ask more streams when reading from Merge table. Streams will be spread across tables that Merge table will use. This allows more even distribution of work across threads and is especially helpful when merged tables differ in size.
-
-## max_streams_to_max_threads_ratio {#max_streams_to_max_threads_ratio}
-
-Type: Float
-
-Default value: 1
-
-Allows you to use more sources than the number of threads - to more evenly distribute work across threads. It is assumed that this is a temporary solution since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.
-
-## max_subquery_depth {#max_subquery_depth}
-
-Type: UInt64
-
-Default value: 100
-
-If a query has more than the specified number of nested subqueries, throw an exception. This allows you to have a sanity check to protect the users of your cluster from going insane with their queries.
-
-## max_table_size_to_drop {#max_table_size_to_drop}
-
-Type: UInt64
-
-Default value: 50000000000
-
-Restriction on deleting tables in query time. The value 0 means that you can delete all tables without any restrictions.
-
-Cloud default value: 1 TB.
-
-:::note
-This query setting overwrites its server setting equivalent, see [max_table_size_to_drop](/docs/en/operations/server-configuration-parameters/settings.md/#max-table-size-to-drop)
-:::
-
-## max_temporary_columns {#max_temporary_columns}
-
-Type: UInt64
-
-Default value: 0
-
-If a query generates more than the specified number of temporary columns in memory as a result of intermediate calculation, the exception is thrown. Zero value means unlimited. This setting is useful to prevent too complex queries.
-
-## max_temporary_data_on_disk_size_for_query {#max_temporary_data_on_disk_size_for_query}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running queries. Zero means unlimited.
-
-## max_temporary_data_on_disk_size_for_user {#max_temporary_data_on_disk_size_for_user}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum amount of data consumed by temporary files on disk in bytes for all concurrently running user queries. Zero means unlimited.
-
-## max_temporary_non_const_columns {#max_temporary_non_const_columns}
-
-Type: UInt64
-
-Default value: 0
-
-Similar to the 'max_temporary_columns' setting but applies only to non-constant columns. This makes sense because constant columns are cheap and it is reasonable to allow more of them.
-
-## max_threads {#max_threads}
-
-Type: MaxThreads
-
-Default value: 'auto(16)'
-
-The maximum number of query processing threads, excluding threads for retrieving data from remote servers (see the ‘max_distributed_connections’ parameter).
-
-This parameter applies to threads that perform the same stages of the query processing pipeline in parallel.
-For example, when reading from a table, if it is possible to evaluate expressions with functions, filter with WHERE and pre-aggregate for GROUP BY in parallel using at least ‘max_threads’ number of threads, then ‘max_threads’ are used.
-
-For queries that are completed quickly because of a LIMIT, you can set a lower ‘max_threads’. For example, if the necessary number of entries are located in every block and max_threads = 8, then 8 blocks are retrieved, although it would have been enough to read just one.
-
-The smaller the `max_threads` value, the less memory is consumed.
-
-## max_threads_for_indexes {#max_threads_for_indexes}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum number of threads to process indices.
-
-## max_untracked_memory {#max_untracked_memory}
-
-Type: UInt64
-
-Default value: 4194304
-
-Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when an amount (in absolute value) becomes larger than the specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.
-
-## memory_overcommit_ratio_denominator {#memory_overcommit_ratio_denominator}
-
-Type: UInt64
-
-Default value: 1073741824
-
-It represents the soft memory limit when the hard limit is reached on the global level.
-This value is used to compute the overcommit ratio for the query.
-Zero means skip the query.
-Read more about [memory overcommit](memory-overcommit.md).
-
-## memory_overcommit_ratio_denominator_for_user {#memory_overcommit_ratio_denominator_for_user}
-
-Type: UInt64
-
-Default value: 1073741824
-
-It represents the soft memory limit when the hard limit is reached on the user level.
-This value is used to compute the overcommit ratio for the query.
-Zero means skip the query.
-Read more about [memory overcommit](memory-overcommit.md).
-
-## memory_profiler_sample_max_allocation_size {#memory_profiler_sample_max_allocation_size}
-
-Type: UInt64
-
-Default value: 0
-
-Collect random allocations of size less than or equal to the specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold work as expected.
-
-## memory_profiler_sample_min_allocation_size {#memory_profiler_sample_min_allocation_size}
-
-Type: UInt64
-
-Default value: 0
-
-Collect random allocations of size greater than or equal to the specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold work as expected.
-
-## memory_profiler_sample_probability {#memory_profiler_sample_probability}
-
-Type: Float
-
-Default value: 0
-
-Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless of the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine-grained sampling.
-
-## memory_profiler_step {#memory_profiler_step}
-
-Type: UInt64
-
-Default value: 4194304
-
-Sets the step of memory profiler. Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stacktrace and will write it into [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log).
-
-Possible values:
-
-- A positive integer number of bytes.
-
-- 0 for turning off the memory profiler.
-
-## memory_tracker_fault_probability {#memory_tracker_fault_probability}
-
-Type: Float
-
-Default value: 0
-
-For testing of `exception safety` - throw an exception every time you allocate memory with the specified probability.
-
-## memory_usage_overcommit_max_wait_microseconds {#memory_usage_overcommit_max_wait_microseconds}
-
-Type: UInt64
-
-Default value: 5000000
-
-Maximum time thread will wait for memory to be freed in the case of memory overcommit on a user level.
-If the timeout is reached and memory is not freed, an exception is thrown.
-Read more about [memory overcommit](memory-overcommit.md).
-
-## merge_tree_coarse_index_granularity {#merge_tree_coarse_index_granularity}
-
-Type: UInt64
-
-Default value: 8
-
-When searching for data, ClickHouse checks the data marks in the index file. If ClickHouse finds that required keys are in some range, it divides this range into `merge_tree_coarse_index_granularity` subranges and searches the required keys there recursively.
-
-Possible values:
-
-- Any positive even integer.
-
-## merge_tree_compact_parts_min_granules_to_multibuffer_read {#merge_tree_compact_parts_min_granules_to_multibuffer_read}
-
-Type: UInt64
-
-Default value: 16
-
-Only available in ClickHouse Cloud. Number of granules in a stripe of a compact part of MergeTree tables to use the multibuffer reader, which supports parallel reading and prefetch. When reading from a remote filesystem, using the multibuffer reader increases the number of read requests.
-
-## merge_tree_determine_task_size_by_prewhere_columns {#merge_tree_determine_task_size_by_prewhere_columns}
-
-Type: Bool
-
-Default value: 1
-
-Whether to use only prewhere columns size to determine reading task size.
-
-## merge_tree_max_bytes_to_use_cache {#merge_tree_max_bytes_to_use_cache}
-
-Type: UInt64
-
-Default value: 2013265920
-
-If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it does not use the cache of uncompressed blocks.
-
-The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md/#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
-
-Possible values:
-
-- Any positive integer.
-
-## merge_tree_max_rows_to_use_cache {#merge_tree_max_rows_to_use_cache}
-
-Type: UInt64
-
-Default value: 1048576
-
-If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it does not use the cache of uncompressed blocks.
-
-The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md/#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
-
-Possible values:
-
-- Any positive integer.
-
-## merge_tree_min_bytes_for_concurrent_read {#merge_tree_min_bytes_for_concurrent_read}
-
-Type: UInt64
-
-Default value: 251658240
-
-If the number of bytes to read from one file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine table exceeds `merge_tree_min_bytes_for_concurrent_read`, then ClickHouse tries to concurrently read from this file in several threads.
-
-Possible value:
-
-- Positive integer.
-
-## merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem {#merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem}
-
-Type: UInt64
-
-Default value: 251658240
-
-The minimum number of bytes to read from one file before [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem.
-
-Possible values:
-
-- Positive integer.
-
-## merge_tree_min_bytes_for_seek {#merge_tree_min_bytes_for_seek}
-
-Type: UInt64
-
-Default value: 0
-
-If the distance between two data blocks to be read in one file is less than `merge_tree_min_bytes_for_seek` bytes, then ClickHouse sequentially reads a range of file that contains both blocks, thus avoiding extra seek.
-
-Possible values:
-
-- Any positive integer.
-
-## merge_tree_min_bytes_per_task_for_remote_reading {#merge_tree_min_bytes_per_task_for_remote_reading}
-
-Type: UInt64
-
-Default value: 2097152
-
-Min bytes to read per task.
-
-## merge_tree_min_rows_for_concurrent_read {#merge_tree_min_rows_for_concurrent_read}
-
-Type: UInt64
-
-Default value: 163840
-
-If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads.
-
-Possible values:
-
-- Positive integer.
-
-## merge_tree_min_rows_for_concurrent_read_for_remote_filesystem {#merge_tree_min_rows_for_concurrent_read_for_remote_filesystem}
-
-Type: UInt64
-
-Default value: 163840
-
-The minimum number of lines to read from one file before the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) engine can parallelize reading, when reading from remote filesystem.
-
-Possible values:
-
-- Positive integer.
-
-## merge_tree_min_rows_for_seek {#merge_tree_min_rows_for_seek}
-
-Type: UInt64
-
-Default value: 0
-
-If the distance between two data blocks to be read in one file is less than `merge_tree_min_rows_for_seek` rows, then ClickHouse does not seek through the file but reads the data sequentially.
-
-Possible values:
-
-- Any positive integer.
-
-## merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability {#merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability}
-
-Type: Float
-
-Default value: 0
-
-For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability.
-
-## merge_tree_use_const_size_tasks_for_remote_reading {#merge_tree_use_const_size_tasks_for_remote_reading}
-
-Type: Bool
-
-Default value: 1
-
-Whether to use constant size tasks for reading from a remote table.
-
-## metrics_perf_events_enabled {#metrics_perf_events_enabled}
-
-Type: Bool
-
-Default value: 0
-
-If enabled, some of the perf events will be measured throughout queries' execution.
-
-## metrics_perf_events_list {#metrics_perf_events_list}
-
-Type: String
-
-Default value:
-
-Comma separated list of perf metrics that will be measured throughout queries' execution. Empty means all events. See PerfEventInfo in sources for the available events.
-
-## min_bytes_to_use_direct_io {#min_bytes_to_use_direct_io}
-
-Type: UInt64
-
-Default value: 0
-
-The minimum data volume required for using direct I/O access to the storage disk.
-
-ClickHouse uses this setting when reading data from tables. If the total storage volume of all the data to be read exceeds `min_bytes_to_use_direct_io` bytes, then ClickHouse reads the data from the storage disk with the `O_DIRECT` option.
-
-Possible values:
-
-- 0 — Direct I/O is disabled.
-- Positive integer.
-
-## min_bytes_to_use_mmap_io {#min_bytes_to_use_mmap_io}
-
-Type: UInt64
-
-Default value: 0
-
-This is an experimental setting. Sets the minimum amount of memory for reading large files without copying data from the kernel to userspace. Recommended threshold is about 64 MB, because [mmap/munmap](https://en.wikipedia.org/wiki/Mmap) is slow. It makes sense only for large files and helps only if data reside in the page cache.
-
-Possible values:
-
-- Positive integer.
-- 0 — Big files read with only copying data from kernel to userspace.
-
-## min_chunk_bytes_for_parallel_parsing {#min_chunk_bytes_for_parallel_parsing}
-
-Type: UInt64
-
-Default value: 10485760
-
-- Type: unsigned int
-- Default value: 10 MiB
-
-The minimum chunk size in bytes, which each thread will parse in parallel.
-
-## min_compress_block_size {#min_compress_block_size}
-
-Type: UInt64
-
-Default value: 65536
-
-For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least `min_compress_block_size`. By default, 65,536.
-
-The actual size of the block, if the uncompressed data is less than `max_compress_block_size`, is no less than this value and no less than the volume of data for one mark.
-
-Let’s look at an example. Assume that `index_granularity` was set to 8192 during table creation.
-
-We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min_compress_block_size = 65,536, a compressed block will be formed for every two marks.
-
-We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won’t be decompressed.
-
-:::note
-This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
-:::
-
-## min_count_to_compile_aggregate_expression {#min_count_to_compile_aggregate_expression}
-
-Type: UInt64
-
-Default value: 3
-
-The minimum number of identical aggregate expressions to start JIT-compilation. Works only if the [compile_aggregate_expressions](#compile_aggregate_expressions) setting is enabled.
-
-Possible values:
-
-- Positive integer.
-- 0 — Identical aggregate expressions are always JIT-compiled.
-
-## min_count_to_compile_expression {#min_count_to_compile_expression}
-
-Type: UInt64
-
-Default value: 3
-
-The minimum number of times the same expression must be executed before it is compiled.
-
-## min_count_to_compile_sort_description {#min_count_to_compile_sort_description}
-
-Type: UInt64
-
-Default value: 3
-
-The number of identical sort descriptions before they are JIT-compiled
-
-## min_execution_speed {#min_execution_speed}
-
-Type: UInt64
-
-Default value: 0
-
-Minimum number of execution rows per second.
-
-## min_execution_speed_bytes {#min_execution_speed_bytes}
-
-Type: UInt64
-
-Default value: 0
-
-Minimum number of execution bytes per second.
-
-## min_external_table_block_size_bytes {#min_external_table_block_size_bytes}
-
-Type: UInt64
-
-Default value: 268402944
-
-Squash blocks passed to the external table to a specified size in bytes, if blocks are not big enough.
-
-## min_external_table_block_size_rows {#min_external_table_block_size_rows}
-
-Type: UInt64
-
-Default value: 1048449
-
-Squash blocks passed to external table to specified size in rows, if blocks are not big enough.
-
-## min_free_disk_bytes_to_perform_insert {#min_free_disk_bytes_to_perform_insert}
-
-Type: UInt64
-
-Default value: 0
-
-Minimum free disk space bytes to perform an insert.
-
-## min_free_disk_ratio_to_perform_insert {#min_free_disk_ratio_to_perform_insert}
-
-Type: Float
-
-Default value: 0
-
-Minimum free disk space ratio to perform an insert.
-
-## min_free_disk_space_for_temporary_data {#min_free_disk_space_for_temporary_data}
-
-Type: UInt64
-
-Default value: 0
-
-The minimum disk space to keep while writing temporary data used in external sorting and aggregation.
-
-## min_hit_rate_to_use_consecutive_keys_optimization {#min_hit_rate_to_use_consecutive_keys_optimization}
-
-Type: Float
-
-Default value: 0.5
-
-Minimal hit rate of a cache which is used for consecutive keys optimization in aggregation to keep it enabled
-
-## min_insert_block_size_bytes {#min_insert_block_size_bytes}
-
-Type: UInt64
-
-Default value: 268402944
-
-Sets the minimum number of bytes in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones.
-
-Possible values:
-
-- Positive integer.
-- 0 — Squashing disabled.
-
-## min_insert_block_size_bytes_for_materialized_views {#min_insert_block_size_bytes_for_materialized_views}
-
-Type: UInt64
-
-Default value: 0
-
-Sets the minimum number of bytes in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage.
-
-Possible values:
-
-- Any positive integer.
-- 0 — Squashing disabled.
-
-**See also**
-
-- [min_insert_block_size_bytes](#min_insert_block_size_bytes)
-
-## min_insert_block_size_rows {#min_insert_block_size_rows}
-
-Type: UInt64
-
-Default value: 1048449
-
-Sets the minimum number of rows in the block that can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones.
-
-Possible values:
-
-- Positive integer.
-- 0 — Squashing disabled.
-
-## min_insert_block_size_rows_for_materialized_views {#min_insert_block_size_rows_for_materialized_views}
-
-Type: UInt64
-
-Default value: 0
-
-Sets the minimum number of rows in the block which can be inserted into a table by an `INSERT` query. Smaller-sized blocks are squashed into bigger ones. This setting is applied only for blocks inserted into [materialized view](../../sql-reference/statements/create/view.md). By adjusting this setting, you control blocks squashing while pushing to materialized view and avoid excessive memory usage.
-
-Possible values:
-
-- Any positive integer.
-- 0 — Squashing disabled.
-
-**See Also**
-
-- [min_insert_block_size_rows](#min_insert_block_size_rows)
-
-## mongodb_throw_on_unsupported_query {#mongodb_throw_on_unsupported_query}
-
-Type: Bool
-
-Default value: 1
-
-If enabled, MongoDB tables will return an error when a MongoDB query cannot be built. Otherwise, ClickHouse reads the full table and processes it locally. This option does not apply to the legacy implementation or when 'allow_experimental_analyzer=0'.
-
-## move_all_conditions_to_prewhere {#move_all_conditions_to_prewhere}
-
-Type: Bool
-
-Default value: 1
-
-Move all viable conditions from WHERE to PREWHERE
-
-## move_primary_key_columns_to_end_of_prewhere {#move_primary_key_columns_to_end_of_prewhere}
-
-Type: Bool
-
-Default value: 1
-
-Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.
-
-## multiple_joins_try_to_keep_original_names {#multiple_joins_try_to_keep_original_names}
-
-Type: Bool
-
-Default value: 0
-
-Do not add aliases to top level expression list on multiple joins rewrite
-
-## mutations_execute_nondeterministic_on_initiator {#mutations_execute_nondeterministic_on_initiator}
-
-Type: Bool
-
-Default value: 0
-
-If true, constant nondeterministic functions (e.g. function `now()`) are executed on the initiator and replaced with literals in `UPDATE` and `DELETE` queries. It helps to keep data in sync on replicas while executing mutations with constant nondeterministic functions. Default value: `false`.
-
-## mutations_execute_subqueries_on_initiator {#mutations_execute_subqueries_on_initiator}
-
-Type: Bool
-
-Default value: 0
-
-If true, scalar subqueries are executed on the initiator and replaced with literals in `UPDATE` and `DELETE` queries. Default value: `false`.
-
-## mutations_max_literal_size_to_replace {#mutations_max_literal_size_to_replace}
-
-Type: UInt64
-
-Default value: 16384
-
-The maximum size of a serialized literal in bytes to replace in `UPDATE` and `DELETE` queries. Takes effect only if at least one of the two settings above is enabled. Default value: 16384 (16 KiB).
-
-## mutations_sync {#mutations_sync}
-
-Type: UInt64
-
-Default value: 0
-
-Allows to execute `ALTER TABLE ... UPDATE|DELETE|MATERIALIZE INDEX|MATERIALIZE PROJECTION|MATERIALIZE COLUMN` queries ([mutations](../../sql-reference/statements/alter/index.md#mutations)) synchronously.
-
-Possible values:
-
-- 0 - Mutations execute asynchronously.
-- 1 - The query waits for all mutations to complete on the current server.
-- 2 - The query waits for all mutations to complete on all replicas (if they exist).
-
-## mysql_datatypes_support_level {#mysql_datatypes_support_level}
-
-Type: MySQLDataTypesSupport
-
-Default value:
-
-Defines how MySQL types are converted to corresponding ClickHouse types. A comma separated list in any combination of `decimal`, `datetime64`, `date2Date32` or `date2String`.
-- `decimal`: convert `NUMERIC` and `DECIMAL` types to `Decimal` when precision allows it.
-- `datetime64`: convert `DATETIME` and `TIMESTAMP` types to `DateTime64` instead of `DateTime` when precision is not `0`.
-- `date2Date32`: convert `DATE` to `Date32` instead of `Date`. Takes precedence over `date2String`.
-- `date2String`: convert `DATE` to `String` instead of `Date`. Overridden by `date2Date32`.
-
-## mysql_map_fixed_string_to_text_in_show_columns {#mysql_map_fixed_string_to_text_in_show_columns}
-
-Type: Bool
-
-Default value: 1
-
-When enabled, [FixedString](../../sql-reference/data-types/fixedstring.md) ClickHouse data type will be displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns).
-
-Has an effect only when the connection is made through the MySQL wire protocol.
-
-- 0 - Use `BLOB`.
-- 1 - Use `TEXT`.
-
-## mysql_map_string_to_text_in_show_columns {#mysql_map_string_to_text_in_show_columns}
-
-Type: Bool
-
-Default value: 1
-
-When enabled, [String](../../sql-reference/data-types/string.md) ClickHouse data type will be displayed as `TEXT` in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns).
-
-Has an effect only when the connection is made through the MySQL wire protocol.
-
-- 0 - Use `BLOB`.
-- 1 - Use `TEXT`.
-
-## mysql_max_rows_to_insert {#mysql_max_rows_to_insert}
-
-Type: UInt64
-
-Default value: 65536
-
-The maximum number of rows in MySQL batch insertion of the MySQL storage engine
-
-## network_compression_method {#network_compression_method}
-
-Type: String
-
-Default value: LZ4
-
-Sets the method of data compression that is used for communication between servers and between server and [clickhouse-client](../../interfaces/cli.md).
-
-Possible values:
-
-- `LZ4` — sets LZ4 compression method.
-- `ZSTD` — sets ZSTD compression method.
-
-**See Also**
-
-- [network_zstd_compression_level](#network_zstd_compression_level)
-
-## network_zstd_compression_level {#network_zstd_compression_level}
-
-Type: Int64
-
-Default value: 1
-
-Adjusts the level of ZSTD compression. Used only when [network_compression_method](#network_compression_method) is set to `ZSTD`.
-
-Possible values:
-
-- Positive integer from 1 to 15.
-
-## normalize_function_names {#normalize_function_names}
-
-Type: Bool
-
-Default value: 1
-
-Normalize function names to their canonical names
-
-## number_of_mutations_to_delay {#number_of_mutations_to_delay}
-
-Type: UInt64
-
-Default value: 0
-
-If the mutated table contains at least that many unfinished mutations, artificially slow down mutations of table. 0 - disabled
-
-## number_of_mutations_to_throw {#number_of_mutations_to_throw}
-
-Type: UInt64
-
-Default value: 0
-
-If the mutated table contains at least that many unfinished mutations, throw 'Too many mutations ...' exception. 0 - disabled
-
-## odbc_bridge_connection_pool_size {#odbc_bridge_connection_pool_size}
-
-Type: UInt64
-
-Default value: 16
-
-Connection pool size for each connection settings string in ODBC bridge.
-
-## odbc_bridge_use_connection_pooling {#odbc_bridge_use_connection_pooling}
-
-Type: Bool
-
-Default value: 1
-
-Use connection pooling in ODBC bridge. If set to false, a new connection is created every time.
-
-## offset {#offset}
-
-Type: UInt64
-
-Default value: 0
-
-Sets the number of rows to skip before starting to return rows from the query. It adjusts the offset set by the [OFFSET](../../sql-reference/statements/select/offset.md/#offset-fetch) clause, so that these two values are summed.
-
-Possible values:
-
-- 0 — No rows are skipped.
-- Positive integer.
-
-**Example**
-
-Input table:
-
-``` sql
-CREATE TABLE test (i UInt64) ENGINE = MergeTree() ORDER BY i;
-INSERT INTO test SELECT number FROM numbers(500);
-```
-
-Query:
-
-``` sql
-SET limit = 5;
-SET offset = 7;
-SELECT * FROM test LIMIT 10 OFFSET 100;
-```
-Result:
-
-``` text
-┌───i─┐
-│ 107 │
-│ 108 │
-│ 109 │
-└─────┘
-```
-
-## opentelemetry_start_trace_probability {#opentelemetry_start_trace_probability}
-
-Type: Float
-
-Default value: 0
-
-Sets the probability that the ClickHouse can start a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied).
-
-Possible values:
-
-- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied).
-- Positive floating-point number in the range [0..1]. For example, if the setting value is `0.5`, ClickHouse can start a trace on average for half of the queries.
-- 1 — The trace for all executed queries is enabled.
-
-## opentelemetry_trace_processors {#opentelemetry_trace_processors}
-
-Type: Bool
-
-Default value: 0
-
-Collect OpenTelemetry spans for processors.
-
-## optimize_aggregation_in_order {#optimize_aggregation_in_order}
-
-Type: Bool
-
-Default value: 0
-
-Enables [GROUP BY](../../sql-reference/statements/select/group-by.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries for aggregating data in corresponding order in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
-
-Possible values:
-
-- 0 — `GROUP BY` optimization is disabled.
-- 1 — `GROUP BY` optimization is enabled.
-
-**See Also**
-
-- [GROUP BY optimization](../../sql-reference/statements/select/group-by.md/#aggregation-in-order)
-
-## optimize_aggregators_of_group_by_keys {#optimize_aggregators_of_group_by_keys}
-
-Type: Bool
-
-Default value: 1
-
-Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section
-
-## optimize_append_index {#optimize_append_index}
-
-Type: Bool
-
-Default value: 0
-
-Use [constraints](../../sql-reference/statements/create/table.md#constraints) in order to append index condition. The default is `false`.
-
-Possible values:
-
-- true, false
-
-## optimize_arithmetic_operations_in_aggregate_functions {#optimize_arithmetic_operations_in_aggregate_functions}
-
-Type: Bool
-
-Default value: 1
-
-Move arithmetic operations out of aggregation functions
-
-## optimize_count_from_files {#optimize_count_from_files}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables the optimization of counting number of rows from files in different input formats. It applies to table functions/engines `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`.
-
-Possible values:
-
-- 0 — Optimization disabled.
-- 1 — Optimization enabled.
-
-## optimize_distinct_in_order {#optimize_distinct_in_order}
-
-Type: Bool
-
-Default value: 1
-
-Enable DISTINCT optimization if some columns in DISTINCT form a prefix of sorting. For example, prefix of sorting key in merge tree or ORDER BY statement
-
-## optimize_distributed_group_by_sharding_key {#optimize_distributed_group_by_sharding_key}
-
-Type: Bool
-
-Default value: 1
-
-Optimize `GROUP BY sharding_key` queries, by avoiding costly aggregation on the initiator server (which will reduce memory usage for the query on the initiator server).
-
-The following types of queries are supported (and all combinations of them):
-
-- `SELECT DISTINCT [..., ]sharding_key[, ...] FROM dist`
-- `SELECT ... FROM dist GROUP BY sharding_key[, ...]`
-- `SELECT ... FROM dist GROUP BY sharding_key[, ...] ORDER BY x`
-- `SELECT ... FROM dist GROUP BY sharding_key[, ...] LIMIT 1`
-- `SELECT ... FROM dist GROUP BY sharding_key[, ...] LIMIT 1 BY x`
-
-The following types of queries are not supported (support for some of them may be added later):
-
-- `SELECT ... GROUP BY sharding_key[, ...] WITH TOTALS`
-- `SELECT ... GROUP BY sharding_key[, ...] WITH ROLLUP`
-- `SELECT ... GROUP BY sharding_key[, ...] WITH CUBE`
-- `SELECT ... GROUP BY sharding_key[, ...] SETTINGS extremes=1`
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-See also:
-
-- [distributed_group_by_no_merge](#distributed_group_by_no_merge)
-- [distributed_push_down_limit](#distributed_push_down_limit)
-- [optimize_skip_unused_shards](#optimize_skip_unused_shards)
-
-:::note
-Right now it requires `optimize_skip_unused_shards` (the reason behind this is that one day it may be enabled by default, and it will work correctly only if data was inserted via Distributed table, i.e. data is distributed according to sharding_key).
-:::
-
-## optimize_functions_to_subcolumns {#optimize_functions_to_subcolumns}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read.
-
-These functions can be transformed:
-
-- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
-- [empty](../../sql-reference/functions/array-functions.md/#function-empty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
-- [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
-- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
-- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
-- [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
-- [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) to read the [keys](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn.
-- [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) to read the [values](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn.
-
-Possible values:
-
-- 0 — Optimization disabled.
-- 1 — Optimization enabled.
-
-## optimize_group_by_constant_keys {#optimize_group_by_constant_keys}
-
-Type: Bool
-
-Default value: 1
-
-Optimize GROUP BY when all keys in block are constant
-
-## optimize_group_by_function_keys {#optimize_group_by_function_keys}
-
-Type: Bool
-
-Default value: 1
-
-Eliminates functions of other keys in GROUP BY section
-
-## optimize_if_chain_to_multiif {#optimize_if_chain_to_multiif}
-
-Type: Bool
-
-Default value: 0
-
-Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.
-
-## optimize_if_transform_strings_to_enum {#optimize_if_transform_strings_to_enum}
-
-Type: Bool
-
-Default value: 0
-
-Replaces string-type arguments in If and Transform with enum. Disabled by default because it could make an inconsistent change in a distributed query that would cause the query to fail.
-
-## optimize_injective_functions_in_group_by {#optimize_injective_functions_in_group_by}
-
-Type: Bool
-
-Default value: 1
-
-Replaces injective functions by their arguments in GROUP BY section
-
-## optimize_injective_functions_inside_uniq {#optimize_injective_functions_inside_uniq}
-
-Type: Bool
-
-Default value: 1
-
-Delete injective functions of one argument inside uniq*() functions.
-
-## optimize_min_equality_disjunction_chain_length {#optimize_min_equality_disjunction_chain_length}
-
-Type: UInt64
-
-Default value: 3
-
-The minimum length of the expression `expr = x1 OR ... expr = xN` for optimization
-
-## optimize_min_inequality_conjunction_chain_length {#optimize_min_inequality_conjunction_chain_length}
-
-Type: UInt64
-
-Default value: 3
-
-The minimum length of the expression `expr <> x1 AND ... expr <> xN` for optimization
-
-## optimize_move_to_prewhere {#optimize_move_to_prewhere}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries.
-
-Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.md) tables.
-
-Possible values:
-
-- 0 — Automatic `PREWHERE` optimization is disabled.
-- 1 — Automatic `PREWHERE` optimization is enabled.
-
-## optimize_move_to_prewhere_if_final {#optimize_move_to_prewhere_if_final}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries with [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier.
-
-Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.md) tables.
-
-Possible values:
-
-- 0 — Automatic `PREWHERE` optimization in `SELECT` queries with `FINAL` modifier is disabled.
-- 1 — Automatic `PREWHERE` optimization in `SELECT` queries with `FINAL` modifier is enabled.
-
-**See Also**
-
-- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
-
-## optimize_multiif_to_if {#optimize_multiif_to_if}
-
-Type: Bool
-
-Default value: 1
-
-Replace 'multiIf' with only one condition to 'if'.
-
-## optimize_normalize_count_variants {#optimize_normalize_count_variants}
-
-Type: Bool
-
-Default value: 1
-
-Rewrite aggregate functions that are semantically equal to count() as count().
-
-## optimize_on_insert {#optimize_on_insert}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine).
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-**Example**
-
-The difference between enabled and disabled:
-
-Query:
-
-```sql
-SET optimize_on_insert = 1;
-
-CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable;
-
-INSERT INTO test1 SELECT number % 2 FROM numbers(5);
-
-SELECT * FROM test1;
-
-SET optimize_on_insert = 0;
-
-CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable;
-
-INSERT INTO test2 SELECT number % 2 FROM numbers(5);
-
-SELECT * FROM test2;
-```
-
-Result:
-
-``` text
-┌─FirstTable─┐
-│          0 │
-│          1 │
-└────────────┘
-
-┌─SecondTable─┐
-│           0 │
-│           0 │
-│           0 │
-│           1 │
-│           1 │
-└─────────────┘
-```
-
-Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md/#materialized) and [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) behaviour.
-
-## optimize_or_like_chain {#optimize_or_like_chain}
-
-Type: Bool
-
-Default value: 0
-
-Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.
-
-## optimize_read_in_order {#optimize_read_in_order}
-
-Type: Bool
-
-Default value: 1
-
-Enables [ORDER BY](../../sql-reference/statements/select/order-by.md/#optimize_read_in_order) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries for reading data from [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
-
-Possible values:
-
-- 0 — `ORDER BY` optimization is disabled.
-- 1 — `ORDER BY` optimization is enabled.
-
-**See Also**
-
-- [ORDER BY Clause](../../sql-reference/statements/select/order-by.md/#optimize_read_in_order)
-
-## optimize_read_in_window_order {#optimize_read_in_window_order}
-
-Type: Bool
-
-Default value: 1
-
-Enable ORDER BY optimization in window clause for reading data in corresponding order in MergeTree tables.
-
-## optimize_redundant_functions_in_order_by {#optimize_redundant_functions_in_order_by}
-
-Type: Bool
-
-Default value: 1
-
-Remove functions from ORDER BY if their arguments are also in ORDER BY
-
-## optimize_respect_aliases {#optimize_respect_aliases}
-
-Type: Bool
-
-Default value: 1
-
-If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count
-
-## optimize_rewrite_aggregate_function_with_if {#optimize_rewrite_aggregate_function_with_if}
-
-Type: Bool
-
-Default value: 1
-
-Rewrite aggregate functions with if expression as argument when logically equivalent.
-For example, `avg(if(cond, col, null))` can be rewritten to `avgOrNullIf(cond, col)`. It may improve performance.
-
-:::note
-Supported only with experimental analyzer (`enable_analyzer = 1`).
-:::
-
-## optimize_rewrite_array_exists_to_has {#optimize_rewrite_array_exists_to_has}
-
-Type: Bool
-
-Default value: 0
-
-Rewrite arrayExists() functions to has() when logically equivalent. For example, arrayExists(x -> x = 1, arr) can be rewritten to has(arr, 1)
-
-## optimize_rewrite_sum_if_to_count_if {#optimize_rewrite_sum_if_to_count_if}
-
-Type: Bool
-
-Default value: 1
-
-Rewrite sumIf() and sum(if()) functions to the countIf() function when logically equivalent
-
-## optimize_skip_merged_partitions {#optimize_skip_merged_partitions}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables optimization for [OPTIMIZE TABLE ... FINAL](../../sql-reference/statements/optimize.md) query if there is only one part with level > 0 and it doesn't have expired TTL.
-
-- `OPTIMIZE TABLE ... FINAL SETTINGS optimize_skip_merged_partitions=1`
-
-By default, `OPTIMIZE TABLE ... FINAL` query rewrites the one part even if there is only a single part.
-
-Possible values:
-
-- 1 - Enable optimization.
-- 0 - Disable optimization.
-
-## optimize_skip_unused_shards {#optimize_skip_unused_shards}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have sharding key condition in `WHERE/PREWHERE` (assuming that the data is distributed by sharding key, otherwise a query yields incorrect result).
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## optimize_skip_unused_shards_limit {#optimize_skip_unused_shards_limit}
-
-Type: UInt64
-
-Default value: 1000
-
-Limit for number of sharding key values, turns off `optimize_skip_unused_shards` if the limit is reached.
-
-Too many values may require a significant amount of processing, while the benefit is doubtful, since if you have a huge number of values in `IN (...)`, then most likely the query will be sent to all shards anyway.
-
-## optimize_skip_unused_shards_nesting {#optimize_skip_unused_shards_nesting}
-
-Type: UInt64
-
-Default value: 0
-
-Controls [`optimize_skip_unused_shards`](#optimize_skip_unused_shards) (hence still requires [`optimize_skip_unused_shards`](#optimize_skip_unused_shards)) depending on the nesting level of the distributed query (the case when you have a `Distributed` table that looks into another `Distributed` table).
-
-Possible values:
-
-- 0 — Disabled, `optimize_skip_unused_shards` works always.
-- 1 — Enables `optimize_skip_unused_shards` only for the first level.
-- 2 — Enables `optimize_skip_unused_shards` up to the second level.
-
-## optimize_skip_unused_shards_rewrite_in {#optimize_skip_unused_shards_rewrite_in}
-
-Type: Bool
-
-Default value: 1
-
-Rewrite IN in query for remote shards to exclude values that do not belong to the shard (requires optimize_skip_unused_shards).
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## optimize_sorting_by_input_stream_properties {#optimize_sorting_by_input_stream_properties}
-
-Type: Bool
-
-Default value: 1
-
-Optimize sorting by sorting properties of input stream
-
-## optimize_substitute_columns {#optimize_substitute_columns}
-
-Type: Bool
-
-Default value: 0
-
-Use [constraints](../../sql-reference/statements/create/table.md#constraints) for column substitution. The default is `false`.
-
-Possible values:
-
-- true, false
-
-## optimize_syntax_fuse_functions {#optimize_syntax_fuse_functions}
-
-Type: Bool
-
-Default value: 0
-
-Enables fusing aggregate functions with an identical argument. It rewrites a query that contains at least two aggregate functions from [sum](../../sql-reference/aggregate-functions/reference/sum.md/#agg_function-sum), [count](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) or [avg](../../sql-reference/aggregate-functions/reference/avg.md/#agg_function-avg) with an identical argument to [sumCount](../../sql-reference/aggregate-functions/reference/sumcount.md/#agg_function-sumCount).
-
-Possible values:
-
-- 0 — Functions with identical argument are not fused.
-- 1 — Functions with identical argument are fused.
-
-**Example**
-
-Query:
-
-``` sql
-CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log;
-SET optimize_syntax_fuse_functions = 1;
-EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b), avg(b) from fuse_tbl FORMAT TSV;
-```
-
-Result:
-
-``` text
-SELECT
-    sum(a),
-    sumCount(b).1,
-    sumCount(b).2,
-    (sumCount(b).1) / (sumCount(b).2)
-FROM fuse_tbl
-```
-
-## optimize_throw_if_noop {#optimize_throw_if_noop}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/optimize.md) query didn’t perform a merge.
-
-By default, `OPTIMIZE` returns successfully even if it didn’t do anything. This setting lets you differentiate these situations and get the reason in an exception message.
-
-Possible values:
-
-- 1 — Throwing an exception is enabled.
-- 0 — Throwing an exception is disabled.
-
-## optimize_time_filter_with_preimage {#optimize_time_filter_with_preimage}
-
-Type: Bool
-
-Default value: 1
-
-Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')
-
-## optimize_trivial_approximate_count_query {#optimize_trivial_approximate_count_query}
-
-Type: Bool
-
-Default value: 0
-
-Use an approximate value for trivial count optimization of storages that support such estimation, for example, EmbeddedRocksDB.
-
-Possible values:
-
-   - 0 — Optimization disabled.
-   - 1 — Optimization enabled.
-
-## optimize_trivial_count_query {#optimize_trivial_count_query}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables the optimization to trivial query `SELECT count() FROM table` using metadata from MergeTree. If you need to use row-level security, disable this setting.
-
-Possible values:
-
-   - 0 — Optimization disabled.
-   - 1 — Optimization enabled.
-
-See also:
-
-- [optimize_functions_to_subcolumns](#optimize_functions_to_subcolumns)
-
-## optimize_trivial_insert_select {#optimize_trivial_insert_select}
-
-Type: Bool
-
-Default value: 0
-
-Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query
-
-## optimize_uniq_to_count {#optimize_uniq_to_count}
-
-Type: Bool
-
-Default value: 1
-
-Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.
-
-## optimize_use_implicit_projections {#optimize_use_implicit_projections}
-
-Type: Bool
-
-Default value: 1
-
-Automatically choose implicit projections to perform SELECT query
-
-## optimize_use_projections {#optimize_use_projections}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md/#projections) optimization when processing `SELECT` queries.
-
-Possible values:
-
-- 0 — Projection optimization disabled.
-- 1 — Projection optimization enabled.
-
-## optimize_using_constraints {#optimize_using_constraints}
-
-Type: Bool
-
-Default value: 0
-
-Use [constraints](../../sql-reference/statements/create/table.md#constraints) for query optimization. The default is `false`.
-
-Possible values:
-
-- true, false
-
-## os_thread_priority {#os_thread_priority}
-
-Type: Int64
-
-Default value: 0
-
-Sets the priority ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) for threads that execute queries. The OS scheduler considers this priority when choosing the next thread to run on each available CPU core.
-
-:::note
-To use this setting, you need to set the `CAP_SYS_NICE` capability. The `clickhouse-server` package sets it up during installation. Some virtual environments do not allow you to set the `CAP_SYS_NICE` capability. In this case, `clickhouse-server` shows a message about it at the start.
-:::
-
-Possible values:
-
-- You can set values in the range `[-20, 19]`.
-
-Lower values mean higher priority. Threads with low `nice` priority values are executed more frequently than threads with high values. High values are preferable for long-running non-interactive queries because it allows them to quickly give up resources in favour of short interactive queries when they arrive.
-
-## output_format_compression_level {#output_format_compression_level}
-
-Type: UInt64
-
-Default value: 3
-
-Default compression level if query output is compressed. The setting is applied when `SELECT` query has `INTO OUTFILE` or when writing to table functions `file`, `url`, `hdfs`, `s3`, or `azureBlobStorage`.
-
-Possible values: from `1` to `22`
-
-## output_format_compression_zstd_window_log {#output_format_compression_zstd_window_log}
-
-Type: UInt64
-
-Default value: 0
-
-Can be used when the output compression method is `zstd`. If greater than `0`, this setting explicitly sets compression window size (power of `2`) and enables a long-range mode for zstd compression. This can help to achieve a better compression ratio.
-
-Possible values: non-negative numbers. Note that if the value is too small or too big, `zstdlib` will throw an exception. Typical values are from `20` (window size = `1MB`) to `30` (window size = `1GB`).
-
-## output_format_parallel_formatting {#output_format_parallel_formatting}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables parallel formatting of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats.
-
-Possible values:
-
-- 1 — Enabled.
-- 0 — Disabled.
-
-## page_cache_inject_eviction {#page_cache_inject_eviction}
-
-Type: Bool
-
-Default value: 0
-
-Userspace page cache will sometimes invalidate some pages at random. Intended for testing.
-
-## parallel_distributed_insert_select {#parallel_distributed_insert_select}
-
-Type: UInt64
-
-Default value: 0
-
-Enables parallel distributed `INSERT ... SELECT` query.
-
-If we execute `INSERT INTO distributed_table_a SELECT ... FROM distributed_table_b` queries and both tables use the same cluster, and both tables are either [replicated](../../engines/table-engines/mergetree-family/replication.md) or non-replicated, then this query is processed locally on every shard.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — `SELECT` will be executed on each shard from the underlying table of the distributed engine.
-- 2 — `SELECT` and `INSERT` will be executed on each shard from/to the underlying table of the distributed engine.
-
-## parallel_replica_offset {#parallel_replica_offset}
-
-Type: UInt64
-
-Default value: 0
-
-This is an internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.
-
-## parallel_replicas_allow_in_with_subquery {#parallel_replicas_allow_in_with_subquery}
-
-Type: Bool
-
-Default value: 1
-
-If true, subquery for IN will be executed on every follower replica.
-
-## parallel_replicas_count {#parallel_replicas_count}
-
-Type: UInt64
-
-Default value: 0
-
-This is an internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.
-
-## parallel_replicas_custom_key {#parallel_replicas_custom_key}
-
-Type: String
-
-Default value:
-
-An arbitrary integer expression that can be used to split work between replicas for a specific table.
-The value can be any integer expression.
-
-Simple expressions using primary keys are preferred.
-
-If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards.
-Otherwise, it will behave the same as for the `SAMPLE` key: it will use multiple replicas of each shard.
-
-## parallel_replicas_custom_key_range_lower {#parallel_replicas_custom_key_range_lower}
-
-Type: UInt64
-
-Default value: 0
-
-Allows the filter type `range` to split the work evenly between replicas based on the custom range `[parallel_replicas_custom_key_range_lower, INT_MAX]`.
-
-When used in conjunction with [parallel_replicas_custom_key_range_upper](#parallel_replicas_custom_key_range_upper), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`.
-
-Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing.
-
-## parallel_replicas_custom_key_range_upper {#parallel_replicas_custom_key_range_upper}
-
-Type: UInt64
-
-Default value: 0
-
-Allows the filter type `range` to split the work evenly between replicas based on the custom range `[0, parallel_replicas_custom_key_range_upper]`. A value of 0 disables the upper bound, setting it to the max value of the custom key expression.
-
-When used in conjunction with [parallel_replicas_custom_key_range_lower](#parallel_replicas_custom_key_range_lower), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`.
-
-Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing.
-
-## parallel_replicas_for_non_replicated_merge_tree {#parallel_replicas_for_non_replicated_merge_tree}
-
-Type: Bool
-
-Default value: 0
-
-If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables
-
-## parallel_replicas_local_plan {#parallel_replicas_local_plan}
-
-Type: Bool
-
-Default value: 0
-
-Build local plan for local replica
-
-## parallel_replicas_mark_segment_size {#parallel_replicas_mark_segment_size}
-
-Type: UInt64
-
-Default value: 0
-
-Parts are virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change unless you're absolutely sure of what you're doing. The value should be in the range [128; 16384].
-
-## parallel_replicas_min_number_of_rows_per_replica {#parallel_replicas_min_number_of_rows_per_replica}
-
-Type: UInt64
-
-Default value: 0
-
-Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'
-
-## parallel_replicas_mode {#parallel_replicas_mode}
-
-Type: ParallelReplicasMode
-
-Default value: read_tasks
-
-Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.
-
-## parallel_replicas_prefer_local_join {#parallel_replicas_prefer_local_join}
-
-Type: Bool
-
-Default value: 1
-
-If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.
-
-## parallel_replicas_single_task_marks_count_multiplier {#parallel_replicas_single_task_marks_count_multiplier}
-
-Type: Float
-
-Default value: 2
-
-A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.
-
-## parallel_view_processing {#parallel_view_processing}
-
-Type: Bool
-
-Default value: 0
-
-Enables pushing to attached views concurrently instead of sequentially.
-
-## parallelize_output_from_storages {#parallelize_output_from_storages}
-
-Type: Bool
-
-Default value: 1
-
-Parallelize output for reading step from storage. It allows parallelization of query processing right after reading from storage if possible.
-
-## parsedatetime_parse_without_leading_zeros {#parsedatetime_parse_without_leading_zeros}
-
-Type: Bool
-
-Default value: 1
-
-Formatters '%c', '%l' and '%k' in function 'parseDateTime()' parse months and hours without leading zeros.
-
-## partial_merge_join_left_table_buffer_bytes {#partial_merge_join_left_table_buffer_bytes}
-
-Type: UInt64
-
-Default value: 0
-
-If not 0, group left table blocks into bigger ones for the left-side table in partial merge join. It uses up to 2x of specified memory per joining thread.
-
-## partial_merge_join_rows_in_right_blocks {#partial_merge_join_rows_in_right_blocks}
-
-Type: UInt64
-
-Default value: 65536
-
-Limits sizes of right-hand join data blocks in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries.
-
-ClickHouse server:
-
-1.  Splits right-hand join data into blocks with up to the specified number of rows.
-2.  Indexes each block with its minimum and maximum values.
-3.  Unloads prepared blocks to disk if it is possible.
-
-Possible values:
-
-- Any positive integer. Recommended range of values: \[1000, 100000\].
-
-## partial_result_on_first_cancel {#partial_result_on_first_cancel}
-
-Type: Bool
-
-Default value: 0
-
-Allows query to return a partial result after cancel.
-
-## parts_to_delay_insert {#parts_to_delay_insert}
-
-Type: UInt64
-
-Default value: 0
-
-If the destination table contains at least that many active parts in a single partition, artificially slow down insert into table.
-
-## parts_to_throw_insert {#parts_to_throw_insert}
-
-Type: UInt64
-
-Default value: 0
-
-If there are more than this number of active parts in a single partition of the destination table, throw 'Too many parts ...' exception.
-
-## periodic_live_view_refresh {#periodic_live_view_refresh}
-
-Type: Seconds
-
-Default value: 60
-
-Interval after which periodically refreshed live view is forced to refresh.
-
-## poll_interval {#poll_interval}
-
-Type: UInt64
-
-Default value: 10
-
-Block at the query wait loop on the server for the specified number of seconds.
-
-## postgresql_connection_attempt_timeout {#postgresql_connection_attempt_timeout}
-
-Type: UInt64
-
-Default value: 2
-
-Connection timeout in seconds of a single attempt to connect to a PostgreSQL end-point.
-The value is passed as a `connect_timeout` parameter of the connection URL.
-
-## postgresql_connection_pool_auto_close_connection {#postgresql_connection_pool_auto_close_connection}
-
-Type: Bool
-
-Default value: 0
-
-Close connection before returning connection to the pool.
-
-## postgresql_connection_pool_retries {#postgresql_connection_pool_retries}
-
-Type: UInt64
-
-Default value: 2
-
-Connection pool push/pop retries number for PostgreSQL table engine and database engine.
-
-## postgresql_connection_pool_size {#postgresql_connection_pool_size}
-
-Type: UInt64
-
-Default value: 16
-
-Connection pool size for PostgreSQL table engine and database engine.
-
-## postgresql_connection_pool_wait_timeout {#postgresql_connection_pool_wait_timeout}
-
-Type: UInt64
-
-Default value: 5000
-
-Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.
-
-## prefer_column_name_to_alias {#prefer_column_name_to_alias}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables using the original column names instead of aliases in query expressions and clauses. It especially matters when alias is the same as the column name, see [Expression Aliases](../../sql-reference/syntax.md/#notes-on-usage). Enable this setting to make aliases syntax rules in ClickHouse more compatible with most other database engines.
-
-Possible values:
-
-- 0 — The column name is substituted with the alias.
-- 1 — The column name is not substituted with the alias.
-
-**Example**
-
-The difference between enabled and disabled:
-
-Query:
-
-```sql
-SET prefer_column_name_to_alias = 0;
-SELECT avg(number) AS number, max(number) FROM numbers(10);
-```
-
-Result:
-
-```text
-Received exception from server (version 21.5.1):
-Code: 184. DB::Exception: Received from localhost:9000. DB::Exception: Aggregate function avg(number) is found inside another aggregate function in query: While processing avg(number) AS number.
-```
-
-Query:
-
-```sql
-SET prefer_column_name_to_alias = 1;
-SELECT avg(number) AS number, max(number) FROM numbers(10);
-```
-
-Result:
-
-```text
-┌─number─┬─max(number)─┐
-│    4.5 │           9 │
-└────────┴─────────────┘
-```
-
-## prefer_external_sort_block_bytes {#prefer_external_sort_block_bytes}
-
-Type: UInt64
-
-Default value: 16744704
-
-Prefer maximum block bytes for external sort, reduce the memory usage during merging.
-
-## prefer_global_in_and_join {#prefer_global_in_and_join}
-
-Type: Bool
-
-Default value: 0
-
-Enables the replacement of `IN`/`JOIN` operators with `GLOBAL IN`/`GLOBAL JOIN`.
-
-Possible values:
-
-- 0 — Disabled. `IN`/`JOIN` operators are not replaced with `GLOBAL IN`/`GLOBAL JOIN`.
-- 1 — Enabled. `IN`/`JOIN` operators are replaced with `GLOBAL IN`/`GLOBAL JOIN`.
-
-**Usage**
-
-Although `SET distributed_product_mode=global` can change the queries behavior for the distributed tables, it's not suitable for local tables or tables from external resources. Here is when the `prefer_global_in_and_join` setting comes into play.
-
-For example, we have query serving nodes that contain local tables, which are not suitable for distribution. We need to scatter their data on the fly during distributed processing with the `GLOBAL` keyword — `GLOBAL IN`/`GLOBAL JOIN`.
-
-Another use case of `prefer_global_in_and_join` is accessing tables created by external engines. This setting helps to reduce the number of calls to external sources while joining such tables: only one call per query.
-
-**See also:**
-
-- [Distributed subqueries](../../sql-reference/operators/in.md/#select-distributed-subqueries) for more information on how to use `GLOBAL IN`/`GLOBAL JOIN`
-
-## prefer_localhost_replica {#prefer_localhost_replica}
-
-Type: Bool
-
-Default value: 1
-
-Enables/disables preferring the localhost replica when processing distributed queries.
-
-Possible values:
-
-- 1 — ClickHouse always sends a query to the localhost replica if it exists.
-- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#load_balancing) setting.
-
-:::note
-Disable this setting if you use [max_parallel_replicas](#max_parallel_replicas) without [parallel_replicas_custom_key](#parallel_replicas_custom_key).
-If [parallel_replicas_custom_key](#parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas.
-If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects.
-:::
-
-## prefer_warmed_unmerged_parts_seconds {#prefer_warmed_unmerged_parts_seconds}
-
-Type: Int64
-
-Default value: 0
-
-Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.
-
-## preferred_block_size_bytes {#preferred_block_size_bytes}
-
-Type: UInt64
-
-Default value: 1000000
-
-This setting adjusts the data block size for query processing and represents additional fine-tuning to the more rough 'max_block_size' setting. If the columns are large and with 'max_block_size' rows the block size is likely to be larger than the specified amount of bytes, its size will be lowered for better CPU cache locality.
-
-## preferred_max_column_in_block_size_bytes {#preferred_max_column_in_block_size_bytes}
-
-Type: UInt64
-
-Default value: 0
-
-Limit on max column size in block while reading. Helps to decrease cache misses count. Should be close to L2 cache size.
-
-## preferred_optimize_projection_name {#preferred_optimize_projection_name}
-
-Type: String
-
-Default value:
-
-If it is set to a non-empty string, ClickHouse will try to apply specified projection in query.
-
-
-Possible values:
-
-- string: name of preferred projection
-
-## prefetch_buffer_size {#prefetch_buffer_size}
-
-Type: UInt64
-
-Default value: 1048576
-
-The maximum size of the prefetch buffer to read from the filesystem.
-
-## print_pretty_type_names {#print_pretty_type_names}
-
-Type: Bool
-
-Default value: 1
-
-Allows to print deep-nested type names in a pretty way with indents in `DESCRIBE` query and in `toTypeName()` function.
-
-Example:
-
-```sql
-CREATE TABLE test (a Tuple(b String, c Tuple(d Nullable(UInt64), e Array(UInt32), f Array(Tuple(g String, h Map(String, Array(Tuple(i String, j UInt64))))), k Date), l Nullable(String))) ENGINE=Memory;
-DESCRIBE TABLE test FORMAT TSVRaw SETTINGS print_pretty_type_names=1;
-```
-
-```
-a	Tuple(
-    b String,
-    c Tuple(
-        d Nullable(UInt64),
-        e Array(UInt32),
-        f Array(Tuple(
-            g String,
-            h Map(
-                String,
-                Array(Tuple(
-                    i String,
-                    j UInt64
-                ))
-            )
-        )),
-        k Date
-    ),
-    l Nullable(String)
-)
-```
-
-## priority {#priority}
-
-Type: UInt64
-
-Default value: 0
-
-Priority of the query. 1 - the highest, higher value - lower priority; 0 - do not use priorities.
-
-## query_cache_compress_entries {#query_cache_compress_entries}
-
-Type: Bool
-
-Default value: 1
-
-Compress entries in the [query cache](../query-cache.md). Lessens the memory consumption of the query cache at the cost of slower inserts into / reads from it.
-
-Possible values:
-
-- 0 - Disabled
-- 1 - Enabled
-
-## query_cache_max_entries {#query_cache_max_entries}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum number of query results the current user may store in the [query cache](../query-cache.md). 0 means unlimited.
-
-Possible values:
-
-- Non-negative integer.
-
-## query_cache_max_size_in_bytes {#query_cache_max_size_in_bytes}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.
-
-Possible values:
-
-- Non-negative integer.
-
-## query_cache_min_query_duration {#query_cache_min_query_duration}
-
-Type: Milliseconds
-
-Default value: 0
-
-Minimum duration in milliseconds a query needs to run for its result to be stored in the [query cache](../query-cache.md).
-
-Possible values:
-
-- Non-negative integer.
-
-## query_cache_min_query_runs {#query_cache_min_query_runs}
-
-Type: UInt64
-
-Default value: 0
-
-Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md).
-
-Possible values:
-
-- Non-negative integer.
-
-## query_cache_nondeterministic_function_handling {#query_cache_nondeterministic_function_handling}
-
-Type: QueryCacheNondeterministicFunctionHandling
-
-Default value: throw
-
-Controls how the [query cache](../query-cache.md) handles `SELECT` queries with non-deterministic functions like `rand()` or `now()`.
-
-Possible values:
-
-- `'throw'` - Throw an exception and don't cache the query result.
-- `'save'` - Cache the query result.
-- `'ignore'` - Don't cache the query result and don't throw an exception.
-
-## query_cache_share_between_users {#query_cache_share_between_users}
-
-Type: Bool
-
-Default value: 0
-
-If turned on, the result of `SELECT` queries cached in the [query cache](../query-cache.md) can be read by other users.
-It is not recommended to enable this setting due to security reasons.
-
-Possible values:
-
-- 0 - Disabled
-- 1 - Enabled
-
-## query_cache_squash_partial_results {#query_cache_squash_partial_results}
-
-Type: Bool
-
-Default value: 1
-
-Squash partial result blocks to blocks of size [max_block_size](#setting-max_block_size). Reduces performance of inserts into the [query cache](../query-cache.md) but improves the compressibility of cache entries (see [query_cache_compress_entries](#query_cache_compress_entries)).
-
-Possible values:
-
-- 0 - Disabled
-- 1 - Enabled
-
-## query_cache_system_table_handling {#query_cache_system_table_handling}
-
-Type: QueryCacheSystemTableHandling
-
-Default value: throw
-
-Controls how the [query cache](../query-cache.md) handles `SELECT` queries against system tables, i.e. tables in databases `system.*` and `information_schema.*`.
-
-Possible values:
-
-- `'throw'` - Throw an exception and don't cache the query result.
-- `'save'` - Cache the query result.
-- `'ignore'` - Don't cache the query result and don't throw an exception.
-
-## query_cache_tag {#query_cache_tag}
-
-Type: String
-
-Default value:
-
-A string which acts as a label for [query cache](../query-cache.md) entries.
-The same queries with different tags are considered different by the query cache.
-
-Possible values:
-
-- Any string
-
-## query_cache_ttl {#query_cache_ttl}
-
-Type: Seconds
-
-Default value: 60
-
-After this time in seconds entries in the [query cache](../query-cache.md) become stale.
-
-Possible values:
-
-- Non-negative integer.
-
-## query_plan_aggregation_in_order {#query_plan_aggregation_in_order}
-
-Type: Bool
-
-Default value: 1
-
-Toggles the aggregation in-order query-plan-level optimization.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_convert_outer_join_to_inner_join {#query_plan_convert_outer_join_to_inner_join}
-
-Type: Bool
-
-Default value: 1
-
-Allow to convert OUTER JOIN to INNER JOIN if filter after JOIN always filters default values
-
-## query_plan_enable_multithreading_after_window_functions {#query_plan_enable_multithreading_after_window_functions}
-
-Type: Bool
-
-Default value: 1
-
-Enable multithreading after evaluating window functions to allow parallel stream processing
-
-## query_plan_enable_optimizations {#query_plan_enable_optimizations}
-
-Type: Bool
-
-Default value: 1
-
-Toggles query optimization at the query plan level.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable all optimizations at the query plan level
-- 1 - Enable optimizations at the query plan level (but individual optimizations may still be disabled via their individual settings)
-
-## query_plan_execute_functions_after_sorting {#query_plan_execute_functions_after_sorting}
-
-Type: Bool
-
-Default value: 1
-
-Toggles a query-plan-level optimization which moves expressions after sorting steps.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_filter_push_down {#query_plan_filter_push_down}
-
-Type: Bool
-
-Default value: 1
-
-Toggles a query-plan-level optimization which moves filters down in the execution plan.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_lift_up_array_join {#query_plan_lift_up_array_join}
-
-Type: Bool
-
-Default value: 1
-
-Toggles a query-plan-level optimization which moves ARRAY JOINs up in the execution plan.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_lift_up_union {#query_plan_lift_up_union}
-
-Type: Bool
-
-Default value: 1
-
-Toggles a query-plan-level optimization which moves larger subtrees of the query plan into union to enable further optimizations.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_max_optimizations_to_apply {#query_plan_max_optimizations_to_apply}
-
-Type: UInt64
-
-Default value: 10000
-
-Limits the total number of optimizations applied to query plan, see setting [query_plan_enable_optimizations](#query_plan_enable_optimizations).
-Useful to avoid long optimization times for complex queries.
-If the actual number of optimizations exceeds this setting, an exception is thrown.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-## query_plan_merge_expressions {#query_plan_merge_expressions}
-
-Type: Bool
-
-Default value: 1
-
-Toggles a query-plan-level optimization which merges consecutive filters.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_merge_filters {#query_plan_merge_filters}
-
-Type: Bool
-
-Default value: 0
-
-Allow to merge filters in the query plan
-
-## query_plan_optimize_prewhere {#query_plan_optimize_prewhere}
-
-Type: Bool
-
-Default value: 1
-
-Allow to push down filter to PREWHERE expression for supported storages
-
-## query_plan_push_down_limit {#query_plan_push_down_limit}
-
-Type: Bool
-
-Default value: 1
-
-Toggles a query-plan-level optimization which moves LIMITs down in the execution plan.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_read_in_order {#query_plan_read_in_order}
-
-Type: Bool
-
-Default value: 1
-
-Toggles the read in-order query-plan-level optimization.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_remove_redundant_distinct {#query_plan_remove_redundant_distinct}
-
-Type: Bool
-
-Default value: 1
-
-Toggles a query-plan-level optimization which removes redundant DISTINCT steps.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_remove_redundant_sorting {#query_plan_remove_redundant_sorting}
-
-Type: Bool
-
-Default value: 1
-
-Toggles a query-plan-level optimization which removes redundant sorting steps, e.g. in subqueries.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_reuse_storage_ordering_for_window_functions {#query_plan_reuse_storage_ordering_for_window_functions}
-
-Type: Bool
-
-Default value: 1
-
-Toggles a query-plan-level optimization which uses storage sorting when sorting for window functions.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_plan_split_filter {#query_plan_split_filter}
-
-Type: Bool
-
-Default value: 1
-
-:::note
-This is an expert-level setting which should only be used for debugging by developers. The setting may change in future in backward-incompatible ways or be removed.
-:::
-
-Toggles a query-plan-level optimization which splits filters into expressions.
-Only takes effect if setting [query_plan_enable_optimizations](#query_plan_enable_optimizations) is 1.
-
-Possible values:
-
-- 0 - Disable
-- 1 - Enable
-
-## query_profiler_cpu_time_period_ns {#query_profiler_cpu_time_period_ns}
-
-Type: UInt64
-
-Default value: 1000000000
-
-Sets the period for a CPU clock timer of the [query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). This timer counts only CPU time.
-
-Possible values:
-
-- A positive integer number of nanoseconds.
-
-    Recommended values:
-
-            - 10000000 (100 times a second) nanoseconds and more for single queries.
-            - 1000000000 (once a second) for cluster-wide profiling.
-
-- 0 for turning off the timer.
-
-**Temporarily disabled in ClickHouse Cloud.**
-
-See also:
-
-- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
-
-## query_profiler_real_time_period_ns {#query_profiler_real_time_period_ns}
-
-Type: UInt64
-
-Default value: 1000000000
-
-Sets the period for a real clock timer of the [query profiler](../../operations/optimizing-performance/sampling-query-profiler.md). Real clock timer counts wall-clock time.
-
-Possible values:
-
-- Positive integer number, in nanoseconds.
-
-    Recommended values:
-
-            - 10000000 (100 times a second) nanoseconds and less for single queries.
-            - 1000000000 (once a second) for cluster-wide profiling.
-
-- 0 for turning off the timer.
-
-**Temporarily disabled in ClickHouse Cloud.**
-
-See also:
-
-- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
-
-## queue_max_wait_ms {#queue_max_wait_ms}
-
-Type: Milliseconds
-
-Default value: 0
-
-The wait time in the request queue, if the number of concurrent requests exceeds the maximum.
-
-## rabbitmq_max_wait_ms {#rabbitmq_max_wait_ms}
-
-Type: Milliseconds
-
-Default value: 5000
-
-The wait time for reading from RabbitMQ before retry.
-
-## read_backoff_max_throughput {#read_backoff_max_throughput}
-
-Type: UInt64
-
-Default value: 1048576
-
-Settings to reduce the number of threads in case of slow reads. Count events when the read bandwidth is less than that many bytes per second.
-
-## read_backoff_min_concurrency {#read_backoff_min_concurrency}
-
-Type: UInt64
-
-Default value: 1
-
-Settings to try keeping the minimal number of threads in case of slow reads.
-
-## read_backoff_min_events {#read_backoff_min_events}
-
-Type: UInt64
-
-Default value: 2
-
-Settings to reduce the number of threads in case of slow reads. The number of events after which the number of threads will be reduced.
-
-## read_backoff_min_interval_between_events_ms {#read_backoff_min_interval_between_events_ms}
-
-Type: Milliseconds
-
-Default value: 1000
-
-Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time.
-
-## read_backoff_min_latency_ms {#read_backoff_min_latency_ms}
-
-Type: Milliseconds
-
-Default value: 1000
-
-Setting to reduce the number of threads in case of slow reads. Pay attention only to reads that took at least that much time.
-
-## read_from_filesystem_cache_if_exists_otherwise_bypass_cache {#read_from_filesystem_cache_if_exists_otherwise_bypass_cache}
-
-Type: Bool
-
-Default value: 0
-
-Allow to use the filesystem cache in passive mode - benefit from the existing cache entries, but don't put more entries into the cache. If you set this setting for heavy ad-hoc queries and leave it disabled for short real-time queries, this allows avoiding cache thrashing by overly heavy queries and improves the overall system efficiency.
-
-## read_from_page_cache_if_exists_otherwise_bypass_cache {#read_from_page_cache_if_exists_otherwise_bypass_cache}
-
-Type: Bool
-
-Default value: 0
-
-Use userspace page cache in passive mode, similar to read_from_filesystem_cache_if_exists_otherwise_bypass_cache.
-
-## read_in_order_two_level_merge_threshold {#read_in_order_two_level_merge_threshold}
-
-Type: UInt64
-
-Default value: 100
-
-Minimal number of parts to read to run preliminary merge step during multithread reading in order of primary key.
-
-## read_in_order_use_buffering {#read_in_order_use_buffering}
-
-Type: Bool
-
-Default value: 1
-
-Use buffering before merging while reading in order of primary key. It increases the parallelism of query execution
-
-## read_overflow_mode {#read_overflow_mode}
-
-Type: OverflowMode
-
-Default value: throw
-
-What to do when the limit is exceeded.
-
-## read_overflow_mode_leaf {#read_overflow_mode_leaf}
-
-Type: OverflowMode
-
-Default value: throw
-
-What to do when the leaf limit is exceeded.
-
-## read_priority {#read_priority}
-
-Type: Int64
-
-Default value: 0
-
-Priority to read data from local filesystem or remote filesystem. Only supported for 'pread_threadpool' method for local filesystem and for `threadpool` method for remote filesystem.
-
-## read_through_distributed_cache {#read_through_distributed_cache}
-
-Type: Bool
-
-Default value: 0
-
-Only in ClickHouse Cloud. Allow reading from distributed cache
-
-## readonly {#readonly}
-
-Type: UInt64
-
-Default value: 0
-
-0 - no read-only restrictions. 1 - only read requests, as well as changing explicitly allowed settings. 2 - only read requests, as well as changing settings, except for the 'readonly' setting.
-
-## receive_data_timeout_ms {#receive_data_timeout_ms}
-
-Type: Milliseconds
-
-Default value: 2000
-
-Connection timeout for receiving first packet of data or packet with positive progress from replica
-
-## receive_timeout {#receive_timeout}
-
-Type: Seconds
-
-Default value: 300
-
-Timeout for receiving data from the network, in seconds. If no bytes were received in this interval, the exception is thrown. If you set this setting on the client, the 'send_timeout' for the socket will also be set on the corresponding connection end on the server.
-
-## regexp_max_matches_per_row {#regexp_max_matches_per_row}
-
-Type: UInt64
-
-Default value: 1000
-
-Sets the maximum number of matches for a single regular expression per row. Use it to protect against memory overload when using greedy regular expression in the [extractAllGroupsHorizontal](../../sql-reference/functions/string-search-functions.md/#extractallgroups-horizontal) function.
-
-Possible values:
-
-- Positive integer.
-
-## reject_expensive_hyperscan_regexps {#reject_expensive_hyperscan_regexps}
-
-Type: Bool
-
-Default value: 1
-
-Reject patterns which will likely be expensive to evaluate with hyperscan (due to NFA state explosion)
-
-## remerge_sort_lowered_memory_bytes_ratio {#remerge_sort_lowered_memory_bytes_ratio}
-
-Type: Float
-
-Default value: 2
-
-If memory usage after remerge is not reduced by this ratio, remerge will be disabled.
-
-## remote_filesystem_read_method {#remote_filesystem_read_method}
-
-Type: String
-
-Default value: threadpool
-
-Method of reading data from remote filesystem, one of: read, threadpool.
-
-## remote_filesystem_read_prefetch {#remote_filesystem_read_prefetch}
-
-Type: Bool
-
-Default value: 1
-
-Should use prefetching when reading data from remote filesystem.
-
-## remote_fs_read_backoff_max_tries {#remote_fs_read_backoff_max_tries}
-
-Type: UInt64
-
-Default value: 5
-
-Max attempts to read with backoff
-
-## remote_fs_read_max_backoff_ms {#remote_fs_read_max_backoff_ms}
-
-Type: UInt64
-
-Default value: 10000
-
-Max wait time when trying to read data for remote disk
-
-## remote_read_min_bytes_for_seek {#remote_read_min_bytes_for_seek}
-
-Type: UInt64
-
-Default value: 4194304
-
-Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.
-
-## rename_files_after_processing {#rename_files_after_processing}
-
-Type: String
-
-Default value:
-
-- **Type:** String
-
-- **Default value:** Empty string
-
-This setting allows to specify renaming pattern for files processed by `file` table function. When option is set, all files read by `file` table function will be renamed according to specified pattern with placeholders, only if files processing was successful.
-
-### Placeholders
-
-- `%a` — Full original filename (e.g., "sample.csv").
-- `%f` — Original filename without extension (e.g., "sample").
-- `%e` — Original file extension with dot (e.g., ".csv").
-- `%t` — Timestamp (in microseconds).
-- `%%` — Percentage sign ("%").
-
-### Example
-- Option: `--rename_files_after_processing="processed_%f_%t%e"`
-
-- Query: `SELECT * FROM file('sample.csv')`
-
-
-If reading `sample.csv` is successful, file will be renamed to `processed_sample_1683473210851438.csv`
-
-## replace_running_query {#replace_running_query}
-
-Type: Bool
-
-Default value: 0
-
-When using the HTTP interface, the ‘query_id’ parameter can be passed. This is any string that serves as the query identifier.
-If a query from the same user with the same ‘query_id’ already exists at this time, the behaviour depends on the ‘replace_running_query’ parameter.
-
-`0` (default) – Throw an exception (do not allow the query to run if a query with the same ‘query_id’ is already running).
-
-`1` – Cancel the old query and start running the new one.
-
-Set this parameter to 1 for implementing suggestions for segmentation conditions. After entering the next character, if the old query hasn’t finished yet, it should be cancelled.
-
-## replace_running_query_max_wait_ms {#replace_running_query_max_wait_ms}
-
-Type: Milliseconds
-
-Default value: 5000
-
-The wait time for running the query with the same `query_id` to finish, when the [replace_running_query](#replace_running_query) setting is active.
-
-Possible values:
-
-- Positive integer.
-- 0 — Throwing an exception that does not allow to run a new query if the server already executes a query with the same `query_id`.
-
-## replication_wait_for_inactive_replica_timeout {#replication_wait_for_inactive_replica_timeout}
-
-Type: Int64
-
-Default value: 120
-
-Specifies how long (in seconds) to wait for inactive replicas to execute [ALTER](../../sql-reference/statements/alter/index.md), [OPTIMIZE](../../sql-reference/statements/optimize.md) or [TRUNCATE](../../sql-reference/statements/truncate.md) queries.
-
-Possible values:
-
-- 0 — Do not wait.
-- Negative integer — Wait for unlimited time.
-- Positive integer — The number of seconds to wait.
-
-## restore_replace_external_dictionary_source_to_null {#restore_replace_external_dictionary_source_to_null}
-
-Type: Bool
-
-Default value: 0
-
-Replace external dictionary sources to Null on restore. Useful for testing purposes
-
-## restore_replace_external_engines_to_null {#restore_replace_external_engines_to_null}
-
-Type: Bool
-
-Default value: 0
-
-For testing purposes. Replaces all external engines to Null to not initiate external connections.
-
-## restore_replace_external_table_functions_to_null {#restore_replace_external_table_functions_to_null}
-
-Type: Bool
-
-Default value: 0
-
-For testing purposes. Replaces all external table functions to Null to not initiate external connections.
-
-## result_overflow_mode {#result_overflow_mode}
-
-Type: OverflowMode
-
-Default value: throw
-
-What to do when the limit is exceeded.
-
-## rewrite_count_distinct_if_with_count_distinct_implementation {#rewrite_count_distinct_if_with_count_distinct_implementation}
-
-Type: Bool
-
-Default value: 0
-
-Allows you to rewrite `countDistinctIf` with the [count_distinct_implementation](#count_distinct_implementation) setting.
-
-Possible values:
-
-- true — Allow.
-- false — Disallow.
-
-## s3_allow_parallel_part_upload {#s3_allow_parallel_part_upload}
-
-Type: Bool
-
-Default value: 1
-
-Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage
-
-## s3_check_objects_after_upload {#s3_check_objects_after_upload}
-
-Type: Bool
-
-Default value: 0
-
-Check each uploaded object to s3 with head request to be sure that upload was successful
-
-## s3_connect_timeout_ms {#s3_connect_timeout_ms}
-
-Type: UInt64
-
-Default value: 1000
-
-Connection timeout for host from s3 disks.
-
-## s3_create_new_file_on_insert {#s3_create_new_file_on_insert}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:
-
-initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.
-
-Possible values:
-- 0 — `INSERT` query appends new data to the end of the file.
-- 1 — `INSERT` query creates a new file.
-
-## s3_disable_checksum {#s3_disable_checksum}
-
-Type: Bool
-
-Default value: 0
-
-Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. However, additional checksums on S3 provide defense in depth.
-
-## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist}
-
-Type: Bool
-
-Default value: 0
-
-Ignore absence of file if it does not exist when reading certain keys.
-
-Possible values:
-- 1 — `SELECT` returns empty result.
-- 0 — `SELECT` throws an exception.
-
-## s3_list_object_keys_size {#s3_list_object_keys_size}
-
-Type: UInt64
-
-Default value: 1000
-
-Maximum number of files that could be returned in batch by ListObject request
-
-## s3_max_connections {#s3_max_connections}
-
-Type: UInt64
-
-Default value: 1024
-
-The maximum number of connections per server.
-
-## s3_max_get_burst {#s3_max_get_burst}
-
-Type: UInt64
-
-Default value: 0
-
-Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_get_rps`
-
-## s3_max_get_rps {#s3_max_get_rps}
-
-Type: UInt64
-
-Default value: 0
-
-Limit on S3 GET request per second rate before throttling. Zero means unlimited.
-
-## s3_max_inflight_parts_for_one_file {#s3_max_inflight_parts_for_one_file}
-
-Type: UInt64
-
-Default value: 20
-
-The maximum number of concurrently loaded parts in a multipart upload request. 0 means unlimited.
-
-## s3_max_part_number {#s3_max_part_number}
-
-Type: UInt64
-
-Default value: 10000
-
-Maximum part number for s3 upload part.
-
-## s3_max_put_burst {#s3_max_put_burst}
-
-Type: UInt64
-
-Default value: 0
-
-Max number of requests that can be issued simultaneously before hitting request per second limit. By default (0) equals to `s3_max_put_rps`
-
-## s3_max_put_rps {#s3_max_put_rps}
-
-Type: UInt64
-
-Default value: 0
-
-Limit on S3 PUT request per second rate before throttling. Zero means unlimited.
-
-## s3_max_redirects {#s3_max_redirects}
-
-Type: UInt64
-
-Default value: 10
-
-Max number of S3 redirect hops allowed.
-
-## s3_max_single_operation_copy_size {#s3_max_single_operation_copy_size}
-
-Type: UInt64
-
-Default value: 33554432
-
-Maximum size for a single copy operation in s3
-
-## s3_max_single_part_upload_size {#s3_max_single_part_upload_size}
-
-Type: UInt64
-
-Default value: 33554432
-
-The maximum size of object to upload using singlepart upload to S3.
-
-## s3_max_single_read_retries {#s3_max_single_read_retries}
-
-Type: UInt64
-
-Default value: 4
-
-The maximum number of retries during single S3 read.
-
-## s3_max_unexpected_write_error_retries {#s3_max_unexpected_write_error_retries}
-
-Type: UInt64
-
-Default value: 4
-
-The maximum number of retries in case of unexpected errors during S3 write.
-
-## s3_max_upload_part_size {#s3_max_upload_part_size}
-
-Type: UInt64
-
-Default value: 5368709120
-
-The maximum size of part to upload during multipart upload to S3.
-
-## s3_min_upload_part_size {#s3_min_upload_part_size}
-
-Type: UInt64
-
-Default value: 16777216
-
-The minimum size of part to upload during multipart upload to S3.
-
-## s3_request_timeout_ms {#s3_request_timeout_ms}
-
-Type: UInt64
-
-Default value: 30000
-
-Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.
-
-## s3_retry_attempts {#s3_retry_attempts}
-
-Type: UInt64
-
-Default value: 100
-
-Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries
-
-## s3_skip_empty_files {#s3_skip_empty_files}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables skipping empty files in [S3](../../engines/table-engines/integrations/s3.md) engine tables.
-
-Possible values:
-- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
-- 1 — `SELECT` returns empty result for empty file.
-
-## s3_strict_upload_part_size {#s3_strict_upload_part_size}
-
-Type: UInt64
-
-Default value: 0
-
-The exact size of part to upload during multipart upload to S3 (some implementations do not support variable size parts).
-
-## s3_throw_on_zero_files_match {#s3_throw_on_zero_files_match}
-
-Type: Bool
-
-Default value: 0
-
-Throw an error, when ListObjects request cannot match any files
-
-## s3_truncate_on_insert {#s3_truncate_on_insert}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists.
-
-Possible values:
-- 0 — `INSERT` query appends new data to the end of the file.
-- 1 — `INSERT` query replaces existing content of the file with the new data.
-
-## s3_upload_part_size_multiply_factor {#s3_upload_part_size_multiply_factor}
-
-Type: UInt64
-
-Default value: 2
-
-Multiply s3_min_upload_part_size by this factor each time s3_upload_part_size_multiply_parts_count_threshold parts were uploaded from a single write to S3.
-
-## s3_upload_part_size_multiply_parts_count_threshold {#s3_upload_part_size_multiply_parts_count_threshold}
-
-Type: UInt64
-
-Default value: 500
-
-Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.
-
-## s3_use_adaptive_timeouts {#s3_use_adaptive_timeouts}
-
-Type: Bool
-
-Default value: 1
-
-When set to `true`, then for all s3 requests the first two attempts are made with low send and receive timeouts.
-When set to `false`, then all attempts are made with identical timeouts.
-
-## s3_validate_request_settings {#s3_validate_request_settings}
-
-Type: Bool
-
-Default value: 1
-
-Enables s3 request settings validation.
-
-Possible values:
-- 1 — validate settings.
-- 0 — do not validate settings.
-
-## s3queue_default_zookeeper_path {#s3queue_default_zookeeper_path}
-
-Type: String
-
-Default value: /clickhouse/s3queue/
-
-Default zookeeper path prefix for S3Queue engine
-
-## s3queue_enable_logging_to_s3queue_log {#s3queue_enable_logging_to_s3queue_log}
-
-Type: Bool
-
-Default value: 0
-
-Enable writing to system.s3queue_log. The value can be overwritten per table with table settings
-
-## schema_inference_cache_require_modification_time_for_url {#schema_inference_cache_require_modification_time_for_url}
-
-Type: Bool
-
-Default value: 1
-
-Use schema from cache for URL with last modification time validation (for URLs with Last-Modified header)
-
-## schema_inference_use_cache_for_azure {#schema_inference_use_cache_for_azure}
-
-Type: Bool
-
-Default value: 1
-
-Use cache in schema inference while using azure table function
-
-## schema_inference_use_cache_for_file {#schema_inference_use_cache_for_file}
-
-Type: Bool
-
-Default value: 1
-
-Use cache in schema inference while using file table function
-
-## schema_inference_use_cache_for_hdfs {#schema_inference_use_cache_for_hdfs}
-
-Type: Bool
-
-Default value: 1
-
-Use cache in schema inference while using hdfs table function
-
-## schema_inference_use_cache_for_s3 {#schema_inference_use_cache_for_s3}
-
-Type: Bool
-
-Default value: 1
-
-Use cache in schema inference while using s3 table function
-
-## schema_inference_use_cache_for_url {#schema_inference_use_cache_for_url}
-
-Type: Bool
-
-Default value: 1
-
-Use cache in schema inference while using url table function
-
-## select_sequential_consistency {#select_sequential_consistency}
-
-Type: UInt64
-
-Default value: 0
-
-:::note
-This setting differs in behavior between SharedMergeTree and ReplicatedMergeTree, see [SharedMergeTree consistency](/docs/en/cloud/reference/shared-merge-tree/#consistency) for more information about the behavior of `select_sequential_consistency` in SharedMergeTree.
-:::
-
-Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default).
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-Usage
-
-When sequential consistency is enabled, ClickHouse allows the client to execute the `SELECT` query only for those replicas that contain data from all previous `INSERT` queries executed with `insert_quorum`. If the client refers to a partial replica, ClickHouse will generate an exception. The SELECT query will not include data that has not yet been written to the quorum of replicas.
-
-When `insert_quorum_parallel` is enabled (the default), then `select_sequential_consistency` does not work. This is because parallel `INSERT` queries can be written to different sets of quorum replicas so there is no guarantee a single replica will have received all writes.
-
-See also:
-
-- [insert_quorum](#insert_quorum)
-- [insert_quorum_timeout](#insert_quorum_timeout)
-- [insert_quorum_parallel](#insert_quorum_parallel)
-
-## send_logs_level {#send_logs_level}
-
-Type: LogsLevel
-
-Default value: fatal
-
-Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'
-
-## send_logs_source_regexp {#send_logs_source_regexp}
-
-Type: String
-
-Default value:
-
-Send server text logs with specified regexp to match log source name. Empty means all sources.
-
-## send_progress_in_http_headers {#send_progress_in_http_headers}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables `X-ClickHouse-Progress` HTTP response headers in `clickhouse-server` responses.
-
-For more information, read the [HTTP interface description](../../interfaces/http.md).
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## send_timeout {#send_timeout}
-
-Type: Seconds
-
-Default value: 300
-
-Timeout for sending data to the network, in seconds. If a client needs to send some data but is not able to send any bytes in this interval, the exception is thrown. If you set this setting on the client, the 'receive_timeout' for the socket will also be set on the corresponding connection end on the server.
-
-## session_timezone {#session_timezone}
-
-Type: Timezone
-
-Default value:
-
-Sets the implicit time zone of the current session or query.
-The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone.
-The setting takes precedence over the globally configured (server-level) implicit time zone.
-A value of '' (empty string) means that the implicit time zone of the current session or query is equal to the [server time zone](../server-configuration-parameters/settings.md#timezone).
-
-You can use functions `timeZone()` and `serverTimeZone()` to get the session time zone and server time zone.
-
-Possible values:
-
--    Any time zone name from `system.time_zones`, e.g. `Europe/Berlin`, `UTC` or `Zulu`
-
-Examples:
-
-```sql
-SELECT timeZone(), serverTimeZone() FORMAT TSV
-
-Europe/Berlin	Europe/Berlin
-```
-
-```sql
-SELECT timeZone(), serverTimeZone() SETTINGS session_timezone = 'Asia/Novosibirsk' FORMAT TSV
-
-Asia/Novosibirsk	Europe/Berlin
-```
-
-Assign session time zone 'America/Denver' to the inner DateTime without explicitly specified time zone:
-
-```sql
-SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zurich') SETTINGS session_timezone = 'America/Denver' FORMAT TSV
-
-1999-12-13 07:23:23.123
-```
-
-:::warning
-Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors.
-See the following example and explanation.
-:::
-
-```sql
-CREATE TABLE test_tz (`d` DateTime('UTC')) ENGINE = Memory AS SELECT toDateTime('2000-01-01 00:00:00', 'UTC');
-
-SELECT *, timeZone() FROM test_tz WHERE d = toDateTime('2000-01-01 00:00:00') SETTINGS session_timezone = 'Asia/Novosibirsk'
-0 rows in set.
-
-SELECT *, timeZone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS session_timezone = 'Asia/Novosibirsk'
-┌───────────────────d─┬─timeZone()───────┐
-│ 2000-01-01 00:00:00 │ Asia/Novosibirsk │
-└─────────────────────┴──────────────────┘
-```
-
-This happens due to different parsing pipelines:
-
-- `toDateTime()` without explicitly given time zone used in the first `SELECT` query honors setting `session_timezone` and the global time zone.
-- In the second query, a DateTime is parsed from a String, and inherits the type and time zone of the existing column `d`. Thus, setting `session_timezone` and the global time zone are not honored.
-
-**See also**
-
-- [timezone](../server-configuration-parameters/settings.md#timezone)
-
-## set_overflow_mode {#set_overflow_mode}
-
-Type: OverflowMode
-
-Default value: throw
-
-What to do when the limit is exceeded.
-
-## short_circuit_function_evaluation {#short_circuit_function_evaluation}
-
-Type: ShortCircuitFunctionEvaluation
-
-Default value: enable
-
-Allows calculating the [if](../../sql-reference/functions/conditional-functions.md/#if), [multiIf](../../sql-reference/functions/conditional-functions.md/#multiif), [and](../../sql-reference/functions/logical-functions.md/#logical-and-function), and [or](../../sql-reference/functions/logical-functions.md/#logical-or-function) functions according to a [short scheme](https://en.wikipedia.org/wiki/Short-circuit_evaluation). This helps optimize the execution of complex expressions in these functions and prevent possible exceptions (such as division by zero when it is not expected).
-
-Possible values:
-
-- `enable` — Enables short-circuit function evaluation for functions that are suitable for it (those that can throw an exception or are computationally heavy).
-- `force_enable` — Enables short-circuit function evaluation for all functions.
-- `disable` — Disables short-circuit function evaluation.
-
-## show_table_uuid_in_table_create_query_if_not_nil {#show_table_uuid_in_table_create_query_if_not_nil}
-
-Type: Bool
-
-Default value: 0
-
-Sets the `SHOW TABLE` query display.
-
-Possible values:
-
-- 0 — The query will be displayed without table UUID.
-- 1 — The query will be displayed with table UUID.
-
-## single_join_prefer_left_table {#single_join_prefer_left_table}
-
-Type: Bool
-
-Default value: 1
-
-For single JOIN in case of identifier ambiguity prefer left table
-
-## skip_download_if_exceeds_query_cache {#skip_download_if_exceeds_query_cache}
-
-Type: Bool
-
-Default value: 1
-
-Skip download from remote filesystem if exceeds query cache size
-
-## skip_unavailable_shards {#skip_unavailable_shards}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables silently skipping of unavailable shards.
-
-Shard is considered unavailable if all its replicas are unavailable. A replica is unavailable in the following cases:
-
-- ClickHouse can’t connect to replica for any reason.
-
-    When connecting to a replica, ClickHouse performs several attempts. If all these attempts fail, the replica is considered unavailable.
-
-- Replica can’t be resolved through DNS.
-
-    If replica’s hostname can’t be resolved through DNS, it can indicate the following situations:
-
-    - Replica’s host has no DNS record. It can occur in systems with dynamic DNS, for example, [Kubernetes](https://kubernetes.io), where nodes can be unresolvable during downtime, and this is not an error.
-
-    - Configuration error. ClickHouse configuration file contains a wrong hostname.
-
-Possible values:
-
-- 1 — skipping enabled.
-
-    If a shard is unavailable, ClickHouse returns a result based on partial data and does not report node availability issues.
-
-- 0 — skipping disabled.
-
-    If a shard is unavailable, ClickHouse throws an exception.
-
-## sleep_after_receiving_query_ms {#sleep_after_receiving_query_ms}
-
-Type: Milliseconds
-
-Default value: 0
-
-Time to sleep after receiving query in TCPHandler
-
-## sleep_in_send_data_ms {#sleep_in_send_data_ms}
-
-Type: Milliseconds
-
-Default value: 0
-
-Time to sleep in sending data in TCPHandler
-
-## sleep_in_send_tables_status_ms {#sleep_in_send_tables_status_ms}
-
-Type: Milliseconds
-
-Default value: 0
-
-Time to sleep in sending tables status response in TCPHandler
-
-## sort_overflow_mode {#sort_overflow_mode}
-
-Type: OverflowMode
-
-Default value: throw
-
-What to do when the limit is exceeded.
-
-## split_intersecting_parts_ranges_into_layers_final {#split_intersecting_parts_ranges_into_layers_final}
-
-Type: Bool
-
-Default value: 1
-
-Split intersecting parts ranges into layers during FINAL optimization
-
-## split_parts_ranges_into_intersecting_and_non_intersecting_final {#split_parts_ranges_into_intersecting_and_non_intersecting_final}
-
-Type: Bool
-
-Default value: 1
-
-Split parts ranges into intersecting and non intersecting during FINAL optimization
-
-## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string}
-
-Type: Bool
-
-Default value: 0
-
-Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array.
-
-Possible values:
-
-- `0` - The remaining string will not be included in the last element of the result array.
-- `1` - The remaining string will be included in the last element of the result array. This is the behavior of Spark's [`split()`](https://spark.apache.org/docs/3.1.2/api/python/reference/api/pyspark.sql.functions.split.html) function and Python's ['string.split()'](https://docs.python.org/3/library/stdtypes.html#str.split) method.
-
-## stop_refreshable_materialized_views_on_startup {#stop_refreshable_materialized_views_on_startup}
-
-Type: Bool
-
-Default value: 0
-
-On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW \\<name\\> afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.
-
-## storage_file_read_method {#storage_file_read_method}
-
-Type: LocalFSReadMethod
-
-Default value: pread
-
-Method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).
-
-## storage_system_stack_trace_pipe_read_timeout_ms {#storage_system_stack_trace_pipe_read_timeout_ms}
-
-Type: Milliseconds
-
-Default value: 100
-
-Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.
-
-## stream_flush_interval_ms {#stream_flush_interval_ms}
-
-Type: Milliseconds
-
-Default value: 7500
-
-Works for tables with streaming in the case of a timeout, or when a thread generates [max_insert_block_size](#max_insert_block_size) rows.
-
-The default value is 7500.
-
-The smaller the value, the more often data is flushed into the table. Setting the value too low leads to poor performance.
-
-## stream_like_engine_allow_direct_select {#stream_like_engine_allow_direct_select}
-
-Type: Bool
-
-Default value: 0
-
-Allow direct SELECT query for Kafka, RabbitMQ, FileLog, Redis Streams, and NATS engines. In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled.
-
-## stream_like_engine_insert_queue {#stream_like_engine_insert_queue}
-
-Type: String
-
-Default value:
-
-When stream-like engine reads from multiple queues, the user will need to select one queue to insert into when writing. Used by Redis Streams and NATS.
-
-## stream_poll_timeout_ms {#stream_poll_timeout_ms}
-
-Type: Milliseconds
-
-Default value: 500
-
-Timeout for polling data from/to streaming storages.
-
-## system_events_show_zero_values {#system_events_show_zero_values}
-
-Type: Bool
-
-Default value: 0
-
-Allows to select zero-valued events from [`system.events`](../../operations/system-tables/events.md).
-
-Some monitoring systems require passing all the metrics values to them for each checkpoint, even if the metric value is zero.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-**Examples**
-
-Query
-
-```sql
-SELECT * FROM system.events WHERE event='QueryMemoryLimitExceeded';
-```
-
-Result
-
-```text
-Ok.
-```
-
-Query
-```sql
-SET system_events_show_zero_values = 1;
-SELECT * FROM system.events WHERE event='QueryMemoryLimitExceeded';
-```
-
-Result
-
-```text
-┌─event────────────────────┬─value─┬─description───────────────────────────────────────────┐
-│ QueryMemoryLimitExceeded │     0 │ Number of times when memory limit exceeded for query. │
-└──────────────────────────┴───────┴───────────────────────────────────────────────────────┘
-```
-
-## table_function_remote_max_addresses {#table_function_remote_max_addresses}
-
-Type: UInt64
-
-Default value: 1000
-
-Sets the maximum number of addresses generated from patterns for the [remote](../../sql-reference/table-functions/remote.md) function.
-
-Possible values:
-
-- Positive integer.
-
-## tcp_keep_alive_timeout {#tcp_keep_alive_timeout}
-
-Type: Seconds
-
-Default value: 290
-
-The time in seconds the connection needs to remain idle before TCP starts sending keepalive probes
-
-## temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds {#temporary_data_in_cache_reserve_space_wait_lock_timeout_milliseconds}
-
-Type: UInt64
-
-Default value: 600000
-
-Wait time to lock cache for space reservation for temporary data in filesystem cache
-
-## temporary_files_codec {#temporary_files_codec}
-
-Type: String
-
-Default value: LZ4
-
-Sets compression codec for temporary files used in sorting and joining operations on disk.
-
-Possible values:
-
-- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied.
-- NONE — No compression is applied.
-
-## throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert {#throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert}
-
-Type: Bool
-
-Default value: 1
-
-Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.
-
-## throw_if_no_data_to_insert {#throw_if_no_data_to_insert}
-
-Type: Bool
-
-Default value: 1
-
-Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert)
-
-## throw_on_error_from_cache_on_write_operations {#throw_on_error_from_cache_on_write_operations}
-
-Type: Bool
-
-Default value: 0
-
-Ignore error from cache when caching on write operations (INSERT, merges)
-
-## throw_on_max_partitions_per_insert_block {#throw_on_max_partitions_per_insert_block}
-
-Type: Bool
-
-Default value: 1
-
-Used with max_partitions_per_insert_block. If true (default), an exception will be thrown when max_partitions_per_insert_block is reached. If false, details of the insert query reaching this limit with the number of partitions will be logged. This can be useful if you're trying to understand the impact on users when changing max_partitions_per_insert_block.
-
-## throw_on_unsupported_query_inside_transaction {#throw_on_unsupported_query_inside_transaction}
-
-Type: Bool
-
-Default value: 1
-
-Throw exception if unsupported query is used inside transaction
-
-## timeout_before_checking_execution_speed {#timeout_before_checking_execution_speed}
-
-Type: Seconds
-
-Default value: 10
-
-Check that the speed is not too low after the specified time has elapsed.
-
-## timeout_overflow_mode {#timeout_overflow_mode}
-
-Type: OverflowMode
-
-Default value: throw
-
-What to do when the limit is exceeded.
-
-## timeout_overflow_mode_leaf {#timeout_overflow_mode_leaf}
-
-Type: OverflowMode
-
-Default value: throw
-
-What to do when the leaf limit is exceeded.
-
-## totals_auto_threshold {#totals_auto_threshold}
-
-Type: Float
-
-Default value: 0.5
-
-The threshold for `totals_mode = 'auto'`.
-See the section “WITH TOTALS modifier”.
-
-## totals_mode {#totals_mode}
-
-Type: TotalsMode
-
-Default value: after_having_exclusive
-
-How to calculate TOTALS when HAVING is present, as well as when max_rows_to_group_by and group_by_overflow_mode = ‘any’ are present.
-See the section “WITH TOTALS modifier”.
-
-## trace_profile_events {#trace_profile_events}
-
-Type: Bool
-
-Default value: 0
-
-Enables or disables collecting stacktraces on each update of profile events along with the name of profile event and the value of increment and sending them into [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log).
-
-Possible values:
-
-- 1 — Tracing of profile events enabled.
-- 0 — Tracing of profile events disabled.
-
-## transfer_overflow_mode {#transfer_overflow_mode}
-
-Type: OverflowMode
-
-Default value: throw
-
-What to do when the limit is exceeded.
-
-## transform_null_in {#transform_null_in}
-
-Type: Bool
-
-Default value: 0
-
-Enables equality of [NULL](../../sql-reference/syntax.md/#null-literal) values for [IN](../../sql-reference/operators/in.md) operator.
-
-By default, `NULL` values can’t be compared because `NULL` means undefined value. Thus, comparison `expr = NULL` must always return `false`. With this setting `NULL = NULL` returns `true` for `IN` operator.
-
-Possible values:
-
-- 0 — Comparison of `NULL` values in `IN` operator returns `false`.
-- 1 — Comparison of `NULL` values in `IN` operator returns `true`.
-
-**Example**
-
-Consider the `null_in` table:
-
-``` text
-┌──idx─┬─────i─┐
-│    1 │     1 │
-│    2 │  NULL │
-│    3 │     3 │
-└──────┴───────┘
-```
-
-Query:
-
-``` sql
-SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 0;
-```
-
-Result:
-
-``` text
-┌──idx─┬────i─┐
-│    1 │    1 │
-└──────┴──────┘
-```
-
-Query:
-
-``` sql
-SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
-```
-
-Result:
-
-``` text
-┌──idx─┬─────i─┐
-│    1 │     1 │
-│    2 │  NULL │
-└──────┴───────┘
-```
-
-**See Also**
-
-- [NULL Processing in IN Operators](../../sql-reference/operators/in.md/#in-null-processing)
-
-## traverse_shadow_remote_data_paths {#traverse_shadow_remote_data_paths}
-
-Type: Bool
-
-Default value: 0
-
-Traverse shadow directory when query system.remote_data_paths
-
-## union_default_mode {#union_default_mode}
-
-Type: SetOperationMode
-
-Default value:
-
-Sets a mode for combining `SELECT` query results. The setting is only used when shared with [UNION](../../sql-reference/statements/select/union.md) without explicitly specifying the `UNION ALL` or `UNION DISTINCT`.
-
-Possible values:
-
-- `'DISTINCT'` — ClickHouse outputs rows as a result of combining queries removing duplicate rows.
-- `'ALL'` — ClickHouse outputs all rows as a result of combining queries including duplicate rows.
-- `''` — ClickHouse generates an exception when used with `UNION`.
-
-See examples in [UNION](../../sql-reference/statements/select/union.md).
-
-## unknown_packet_in_send_data {#unknown_packet_in_send_data}
-
-Type: UInt64
-
-Default value: 0
-
-Send unknown packet instead of data Nth data packet
-
-## use_cache_for_count_from_files {#use_cache_for_count_from_files}
-
-Type: Bool
-
-Default value: 1
-
-Enables caching of rows number during count from files in table functions `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`.
-
-Enabled by default.
-
-## use_client_time_zone {#use_client_time_zone}
-
-Type: Bool
-
-Default value: 0
-
-Use client timezone for interpreting DateTime string values, instead of adopting server timezone.
-
-## use_compact_format_in_distributed_parts_names {#use_compact_format_in_distributed_parts_names}
-
-Type: Bool
-
-Default value: 1
-
-Uses compact format for storing blocks for background (`distributed_foreground_insert`) INSERT into tables with `Distributed` engine.
-
-Possible values:
-
-- 0 — Uses `user[:password]@host:port#default_database` directory format.
-- 1 — Uses `[shard{shard_index}[_replica{replica_index}]]` directory format.
-
-:::note
-- with `use_compact_format_in_distributed_parts_names=0` changes from cluster definition will not be applied for background INSERT.
-- with `use_compact_format_in_distributed_parts_names=1` changing the order of the nodes in the cluster definition, will change the `shard_index`/`replica_index` so be aware.
-:::
-
-## use_concurrency_control {#use_concurrency_control}
-
-Type: Bool
-
-Default value: 1
-
-Respect the server's concurrency control (see the `concurrent_threads_soft_limit_num` and `concurrent_threads_soft_limit_ratio_to_cores` global server settings). If disabled, it allows using a larger number of threads even if the server is overloaded (not recommended for normal usage, and needed mostly for tests).
-
-## use_hedged_requests {#use_hedged_requests}
-
-Type: Bool
-
-Default value: 1
-
-Enables hedged requests logic for remote queries. It allows to establish many connections with different replicas for query.
-New connection is enabled in case existent connection(s) with replica(s) were not established within `hedged_connection_timeout`
-or no data was received within `receive_data_timeout`. Query uses the first connection which send non empty progress packet (or data packet, if `allow_changing_replica_until_first_data_packet`);
-other connections are cancelled. Queries with `max_parallel_replicas > 1` are supported.
-
-Enabled by default.
-
-Disabled by default on Cloud.
-
-## use_hive_partitioning {#use_hive_partitioning}
-
-Type: Bool
-
-Default value: 0
-
-When enabled, ClickHouse will detect Hive-style partitioning in path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
-
-## use_index_for_in_with_subqueries {#use_index_for_in_with_subqueries}
-
-Type: Bool
-
-Default value: 1
-
-Try using an index if there is a subquery or a table expression on the right side of the IN operator.
-
-## use_index_for_in_with_subqueries_max_values {#use_index_for_in_with_subqueries_max_values}
-
-Type: UInt64
-
-Default value: 0
-
-The maximum size of the set in the right-hand side of the IN operator to use table index for filtering. It allows to avoid performance degradation and higher memory usage due to the preparation of additional data structures for large queries. Zero means no limit.
-
-## use_json_alias_for_old_object_type {#use_json_alias_for_old_object_type}
-
-Type: Bool
-
-Default value: 0
-
-When enabled, `JSON` data type alias will be used to create an old [Object('json')](../../sql-reference/data-types/json.md) type instead of the new [JSON](../../sql-reference/data-types/newjson.md) type.
-
-## use_local_cache_for_remote_storage {#use_local_cache_for_remote_storage}
-
-Type: Bool
-
-Default value: 1
-
-Use local cache for remote storage like HDFS or S3, it's used for remote table engine only
-
-## use_page_cache_for_disks_without_file_cache {#use_page_cache_for_disks_without_file_cache}
-
-Type: Bool
-
-Default value: 0
-
-Use userspace page cache for remote disks that don't have filesystem cache enabled.
-
-## use_query_cache {#use_query_cache}
-
-Type: Bool
-
-Default value: 0
-
-If turned on, `SELECT` queries may utilize the [query cache](../query-cache.md). Parameters [enable_reads_from_query_cache](#enable-reads-from-query-cache)
-and [enable_writes_to_query_cache](#enable-writes-to-query-cache) control in more detail how the cache is used.
-
-Possible values:
-
-- 0 - Disabled
-- 1 - Enabled
-
-## use_skip_indexes {#use_skip_indexes}
-
-Type: Bool
-
-Default value: 1
-
-Use data skipping indexes during query execution.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## use_skip_indexes_if_final {#use_skip_indexes_if_final}
-
-Type: Bool
-
-Default value: 0
-
-Controls whether skipping indexes are used when executing a query with the FINAL modifier.
-
-By default, this setting is disabled because skip indexes may exclude rows (granules) containing the latest data, which could lead to incorrect results. When enabled, skipping indexes are applied even with the FINAL modifier, potentially improving performance but with the risk of missing recent updates.
-
-Possible values:
-
-- 0 — Disabled.
-- 1 — Enabled.
-
-## use_structure_from_insertion_table_in_table_functions {#use_structure_from_insertion_table_in_table_functions}
-
-Type: UInt64
-
-Default value: 2
-
-Use structure from insertion table instead of schema inference from data. Possible values: 0 - disabled, 1 - enabled, 2 - auto
-
-## use_uncompressed_cache {#use_uncompressed_cache}
-
-Type: Bool
-
-Default value: 0
-
-Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled).
-Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md/#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted.
-
-For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically to save space for truly small queries. This means that you can keep the ‘use_uncompressed_cache’ setting always set to 1.
-
-## use_variant_as_common_type {#use_variant_as_common_type}
-
-Type: Bool
-
-Default value: 0
-
-Allows to use `Variant` type as a result type for [if](../../sql-reference/functions/conditional-functions.md/#if)/[multiIf](../../sql-reference/functions/conditional-functions.md/#multiif)/[array](../../sql-reference/functions/array-functions.md)/[map](../../sql-reference/functions/tuple-map-functions.md) functions when there is no common type for argument types.
-
-Example:
-
-```sql
-SET use_variant_as_common_type = 1;
-SELECT toTypeName(if(number % 2, number, range(number))) as variant_type FROM numbers(1);
-SELECT if(number % 2, number, range(number)) as variant FROM numbers(5);
-```
-
-```text
-┌─variant_type───────────────────┐
-│ Variant(Array(UInt64), UInt64) │
-└────────────────────────────────┘
-┌─variant───┐
-│ []        │
-│ 1         │
-│ [0,1]     │
-│ 3         │
-│ [0,1,2,3] │
-└───────────┘
-```
-
-```sql
-SET use_variant_as_common_type = 1;
-SELECT toTypeName(multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL)) AS variant_type FROM numbers(1);
-SELECT multiIf((number % 4) = 0, 42, (number % 4) = 1, [1, 2, 3], (number % 4) = 2, 'Hello, World!', NULL) AS variant FROM numbers(4);
-```
-
-```text
-─variant_type─────────────────────────┐
-│ Variant(Array(UInt8), String, UInt8) │
-└──────────────────────────────────────┘
-
-┌─variant───────┐
-│ 42            │
-│ [1,2,3]       │
-│ Hello, World! │
-│ ᴺᵁᴸᴸ          │
-└───────────────┘
-```
-
-```sql
-SET use_variant_as_common_type = 1;
-SELECT toTypeName(array(range(number), number, 'str_' || toString(number))) as array_of_variants_type from numbers(1);
-SELECT array(range(number), number, 'str_' || toString(number)) as array_of_variants FROM numbers(3);
-```
-
-```text
-┌─array_of_variants_type────────────────────────┐
-│ Array(Variant(Array(UInt64), String, UInt64)) │
-└───────────────────────────────────────────────┘
-
-┌─array_of_variants─┐
-│ [[],0,'str_0']    │
-│ [[0],1,'str_1']   │
-│ [[0,1],2,'str_2'] │
-└───────────────────┘
-```
-
-```sql
-SET use_variant_as_common_type = 1;
-SELECT toTypeName(map('a', range(number), 'b', number, 'c', 'str_' || toString(number))) as map_of_variants_type from numbers(1);
-SELECT map('a', range(number), 'b', number, 'c', 'str_' || toString(number)) as map_of_variants FROM numbers(3);
-```
-
-```text
-┌─map_of_variants_type────────────────────────────────┐
-│ Map(String, Variant(Array(UInt64), String, UInt64)) │
-└─────────────────────────────────────────────────────┘
-
-┌─map_of_variants───────────────┐
-│ {'a':[],'b':0,'c':'str_0'}    │
-│ {'a':[0],'b':1,'c':'str_1'}   │
-│ {'a':[0,1],'b':2,'c':'str_2'} │
-└───────────────────────────────┘
-```
-
-## use_with_fill_by_sorting_prefix {#use_with_fill_by_sorting_prefix}
-
-Type: Bool
-
-Default value: 1
-
-Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently
-
-## validate_polygons {#validate_polygons}
-
-Type: Bool
-
-Default value: 1
-
-Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent.
-
-Possible values:
-
-- 0 — Throwing an exception is disabled. `pointInPolygon` accepts invalid polygons and returns possibly incorrect results for them.
-- 1 — Throwing an exception is enabled.
-
-## wait_changes_become_visible_after_commit_mode {#wait_changes_become_visible_after_commit_mode}
-
-Type: TransactionsWaitCSNMode
-
-Default value: wait_unknown
-
-Wait for committed changes to become actually visible in the latest snapshot
-
-## wait_for_async_insert {#wait_for_async_insert}
-
-Type: Bool
-
-Default value: 1
-
-If true wait for processing of asynchronous insertion
-
-## wait_for_async_insert_timeout {#wait_for_async_insert_timeout}
-
-Type: Seconds
-
-Default value: 120
-
-Timeout for waiting for processing asynchronous insertion
-
-## wait_for_window_view_fire_signal_timeout {#wait_for_window_view_fire_signal_timeout}
-
-Type: Seconds
-
-Default value: 10
-
-Timeout for waiting for window view fire signal in event time processing
-
-## window_view_clean_interval {#window_view_clean_interval}
-
-Type: Seconds
-
-Default value: 60
-
-The clean interval of window view in seconds to free outdated data.
-
-## window_view_heartbeat_interval {#window_view_heartbeat_interval}
-
-Type: Seconds
-
-Default value: 15
-
-The heartbeat interval in seconds to indicate watch query is alive.
-
-## workload {#workload}
-
-Type: String
-
-Default value: default
-
-Name of workload to be used to access resources
-
-## write_through_distributed_cache {#write_through_distributed_cache}
-
-Type: Bool
-
-Default value: 0
-
-Only in ClickHouse Cloud. Allow writing to distributed cache (writing to s3 will also be done by distributed cache)
-
-## zstd_window_log_max {#zstd_window_log_max}
-
-Type: Int64
-
-Default value: 0
-
-Allows you to select the max window log of ZSTD (it will not be used for MergeTree family)
-
-Default value: `true`.
-
diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md
index c20e4fc3b09..76b2389cc12 100644
--- a/docs/en/operations/utilities/clickhouse-local.md
+++ b/docs/en/operations/utilities/clickhouse-local.md
@@ -177,6 +177,26 @@ When you are ready to insert your files into ClickHouse, startup a ClickHouse se
 :::
 
 
+## Format Conversions
+
+You can use `clickhouse-local` for converting data between different formats. Example:
+
+``` bash
+$ clickhouse-local --input-format JSONLines --output-format CSV --query "SELECT * FROM table" < data.json > data.csv
+```
+
+Formats are auto-detected from file extensions:
+
+``` bash
+$ clickhouse-local --query "SELECT * FROM table" < data.json > data.csv
+```
+
+As a shortcut, you can write it using the `--copy` argument:
+``` bash
+$ clickhouse-local --copy < data.json > data.csv
+```
+
+
 ## Usage {#usage}
 
 By default `clickhouse-local` has access to data of a ClickHouse server on the same host, and it does not depend on the server's configuration. It also supports loading server configuration using `--config-file` argument. For temporary data, a unique temporary data directory is created by default.
diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md
index b06717fcc8c..eabacee3525 100644
--- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md
+++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md
@@ -261,9 +261,10 @@ windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN)
 
 - `window` — Length of the sliding window, it is the time interval between the first and the last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`.
 - `mode` — It is an optional argument. One or more modes can be set.
-    - `'strict_deduplication'` — If the same condition holds for the sequence of events, then such repeating event interrupts further processing.
+    - `'strict_deduplication'` — If the same condition holds for the sequence of events, then such repeating event interrupts further processing. Note: it may work unexpectedly if several conditions hold for the same event.
     - `'strict_order'` — Don't allow interventions of other events. E.g. in the case of `A->B->D->C`, it stops finding `A->B->C` at the `D` and the max event level is 2.
     - `'strict_increase'` — Apply conditions only to events with strictly increasing timestamps.
+    - `'strict_once'` — Count each event only once in the chain even if it meets the condition several times.
 
 **Returned value**
 
@@ -490,7 +491,7 @@ Where:
 
 ## uniqUpTo(N)(x)
 
-Calculates the number of different values of the argument up to a specified limit, `N`. If the number of different argument values is greater than `N`, this function returns `N` + 1, otherwise it calculates the exact value. 
+Calculates the number of different values of the argument up to a specified limit, `N`. If the number of different argument values is greater than `N`, this function returns `N` + 1, otherwise it calculates the exact value.
 
 Recommended for use with small `N`s, up to 10. The maximum value of `N` is 100.
 
@@ -522,7 +523,7 @@ This function behaves the same as [sumMap](../../sql-reference/aggregate-functio
 - `keys`: [Array](../data-types/array.md) of keys.
 - `values`: [Array](../data-types/array.md) of values.
 
-**Returned Value** 
+**Returned Value**
 
 - Returns a tuple of two arrays: keys in sorted order, and values ​​summed for the corresponding keys.
 
@@ -539,10 +540,10 @@ CREATE TABLE sum_map
 )
 ENGINE = Log
 
-INSERT INTO sum_map VALUES 
-    ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]), 
+INSERT INTO sum_map VALUES
+    ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]),
     ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]),
-    ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]), 
+    ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]),
     ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]);
 ```
 
@@ -572,7 +573,7 @@ This function behaves the same as [sumMap](../../sql-reference/aggregate-functio
 - `keys`: [Array](../data-types/array.md) of keys.
 - `values`: [Array](../data-types/array.md) of values.
 
-**Returned Value** 
+**Returned Value**
 
 - Returns a tuple of two arrays: keys in sorted order, and values ​​summed for the corresponding keys.
 
@@ -591,10 +592,10 @@ CREATE TABLE sum_map
 )
 ENGINE = Log
 
-INSERT INTO sum_map VALUES 
-    ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]), 
+INSERT INTO sum_map VALUES
+    ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]),
     ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]),
-    ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]), 
+    ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]),
     ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]);
 ```
 
diff --git a/docs/en/sql-reference/ansi.md b/docs/en/sql-reference/ansi.md
deleted file mode 100644
index 6ba7b16831e..00000000000
--- a/docs/en/sql-reference/ansi.md
+++ /dev/null
@@ -1,190 +0,0 @@
----
-slug: /en/sql-reference/ansi
-sidebar_position: 40
-sidebar_label: ANSI Compatibility
-title: "ANSI SQL Compatibility of ClickHouse SQL Dialect"
----
-
-:::note    
-This article relies on Table 38, “Feature taxonomy and definition for mandatory features”, Annex F of [ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8).
-:::
-
-## Differences in Behaviour
-
-The following table lists cases when query feature works in ClickHouse, but behaves not as specified in ANSI SQL.
-
-| Feature ID | Feature Name                | Difference                                                                                                |
-|------------|-----------------------------|-----------------------------------------------------------------------------------------------------------|
-| E011       | Numeric data types          | Numeric literal with period is interpreted as approximate (`Float64`) instead of exact (`Decimal`)        |
-| E051-05    | Select items can be renamed | Item renames have a wider visibility scope than just the SELECT result                                    |
-| E141-01    | NOT NULL constraints        | `NOT NULL` is implied for table columns by default                                                        |
-| E011-04    | Arithmetic operators        | ClickHouse overflows instead of checked arithmetic and changes the result data type based on custom rules |
-
-## Feature Status
-
-| Feature ID | Feature Name                                                                                                             | Status                     | Comment                                                                                                                                                                                   |
-|------------|--------------------------------------------------------------------------------------------------------------------------|----------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| **E011**   | **Numeric data types**                                                                                                   | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| E011-01    | INTEGER and SMALLINT data types                                                                                          | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E011-02    | REAL, DOUBLE PRECISION and FLOAT data types data types                                                                   | <span class="text-success">Yes</span>     |                                                                                                               |
-| E011-03    | DECIMAL and NUMERIC data types                                                                                           | <span class="text-success">Yes</span>     |                                                                                                                                          |
-| E011-04    | Arithmetic operators                                                                                                     | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E011-05    | Numeric comparison                                                                                                       | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E011-06    | Implicit casting among the numeric data types                                                                            | <span class="text-danger">No</span>           | ANSI SQL allows arbitrary implicit cast between numeric types, while ClickHouse relies on functions having multiple overloads instead of implicit cast                                    |
-| **E021**   | **Character string types**                                                                                               | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| E021-01    | CHARACTER data type                                                                                                      | <span class="text-success">Yes</span>           |                                                                                                                                                                                           |
-| E021-02    | CHARACTER VARYING data type                                                                                              | <span class="text-success">Yes</span>           |                                                                                                                       |
-| E021-03    | Character literals                                                                                                       | <span class="text-success">Yes</span>     |                                                                                                               |
-| E021-04    | CHARACTER_LENGTH function                                                                                               | <span class="text-warning">Partial</span>     | No `USING` clause                                                                                                                                                                         |
-| E021-05    | OCTET_LENGTH function                                                                                                   | <span class="text-danger">No</span>           | `LENGTH` behaves similarly                                                                                                                                                                |
-| E021-06    | SUBSTRING                                                                                                                | <span class="text-warning">Partial</span>     | No support for `SIMILAR` and `ESCAPE` clauses, no `SUBSTRING_REGEX` variant                                                                                                               |
-| E021-07    | Character concatenation                                                                                                  | <span class="text-warning">Partial</span>     | No `COLLATE` clause                                                                                                                                                                       |
-| E021-08    | UPPER and LOWER functions                                                                                                | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E021-09    | TRIM function                                                                                                            | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E021-10    | Implicit casting among the fixed-length and variable-length character string types                                       | <span class="text-warning">Partial</span>           | ANSI SQL allows arbitrary implicit cast between string types, while ClickHouse relies on functions having multiple overloads instead of implicit cast                                     |
-| E021-11    | POSITION function                                                                                                        | <span class="text-warning">Partial</span>     | No support for `IN` and `USING` clauses, no `POSITION_REGEX` variant                                                                                                                      |
-| E021-12    | Character comparison                                                                                                     | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| **E031**   | **Identifiers**                                                                                                          | <span class="text-warning">Partial</span>|                                                                                                                                                                                           |
-| E031-01    | Delimited identifiers                                                                                                    | <span class="text-warning">Partial</span>     | Unicode literal support is limited                                                                                                                                                        |
-| E031-02    | Lower case identifiers                                                                                                   | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E031-03    | Trailing underscore                                                                                                      | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| **E051**   | **Basic query specification**                                                                                            | <span class="text-warning">Partial</span>|                                                                                                                                                                                           |
-| E051-01    | SELECT DISTINCT                                                                                                          | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E051-02    | GROUP BY clause                                                                                                          | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E051-04    | GROUP BY can contain columns not in `<select list>`                                                                      | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E051-05    | Select items can be renamed                                                                                              | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E051-06    | HAVING clause                                                                                                            | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E051-07    | Qualified \* in select list                                                                                              | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E051-08    | Correlation name in the FROM clause                                                                                      | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E051-09    | Rename columns in the FROM clause                                                                                        | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| **E061**   | **Basic predicates and search conditions**                                                                               | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| E061-01    | Comparison predicate                                                                                                     | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E061-02    | BETWEEN predicate                                                                                                        | <span class="text-warning">Partial</span>     | No `SYMMETRIC` and `ASYMMETRIC` clause                                                                                                                                                    |
-| E061-03    | IN predicate with list of values                                                                                         | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E061-04    | LIKE predicate                                                                                                           | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E061-05    | LIKE predicate: ESCAPE clause                                                                                            | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E061-06    | NULL predicate                                                                                                           | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E061-07    | Quantified comparison predicate                                                                                          | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E061-08    | EXISTS predicate                                                                                                         | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E061-09    | Subqueries in comparison predicate                                                                                       | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E061-11    | Subqueries in IN predicate                                                                                               | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E061-12    | Subqueries in quantified comparison predicate                                                                            | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E061-13    | Correlated subqueries                                                                                                    | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E061-14    | Search condition                                                                                                         | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| **E071**   | **Basic query expressions**                                                                                              | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| E071-01    | UNION DISTINCT table operator                                                                                            | <span class="text-success">Yes</span>           |                                                                                                                                                                                           |
-| E071-02    | UNION ALL table operator                                                                                                 | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E071-03    | EXCEPT DISTINCT table operator                                                                                           | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E071-05    | Columns combined via table operators need not have exactly the same data type                                            | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E071-06    | Table operators in subqueries                                                                                            | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| **E081**   | **Basic privileges**                                                                                                     | <span class="text-success">Yes</span>             |
-| E081-01    | SELECT privilege at the table level                                                                                      | <span class="text-success">Yes</span>             | 
-| E081-02    | DELETE privilege                                                                                                         |                                                   |
-| E081-03    | INSERT privilege at the table level                                                                                      | <span class="text-success">Yes</span>             |
-| E081-04    | UPDATE privilege at the table level                                                                                      | <span class="text-success">Yes</span>             | 
-| E081-05    | UPDATE privilege at the column level                                                                                     |                                                   |
-| E081-06    | REFERENCES privilege at the table level  | | |
-| E081-07    | REFERENCES privilege at the column level | | |
-| E081-08    | WITH GRANT OPTION | <span class="text-success">Yes</span> | |
-| E081-09    | USAGE privilege | | |
-| E081-10    | EXECUTE privilege | | |
-| **E091**   | **Set functions**                                                                                                        |<span class="text-success">Yes</span>              |
-| E091-01    | AVG                                                                                                                      | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E091-02    | COUNT                                                                                                                    | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E091-03    | MAX                                                                                                                      | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E091-04    | MIN                                                                                                                      | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E091-05    | SUM                                                                                                                      | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E091-06    | ALL quantifier                                                                                                           | <span class="text-success">Yes</span>           |                                                                                                                                                                                           |
-| E091-07    | DISTINCT quantifier                                                                                                      | <span class="text-success">Yes</span>     | Not all aggregate functions supported                                                                                                                                                     |
-| **E101**   | **Basic data manipulation**                                                                                              | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| E101-01    | INSERT statement                                                                                                         | <span class="text-success">Yes</span>         | Note: primary key in ClickHouse does not imply the `UNIQUE` constraint                                                                                                                    |
-| E101-03    | Searched UPDATE statement                                                                                                | <span class="text-warning">Partial</span>           | There’s an `ALTER UPDATE` statement for batch data modification                                                                                                                           |
-| E101-04    | Searched DELETE statement                                                                                                | <span class="text-warning">Partial</span>           | There’s an `ALTER DELETE` statement for batch data removal                                                                                                                                |
-| **E111**   | **Single row SELECT statement**                                                                                          | <span class="text-danger">No</span>        |                                                                                                                                                                                           |
-| **E121**   | **Basic cursor support**                                                                                                 | <span class="text-danger">No</span>        |                                                                                                                                                                                           |
-| E121-01    | DECLARE CURSOR                                                                                                           | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E121-02    | ORDER BY columns need not be in select list                                                                              | <span class="text-success">Yes</span>           |                                                                                                                                                                                           |
-| E121-03    | Value expressions in ORDER BY clause                                                                                     | <span class="text-success">Yes</span>           |                                                                                                                                                                                           |
-| E121-04    | OPEN statement                                                                                                           | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E121-06    | Positioned UPDATE statement                                                                                              | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E121-07    | Positioned DELETE statement                                                                                              | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E121-08    | CLOSE statement                                                                                                          | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E121-10    | FETCH statement: implicit NEXT                                                                                           | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E121-17    | WITH HOLD cursors                                                                                                        | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| **E131**   | **Null value support (nulls in lieu of values)**                                                                         | <span class="text-success">Yes</span>             | Some restrictions apply  |
-| **E141**   | **Basic integrity constraints**                                                                                          | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| E141-01    | NOT NULL constraints                                                                                                     | <span class="text-success">Yes</span>         | Note: `NOT NULL` is implied for table columns by default                                                                                                                                  |
-| E141-02    | UNIQUE constraint of NOT NULL columns                                                                                    | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E141-03    | PRIMARY KEY constraints                                                                                                  | <span class="text-warning">Partial</span>           |                                                                                                                                                                                           |
-| E141-04    | Basic FOREIGN KEY constraint with the NO ACTION default for both referential delete action and referential update action | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E141-06    | CHECK constraint                                                                                                         | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E141-07    | Column defaults                                                                                                          | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E141-08    | NOT NULL inferred on PRIMARY KEY                                                                                         | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| E141-10    | Names in a foreign key can be specified in any order                                                                     | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| **E151**   | **Transaction support**                                                                                                  | <span class="text-danger">No</span>       |                                                                                                                                                                                           |
-| E151-01    | COMMIT statement                                                                                                         | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E151-02    | ROLLBACK statement                                                                                                       | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| **E152**   | **Basic SET TRANSACTION statement**                                                                                      | <span class="text-danger">No</span>       |                                                                                                                                                                                           |
-| E152-01    | SET TRANSACTION statement: ISOLATION LEVEL SERIALIZABLE clause                                                           | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| E152-02    | SET TRANSACTION statement: READ ONLY and READ WRITE clauses                                                              | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| **E153**   | **Updatable queries with subqueries**                                                                                    | <span class="text-success">Yes</span>       |                                                                                                                                                                                           |
-| **E161**   | **SQL comments using leading double minus**                                                                              | <span class="text-success">Yes</span>     |                                                                                                                                                                                           |
-| **E171**   | **SQLSTATE support**                                                                                                     | <span class="text-danger">No</span>       |                                                                                                                                                                                           |
-| **E182**   | **Host language binding**                                                                                                | <span class="text-danger">No</span>       |                                                                                                                                                                                           |
-| **F031**   | **Basic schema manipulation**                                                                                            | <span class="text-warning">Partial</span>|                                                                                                                                                                                           |
-| F031-01    | CREATE TABLE statement to create persistent base tables                                                                  | <span class="text-warning">Partial</span>     | No `SYSTEM VERSIONING`, `ON COMMIT`, `GLOBAL`, `LOCAL`, `PRESERVE`, `DELETE`, `REF IS`, `WITH OPTIONS`, `UNDER`, `LIKE`, `PERIOD FOR` clauses and no support for user resolved data types |
-| F031-02    | CREATE VIEW statement                                                                                                    | <span class="text-warning">Partial</span>     | No `RECURSIVE`, `CHECK`, `UNDER`, `WITH OPTIONS` clauses and no support for user resolved data types                                                                                      |
-| F031-03    | GRANT statement                                                                                                          | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F031-04    | ALTER TABLE statement: ADD COLUMN clause                                                                                 | <span class="text-success">Yes</span>     | No support for `GENERATED` clause and system time period                                                                                                                                  |
-| F031-13    | DROP TABLE statement: RESTRICT clause                                                                                    | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| F031-16    | DROP VIEW statement: RESTRICT clause                                                                                     | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| F031-19    | REVOKE statement: RESTRICT clause                                                                                        | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| **F041**   | **Basic joined table**                                                                                                   | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| F041-01    | Inner join (but not necessarily the INNER keyword)                                                                       | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F041-02    | INNER keyword                                                                                                            | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F041-03    | LEFT OUTER JOIN                                                                                                          | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F041-04    | RIGHT OUTER JOIN                                                                                                         | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F041-05    | Outer joins can be nested                                                                                                | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F041-07    | The inner table in a left or right outer join can also be used in an inner join                                          | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F041-08    | All comparison operators are supported (rather than just =)                                                              | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| **F051**   | **Basic date and time**                                                                                                  | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| F051-01    | DATE data type (including support of DATE literal)                                                                       | <span class="text-success">Yes</span>     |                       |
-| F051-02    | TIME data type (including support of TIME literal) with fractional seconds precision of at least 0                       | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| F051-03    | TIMESTAMP data type (including support of TIMESTAMP literal) with fractional seconds precision of at least 0 and 6       | <span class="text-success">Yes</span> | |
-| F051-04    | Comparison predicate on DATE, TIME, and TIMESTAMP data types                                                             | <span class="text-success">Yes</span> | |
-| F051-05    | Explicit CAST between datetime types and character string types                                                          | <span class="text-success">Yes</span> | |
-| F051-06    | CURRENT_DATE                                                                                                            | <span class="text-danger">No</span>           | `today()` is similar                                                                                                                                                                      |
-| F051-07    | LOCALTIME                                                                                                                | <span class="text-danger">No</span>           | `now()` is similar                                                                                                                                                                        |
-| F051-08    | LOCALTIMESTAMP                                                                                                           | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| **F081**   | **UNION and EXCEPT in views**                                                                                            | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| **F131**   | **Grouped operations**                                                                                                   | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| F131-01    | WHERE, GROUP BY, and HAVING clauses supported in queries with grouped views                                              | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F131-02    | Multiple tables supported in queries with grouped views                                                                  | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F131-03    | Set functions supported in queries with grouped views                                                                    | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F131-04    | Subqueries with GROUP BY and HAVING clauses and grouped views                                                            | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F131-05    | Single row SELECT with GROUP BY and HAVING clauses and grouped views                                                     | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| **F181**   | **Multiple module support**                                                                                              | <span class="text-danger">No</span>       |                                                                                                                                                                                           |
-| **F201**   | **CAST function**                                                                                                        | <span class="text-success">Yes</span>     |                                                                                                                                                                                           |
-| **F221**   | **Explicit defaults**                                                                                                    | <span class="text-danger">No</span>       |                                                                                                                                                                                           |
-| **F261**   | **CASE expression**                                                                                                      | <span class="text-success">Yes</span>     |                                                                                                                                                                                           |
-| F261-01    | Simple CASE                                                                                                              | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F261-02    | Searched CASE                                                                                                            | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F261-03    | NULLIF                                                                                                                   | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F261-04    | COALESCE                                                                                                                 | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| **F311**   | **Schema definition statement**                                                                                          | <span class="text-warning">Partial</span> |                                                                                                                                                                                           |
-| F311-01    | CREATE SCHEMA                                                                                                            | <span class="text-warning">Partial</span>         | See CREATE DATABASE |
-| F311-02    | CREATE TABLE for persistent base tables                                                                                  | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F311-03    | CREATE VIEW                                                                                                              | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| F311-04    | CREATE VIEW: WITH CHECK OPTION                                                                                           | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| F311-05    | GRANT statement                                                                                                          | <span class="text-success">Yes</span>         |                                                                                                                                                                                           |
-| **F471**   | **Scalar subquery values**                                                                                               | <span class="text-success">Yes</span>     |                                                                                                                                                                                           |
-| **F481**   | **Expanded NULL predicate**                                                                                              | <span class="text-success">Yes</span>     |                                                                                                                                                                                           |
-| **F812**   | **Basic flagging**                                                                                                       | <span class="text-danger">No</span>       |                                                                                                                                                                                           |
-| **S011**   | **Distinct data types** | | |
-| **T321**   | **Basic SQL-invoked routines**                                                                                           | <span class="text-danger">No</span>       |                                                                                                                                                                                           |
-| T321-01    | User-defined functions with no overloading                                                                               | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| T321-02    | User-defined stored procedures with no overloading                                                                       | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| T321-03    | Function invocation                                                                                                      | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| T321-04    | CALL statement                                                                                                           | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| T321-05    | RETURN statement                                                                                                         | <span class="text-danger">No</span>           |                                                                                                                                                                                           |
-| **T631**   | **IN predicate with one list element**                                                                                   | <span class="text-success">Yes</span>     |                                                                                                                                                                                           |
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 1d543f11cf3..2357b5b2fdd 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -2933,7 +2933,42 @@ The same as ‘today() - 1’.
 
 ## timeSlot
 
-Rounds the time to the half hour.
+Rounds the time to the start of a half-hour interval.
+
+**Syntax**
+
+```sql
+timeSlot(time[, time_zone])
+```
+
+**Arguments**
+
+- `time` — Time to round to the start of a half-hour interval. [DateTime](../data-types/datetime.md)/[Date32](../data-types/date32.md)/[DateTime64](../data-types/datetime64.md).
+- `time_zone` — Optional. A constant string or an expression representing the time zone. [String](../data-types/string.md).
+
+:::note
+Though this function can take values of the extended types `Date32` and `DateTime64` as an argument, passing it a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
+:::
+
+**Returned value**
+
+- The time rounded to the start of a half-hour interval. [DateTime](../data-types/datetime.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT timeSlot(toDateTime('2000-01-02 03:04:05', 'UTC'));
+```
+
+Result:
+
+```response
+┌─timeSlot(toDateTime('2000-01-02 03:04:05', 'UTC'))─┐
+│                                2000-01-02 03:00:00 │
+└────────────────────────────────────────────────────┘
+```
 
 ## toYYYYMM
 
diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index b37bd057adc..5c39f880a0e 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -5261,9 +5261,9 @@ SELECT toFixedString('foo', 8) AS s;
 Result:
 
 ```response
-┌─s─────────────┬─s_cut─┐
-│ foo\0\0\0\0\0 │ foo   │
-└───────────────┴───────┘
+┌─s─────────────┐
+│ foo\0\0\0\0\0 │
+└───────────────┘
 ```
 
 ## toStringCutToZero
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
deleted file mode 100644
index d29df2fc24b..00000000000
--- a/docs/mkdocs.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-# Just an empty yaml file. Keep it alone.
-{}
diff --git a/docs/ru/introduction/distinctive-features.md b/docs/ru/introduction/distinctive-features.md
index da820c90a1e..a2a848a18f8 100644
--- a/docs/ru/introduction/distinctive-features.md
+++ b/docs/ru/introduction/distinctive-features.md
@@ -33,7 +33,7 @@ sidebar_label: "Отличительные возможности ClickHouse"
 
 ## Поддержка SQL {#sql-support}
 
-ClickHouse поддерживает [декларативный язык запросов на основе SQL](../sql-reference/index.md) и во [многих случаях](../sql-reference/ansi.mdx) совпадающий с SQL-стандартом.
+ClickHouse поддерживает декларативный язык запросов SQL.
 
 Поддерживаются [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), подзапросы в секциях [FROM](../sql-reference/statements/select/from.md), [IN](../sql-reference/operators/in.md), [JOIN](../sql-reference/statements/select/join.md), [функции window](../sql-reference/window-functions/index.mdx), а также скалярные подзапросы.
 
diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md
index aadc178f81a..a71dc73753a 100644
--- a/docs/ru/operations/settings/settings-users.md
+++ b/docs/ru/operations/settings/settings-users.md
@@ -30,7 +30,7 @@ sidebar_label: "Настройки пользователей"
         <profile>profile_name</profile>
 
         <quota>default</quota>
-        <default_database>default<default_database>
+        <default_database>default</default_database>
         <databases>
             <database_name>
                 <table_name>
diff --git a/docs/ru/sql-reference/ansi.mdx b/docs/ru/sql-reference/ansi.mdx
index 3fcdeeda773..e69de29bb2d 100644
--- a/docs/ru/sql-reference/ansi.mdx
+++ b/docs/ru/sql-reference/ansi.mdx
@@ -1,10 +0,0 @@
----
-slug: /ru/sql-reference/ansi
-sidebar_position: 40
-sidebar_label: ANSI Compatibility
-title: "ANSI Compatibility"
----
-
-import Content from '@site/docs/en/sql-reference/ansi.md';
-
-<Content />
diff --git a/docs/zh/introduction/distinctive-features.md b/docs/zh/introduction/distinctive-features.md
index a4803fd4acf..13b18547aa5 100644
--- a/docs/zh/introduction/distinctive-features.md
+++ b/docs/zh/introduction/distinctive-features.md
@@ -37,7 +37,7 @@ ClickHouse会使用服务器上一切可用的资源，从而以最自然的方
 
 ## 支持SQL {#zhi-chi-sql}
 
-ClickHouse支持一种[基于SQL的声明式查询语言](../sql-reference/index.md)，它在许多情况下与[ANSI SQL标准](../sql-reference/ansi.md)相同。
+ClickHouse支持一种基于SQL的声明式查询语言。
 
 支持的查询[GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md), [IN](../sql-reference/operators/in.md)以及非相关子查询。
 
diff --git a/docs/zh/sql-reference/ansi.md b/docs/zh/sql-reference/ansi.md
deleted file mode 100644
index 74f13256eba..00000000000
--- a/docs/zh/sql-reference/ansi.md
+++ /dev/null
@@ -1,191 +0,0 @@
----
-slug: /zh/sql-reference/ansi
-sidebar_position: 40
-sidebar_label: "ANSI\u517C\u5BB9\u6027"
----
-
-# ClickHouse SQL方言 与ANSI SQL的兼容性{#ansi-sql-compatibility-of-clickhouse-sql-dialect}
-
-:::note
-本文参考Annex G所著的[ISO/IEC CD 9075-2:2011](https://www.iso.org/obp/ui/#iso:std:iso-iec:9075:-2:ed-4:v1:en:sec:8)标准.
-:::
-
-## 行为差异 {#differences-in-behaviour}
-
-下表列出了ClickHouse能够使用，但与ANSI SQL规定有差异的查询特性。
-
-| 功能ID  | 功能名称              | 差异                                                                                                                                                                                   |
-| ------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| E011    | 数值型数据类型        | 带小数点的数字被视为近似值 (`Float64`）而不是精确值 (`Decimal`)                                                                                                                        |
-| E051-05 | SELECT 的列可以重命名 | 字段重命名的作用范围不限于进行重命名的SELECT子查询（参考[表达式别名](https://clickhouse.com/docs/zh/sql-reference/syntax/#notes-on-usage)）                                            |
-| E141-01 | NOT NULL（非空）约束          | ClickHouse表中每一列默认为`NOT NULL`                                                                                                                                                   |
-| E011-04 | 算术运算符            | ClickHouse在运算时会进行溢出，而不是四舍五入。此外会根据自定义规则修改结果数据类型（参考[溢出检查](https://clickhouse.com/docs/zh/sql-reference/data-types/decimal/#yi-chu-jian-cha)） |
-
-## 功能状态 {#feature-status}
-
-| 功能ID   | 功能名称                                                                                 | 状态                    | 注释                                                                                                                                                                     |
-| -------- | ---------------------------------------------------------------------------------------- | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| **E011** | **数值型数据类型**                                                                       | **部分**{.text-warning} |                                                                                                                                                                          |
-| E011-01  | INTEGER （整型）和SMALLINT （小整型）数据类型                                            | 是 {.text-success}      |                                                                                                                                                                          |
-| E011-02  | REAL （实数）、DOUBLE PRECISION （双精度浮点数）和FLOAT（单精度浮点数）数据类型数据类型  | 是 {.text-success}      |                                                                                                                                                                          |
-| E011-03  | DECIMAL （精确数字）和NUMERIC （精确数字）数据类型                                       | 是 {.text-success}      |                                                                                                                                                                          |
-| E011-04  | 算术运算符                                                                               | 是 {.text-success}      |                                                                                                                                                                          |
-| E011-05  | 数值比较                                                                                 | 是 {.text-success}      |                                                                                                                                                                          |
-| E011-06  | 数值数据类型之间的隐式转换                                                               | 否 {.text-danger}       | ANSI SQL允许在数值类型之间进行任意隐式转换，而ClickHouse针对不同数据类型有对应的比较函数和类型转换函数                                                                   |
-| **E021** | **字符串类型**                                                                           | **部分**{.text-warning} |                                                                                                                                                                          |
-| E021-01  | CHARACTER （字符串）数据类型                                                             | 是 {.text-success}      |                                                                                                                                                                          |
-| E021-02  | CHARACTER VARYING （可变字符串）数据类型                                                 | 是 {.text-success}      |                                                                                                                                                                          |
-| E021-03  | 字符字面量                                                                               | 是 {.text-success}      |                                                                                                                                                                          |
-| E021-04  | CHARACTER_LENGTH 函数                                                                    | 部分 {.text-warning}    | 不支持 `using` 从句                                                                                                                                                      |
-| E021-05  | OCTET_LENGTH 函数                                                                        | 否 {.text-danger}       | 使用 `LENGTH` 函数代替                                                                                                                                                   |
-| E021-06  | SUBSTRING                                                                                | 部分 {.text-warning}    | 不支持 `SIMILAR` 和 `ESCAPE` 从句，没有`SUBSTRING_REGEX` 函数                                                                                                            |
-| E021-07  | 字符串拼接                                                                               | 部分 {.text-warning}    | 不支持 `COLLATE` 从句                                                                                                                                                    |
-| E021-08  | 大小写转换                                                                               | 是 {.text-success}      |                                                                                                                                                                          |
-| E021-09  | 裁剪字符串                                                                               | 是 {.text-success}      |                                                                                                                                                                          |
-| E021-10  | 固定长度和可变长度字符串类型之间的隐式转换                                               | 部分 {.text-warning}    | ANSI SQL允许在数据类型之间进行任意隐式转换，而ClickHouse针对不同数据类型有对应的比较函数和类型转换函数                                                                   |
-| E021-11  | POSITION 函数                                                                            | 部分 {.text-warning}    | 不支持 `IN` 和 `USING` 从句，不支持`POSITION_REGEX`函数                                                                                                                  |
-| E021-12  | 字符串比较                                                                               | 是 {.text-success}      |                                                                                                                                                                          |
-| **E031** | **标识符**                                                                               | **部分**{.text-warning} |                                                                                                                                                                          |
-| E031-01  | 分隔标识符                                                                               | 部分 {.text-warning}    | Unicode文字支持有限                                                                                                                                                      |
-| E031-02  | 小写标识符                                                                               | 是 {.text-success}      |                                                                                                                                                                          |
-| E031-03  | 标识符最后加下划线                                                                       | 是 {.text-success}      |                                                                                                                                                                          |
-| **E051** | **基本查询规范**                                                                         | **部分**{.text-warning} |                                                                                                                                                                          |
-| E051-01  | SELECT DISTINCT                                                                          | 是 {.text-success}      |                                                                                                                                                                          |
-| E051-02  | GROUP BY 从句                                                                            | 是 {.text-success}      |                                                                                                                                                                          |
-| E051-04  | GROUP BY 从句中的列可以包含不在 `<select list>`中出现的列                                | 是 {.text-success}      |                                                                                                                                                                          |
-| E051-05  | SELECT 的列可以重命名                                                                    | 是 {.text-success}      |                                                                                                                                                                          |
-| E051-06  | HAVING 从句                                                                              | 是 {.text-success}      |                                                                                                                                                                          |
-| E051-07  | SELECT 选择的列中允许出现\*                                                              | 是 {.text-success}      |                                                                                                                                                                          |
-| E051-08  | FROM 从句中的关联名称                                                                    | 是 {.text-success}      |                                                                                                                                                                          |
-| E051-09  | 重命名 FROM 从句中的列                                                                   | 否 {.text-danger}       |                                                                                                                                                                          |
-| **E061** | **基本谓词和搜索条件**                                                                   | **部分**{.text-warning} |                                                                                                                                                                          |
-| E061-01  | 比较谓词                                                                                 | 是 {.text-success}      |                                                                                                                                                                          |
-| E061-02  | BETWEEN 谓词                                                                             | 部分 {.text-warning}    | 不支持 `SYMMETRIC` 和 `ASYMMETRIC` 从句                                                                                                                                  |
-| E061-03  | IN 谓词后可接值列表                                                                      | 是 {.text-success}      |                                                                                                                                                                          |
-| E061-04  | LIKE 谓词                                                                                | 是 {.text-success}      |                                                                                                                                                                          |
-| E061-05  | LIKE 谓词后接 ESCAPE 从句                                                                | 否 {.text-danger}       |                                                                                                                                                                          |
-| E061-06  | NULL 谓词                                                                                | 是 {.text-success}      |                                                                                                                                                                          |
-| E061-07  | 量化比较谓词（ALL、SOME、ANY）                                                           | 否 {.text-danger}       |                                                                                                                                                                          |
-| E061-08  | EXISTS 谓词                                                                              | 否 {.text-danger}       |                                                                                                                                                                          |
-| E061-09  | 比较谓词中的子查询                                                                       | 是 {.text-success}      |                                                                                                                                                                          |
-| E061-11  | IN 谓词中的子查询                                                                        | 是 {.text-success}      |                                                                                                                                                                          |
-| E061-12  | 量化比较谓词（BETWEEN、IN、LIKE）中的子查询                                              | 否 {.text-danger}       |                                                                                                                                                                          |
-| E061-13  | 相关子查询                                                                               | 否 {.text-danger}       |                                                                                                                                                                          |
-| E061-14  | 搜索条件                                                                                 | 是 {.text-success}      |                                                                                                                                                                          |
-| **E071** | **基本查询表达式**                                                                       | **部分**{.text-warning} |                                                                                                                                                                          |
-| E071-01  | UNION DISTINCT 表运算符                                                                  | 是 {.text-success}      |                                                                                                                                                                          |
-| E071-02  | UNION ALL 表运算符                                                                       | 是 {.text-success}      |                                                                                                                                                                          |
-| E071-03  | EXCEPT DISTINCT 表运算符                                                                 | 否 {.text-danger}       |                                                                                                                                                                          |
-| E071-05  | 通过表运算符组合的列不必具有完全相同的数据类型                                           | 是 {.text-success}      |                                                                                                                                                                          |
-| E071-06  | 子查询中的表运算符                                                                       | 是 {.text-success}      |                                                                                                                                                                          |
-| **E081** | **基本权限**                                                                             | **是**{.text-success}   |                                                                                                                                                                          |
-| E081-01  | 表级别的SELECT（查询）权限                                                               | 是 {.text-success}      |                                                                                                                                                                          |
-| E081-02  | DELETE（删除）权限                                                                       | 是 {.text-success}      |                                                                                                                                                                          |
-| E081-03  | 表级别的INSERT（插入）权限                                                               | 是 {.text-success}      |                                                                                                                                                                          |
-| E081-04  | 表级别的UPDATE（更新）权限                                                               | 是 {.text-success}      |                                                                                                                                                                          |
-| E081-05  | 列级别的UPDATE（更新）权限                                                               | 是 {.text-success}      |                                                                                                                                                                          |
-| E081-06  | 表级别的REFERENCES（引用）权限                                                           | 是 {.text-success}      |                                                                                                                                                                          |
-| E081-07  | 列级别的REFERENCES（引用）权限                                                           | 是 {.text-success}      |                                                                                                                                                                          |
-| E081-08  | WITH GRANT OPTION                                                                        | 是 {.text-success}      |                                                                                                                                                                          |
-| E081-09  | USAGE（使用）权限                                                                        | 是 {.text-success}      |                                                                                                                                                                          |
-| E081-10  | EXECUTE（执行）权限                                                                      | 是 {.text-success}      |                                                                                                                                                                          |
-| **E091** | **集合函数**                                                                             | **是**{.text-success}   |                                                                                                                                                                          |
-| E091-01  | AVG                                                                                      | 是 {.text-success}      |                                                                                                                                                                          |
-| E091-02  | COUNT                                                                                    | 是 {.text-success}      |                                                                                                                                                                          |
-| E091-03  | MAX                                                                                      | 是 {.text-success}      |                                                                                                                                                                          |
-| E091-04  | MIN                                                                                      | 是 {.text-success}      |                                                                                                                                                                          |
-| E091-05  | SUM                                                                                      | 是 {.text-success}      |                                                                                                                                                                          |
-| E091-06  | ALL修饰词                                                                                | 否。 {.text-danger}     |                                                                                                                                                                          |
-| E091-07  | DISTINCT修饰词                                                                           | 是 {.text-success}      | 并非所有聚合函数都支持该修饰词                                                                                                                                           |
-| **E101** | **基本数据操作**                                                                         | **部分**{.text-warning} |                                                                                                                                                                          |
-| E101-01  | INSERT（插入）语句                                                                       | 是 {.text-success}      | 注：ClickHouse中的主键并不隐含`UNIQUE` 约束                                                                                                                              |
-| E101-03  | 可指定范围的UPDATE（更新）语句                                                           | 部分 {.text-warning}    | `ALTER UPDATE` 语句用来批量更新数据                                                                                                                                      |
-| E101-04  | 可指定范围的DELETE（删除）语句                                                           | 部分 {.text-warning}    | `ALTER DELETE` 语句用来批量删除数据                                                                                                                                      |
-| **E111** | **返回一行的SELECT语句**                                                                 | **否**{.text-danger}    |                                                                                                                                                                          |
-| **E121** | **基本游标支持**                                                                         | **否**{.text-danger}    |                                                                                                                                                                          |
-| E121-01  | DECLARE CURSOR                                                                           | 否 {.text-danger}       |                                                                                                                                                                          |
-| E121-02  | ORDER BY 涉及的列不需要出现在SELECT的列中                                                | 是 {.text-success}      |                                                                                                                                                                          |
-| E121-03  | ORDER BY 从句中的表达式                                                                  | 是 {.text-success}      |                                                                                                                                                                          |
-| E121-04  | OPEN 语句                                                                                | 否 {.text-danger}       |                                                                                                                                                                          |
-| E121-06  | 受游标位置控制的 UPDATE 语句                                                             | 否 {.text-danger}       |                                                                                                                                                                          |
-| E121-07  | 受游标位置控制的 DELETE 语句                                                             | 否 {.text-danger}       |                                                                                                                                                                          |
-| E121-08  | CLOSE 语句                                                                               | 否 {.text-danger}       |                                                                                                                                                                          |
-| E121-10  | FETCH 语句中包含隐式NEXT                                                                 | 否 {.text-danger}       |                                                                                                                                                                          |
-| E121-17  | WITH HOLD 游标                                                                           | 否 {.text-danger}       |                                                                                                                                                                          |
-| **E131** | **空值支持**                                                                             | **是**{.text-success}   | 有部分限制                                                                                                                                                               |
-| **E141** | **基本完整性约束**                                                                       | **部分**{.text-warning} |                                                                                                                                                                          |
-| E141-01  | NOT NULL（非空）约束                                                                     | 是 {.text-success}      | 注: 默认情况下ClickHouse表中的列隐含`NOT NULL`约束                                                                                                                                 |
-| E141-02  | NOT NULL（非空）列的UNIQUE（唯一）约束                                                   | 否 {.text-danger}       |                                                                                                                                                                          |
-| E141-03  | 主键约束                                                                                 | 部分 {.text-warning}    |                                                                                                                                                                          |
-| E141-04  | 对于引用删除和引用更新操作，基本的FOREIGN KEY（外键）约束默认不进行任何操作（NO ACTION） | 否 {.text-danger}       |                                                                                                                                                                          |
-| E141-06  | CHECK（检查）约束                                                                        | 是 {.text-success}      |                                                                                                                                                                          |
-| E141-07  | 列默认值                                                                                 | 是 {.text-success}      |                                                                                                                                                                          |
-| E141-08  | 在主键上推断非空                                                                         | 是 {.text-success}      |                                                                                                                                                                          |
-| E141-10  | 可以按任何顺序指定外键中的名称                                                           | 否 {.text-danger}       |                                                                                                                                                                          |
-| **E151** | **事务支持**                                                                             | **否**{.text-danger}    |                                                                                                                                                                          |
-| E151-01  | COMMIT（提交）语句                                                                       | 否 {.text-danger}       |                                                                                                                                                                          |
-| E151-02  | ROLLBACK（回滚）语句                                                                     | 否 {.text-danger}       |                                                                                                                                                                          |
-| **E152** | **基本的SET TRANSACTION（设置事务隔离级别）语句**                                        | **否**{.text-danger}    |                                                                                                                                                                          |
-| E152-01  | SET TRANSACTION语句：ISOLATION LEVEL SERIALIZABLE（隔离级别为串行化）从句                | 否 {.text-danger}       |                                                                                                                                                                          |
-| E152-02  | SET TRANSACTION语句：READ ONLY（只读）和READ WRITE（读写）从句                           | 否 {.text-danger}       |                                                                                                                                                                          |
-| **E153** | **具有子查询的可更新查询**                                                               | **是**{.text-success}   |                                                                                                                                                                          |
-| **E161** | **使用“--”符号作为SQL注释**                                                                  | **是**{.text-success}   |                                                                                                                                                                          |
-| **E171** | **SQLSTATE支持**                                                                         | **否**{.text-danger}    |                                                                                                                                                                          |
-| **E182** | **主机语言绑定**                                                                         | **否**{.text-danger}    |                                                                                                                                                                          |
-| **F031** | **基本架构操作**                                                                         | **部分**{.text-warning} |                                                                                                                                                                          |
-| F031-01  | 使用 CREATE TABLE 语句创建持久表                                                           | 部分 {.text-warning}    | 不支持 `SYSTEM VERSIONING`, `ON COMMIT`, `GLOBAL`, `LOCAL`, `PRESERVE`, `DELETE`, `REF IS`, `WITH OPTIONS`, `UNDER`, `LIKE`, `PERIOD FOR` 从句，不支持用户解析的数据类型 |
-| F031-02  | CREATE VIEW（创建视图）语句                                                              | 部分 {.text-warning}    | 不支持 `RECURSIVE`, `CHECK`, `UNDER`, `WITH OPTIONS` 从句，不支持用户解析的数据类型                                                                                      |
-| F031-03  | GRANT（授权）语句                                                                        | 是 {.text-success}      |                                                                                                                                                                          |
-| F031-04  | ALTER TABLE语句：ADD COLUMN从句                                                          | 是 {.text-success}      | 不支持 `GENERATED` 从句和以系统时间做参数                                                                                                                                |
-| F031-13  | DROP TABLE语句：RESTRICT从句                                                             | 否 {.text-danger}       |                                                                                                                                                                          |
-| F031-16  | DROP VIEW语句：RESTRICT子句                                                              | 否 {.text-danger}       |                                                                                                                                                                          |
-| F031-19  | REVOKE语句：RESTRICT子句                                                                 | 否 {.text-danger}       |                                                                                                                                                                          |
-| **F041** | **基本连接关系**                                                                           | **部分**{.text-warning} |                                                                                                                                                                          |
-| F041-01  | Inner join（但不一定是INNER关键字)                                                       | 是 {.text-success}      |                                                                                                                                                                          |
-| F041-02  | INNER 关键字                                                                              | 是 {.text-success}      |                                                                                                                                                                          |
-| F041-03  | LEFT OUTER JOIN                                                                          | 是 {.text-success}      |                                                                                                                                                                          |
-| F041-04  | RIGHT OUTER JOIN                                                                         | 是 {.text-success}      |                                                                                                                                                                          |
-| F041-05  | 外连接可嵌套                                                                             | 是 {.text-success}      |                                                                                                                                                                          |
-| F041-07  | 左外部连接或右外连接中的内部表也可用于内部联接                                           | 是 {.text-success}      |                                                                                                                                                                          |
-| F041-08  | 支持所有比较运算符（而不仅仅是=)                                                         | 否 {.text-danger}       |                                                                                                                                                                          |
-| **F051** | **基本日期和时间**                                                                       | **部分**{.text-warning} |                                                                                                                                                                          |
-| F051-01  | DATE（日期）数据类型（并支持用于表达日期的字面量)                                        | 是 {.text-success}      |                                                                                                                                                                          |
-| F051-02  | TIME（时间）数据类型（并支持用于表达时间的字面量），小数秒精度至少为0                    | 否 {.text-danger}       |                                                                                                                                                                          |
-| F051-03  | 时间戳数据类型（并支持用于表达时间戳的字面量），小数秒精度至少为0和6                     | 是 {.text-danger}       |                                                                                                                                                                          |
-| F051-04  | 日期、时间和时间戳数据类型的比较谓词                                                     | 是 {.text-success}      |                                                                                                                                                                          |
-| F051-05  | DateTime 类型和字符串形式表达的时间之间的显式转换                                         | 是 {.text-success}      |                                                                                                                                                                          |
-| F051-06  | CURRENT_DATE                                                                             | 否 {.text-danger}       | 使用`today()`替代                                                                                                                                                        |
-| F051-07  | LOCALTIME                                                                                | 否 {.text-danger}       | 使用`now()`替代                                                                                                                                                          |
-| F051-08  | LOCALTIMESTAMP                                                                           | 否 {.text-danger}       |                                                                                                                                                                          |
-| **F081** | **视图的UNION和EXCEPT操作**                                                              | **部分**{.text-warning} |                                                                                                                                                                          |
-| **F131** | **分组操作**                                                                             | **部分**{.text-warning} |                                                                                                                                                                          |
-| F131-01  | 在具有分组视图的查询中支持 WHERE、GROUP BY 和 HAVING 子句                                    | 是 {.text-success}      |                                                                                                                                                                          |
-| F131-02  | 在分组视图中支持多张表                                                                   | 是 {.text-success}      |                                                                                                                                                                          |
-| F131-03  | 分组视图的查询中支持集合函数                                                             | 是 {.text-success}      |                                                                                                                                                                          |
-| F131-04  | 带有 `GROUP BY` 和 `HAVING` 从句，以及分组视图的子查询                                       | 是 {.text-success}      |                                                                                                                                                                          |
-| F131-05  | 带有 `GROUP BY` 和 `HAVING` 从句，以及分组视图的仅返回1条记录的SELECT查询                    | 否 {.text-danger}       |                                                                                                                                                                          |
-| **F181** | **多模块支持**                                                                           | **否**{.text-danger}    |                                                                                                                                                                          |
-| **F201** | **CAST 函数**                                                                           | **是**{.text-success}   |                                                                                                                                                                          |
-| **F221** | **显式默认值**                                                                           | **否**{.text-danger}    |                                                                                                                                                                          |
-| **F261** | **CASE 表达式**                                                                         | **是**{.text-success}   |                                                                                                                                                                          |
-| F261-01  | 简单 CASE 表达式                                                                         | 是 {.text-success}      |                                                                                                                                                                          |
-| F261-02  | 搜索型 CASE 表达式                                                                       | 是 {.text-success}      |                                                                                                                                                                          |
-| F261-03  | NULLIF                                                                                   | 是 {.text-success}      |                                                                                                                                                                          |
-| F261-04  | COALESCE                                                                                 | 是 {.text-success}      |                                                                                                                                                                          |
-| **F311** | **架构定义语句**                                                                         | **部分**{.text-warning} |                                                                                                                                                                          |
-| F311-01  | CREATE SCHEMA                                                                            | 部分 {.text-warning}    | 见`CREATE DATABASE`                                                                                                                                                      |
-| F311-02  | 用于创建持久表的 CREATE TABLE                                                             | 是 {.text-success}      |                                                                                                                                                                          |
-| F311-03  | CREATE VIEW                                                                              | 是 {.text-success}      |                                                                                                                                                                          |
-| F311-04  | CREATE VIEW: WITH CHECK OPTION                                                           | 否 {.text-danger}       |                                                                                                                                                                          |
-| F311-05  | GRANT 语句                                                                                | 是 {.text-success}      |                                                                                                                                                                          |
-| **F471** | **标量子查询**                                                                           | **是**{.text-success}   |                                                                                                                                                                          |
-| **F481** | **扩展 NULL 谓词**                                                                         | **是**{.text-success}   |                                                                                                                                                                          |
-| **F812** | **基本标志位**                                                                           | **否**{.text-danger}    |
-| **S011** | **用于不重复数据的数据类型**                                                             | **否**{.text-danger}    |
-| **T321** | **基本的SQL调用例程**                                                                    | **否**{.text-danger}    |                                                                                                                                                                          |
-| T321-01  | 没有重载的用户定义函数                                                                   | 否{.text-danger}        |                                                                                                                                                                          |
-| T321-02  | 没有重载的用户定义存储过程                                                               | 否{.text-danger}        |                                                                                                                                                                          |
-| T321-03  | 功能调用                                                                                 | 否 {.text-danger}       |                                                                                                                                                                          |
-| T321-04  | CALL 语句                                                                                 | 否 {.text-danger}       |                                                                                                                                                                          |
-| T321-05  | RETURN 语句                                                                               | 否 {.text-danger}       |                                                                                                                                                                          |
-| **T631** | **IN 谓词后接一个列表**                                                                   | **是**{.text-success}   |                                                                                                                                                                          |
diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp
index 2732e46b9b5..0fe33ea7804 100644
--- a/programs/disks/CommandCopy.cpp
+++ b/programs/disks/CommandCopy.cpp
@@ -38,18 +38,10 @@ public:
         String path_to = disk_to.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-to"));
         bool recursive = options.count("recursive");
 
-        if (!disk_from.getDisk()->exists(path_from))
-        {
-            throw Exception(
-                ErrorCodes::BAD_ARGUMENTS,
-                "cannot stat '{}' on disk '{}': No such file or directory",
-                path_from,
-                disk_from.getDisk()->getName());
-        }
-        if (disk_from.getDisk()->isFile(path_from))
+        if (disk_from.getDisk()->existsFile(path_from))
         {
             auto target_location = getTargetLocation(path_from, disk_to, path_to);
-            if (!disk_to.getDisk()->exists(target_location) || disk_to.getDisk()->isFile(target_location))
+            if (!disk_to.getDisk()->existsDirectory(target_location))
             {
                 disk_from.getDisk()->copyFile(
                     path_from,
@@ -65,7 +57,7 @@ public:
                     ErrorCodes::BAD_ARGUMENTS, "cannot overwrite directory {} with non-directory {}", target_location, path_from);
             }
         }
-        else if (disk_from.getDisk()->isDirectory(path_from))
+        else if (disk_from.getDisk()->existsDirectory(path_from))
         {
             if (!recursive)
             {
@@ -73,11 +65,11 @@ public:
             }
             auto target_location = getTargetLocation(path_from, disk_to, path_to);
 
-            if (disk_to.getDisk()->isFile(target_location))
+            if (disk_to.getDisk()->existsFile(target_location))
             {
                 throw Exception(ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory {} with directory {}", path_to, target_location);
             }
-            if (!disk_to.getDisk()->exists(target_location))
+            if (!disk_to.getDisk()->existsDirectory(target_location))
             {
                 disk_to.getDisk()->createDirectory(target_location);
             }
@@ -89,6 +81,14 @@ public:
                 /* write_settings= */ {},
                 /* cancellation_hook= */ {});
         }
+        else
+        {
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                "cannot stat '{}' on disk '{}': No such file or directory",
+                path_from,
+                disk_from.getDisk()->getName());
+        }
     }
 };
 
diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp
index cba8a3745cf..96646b76276 100644
--- a/programs/disks/CommandMove.cpp
+++ b/programs/disks/CommandMove.cpp
@@ -30,21 +30,21 @@ public:
         String path_from = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-from"));
         String path_to = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path-to"));
 
-        if (disk.getDisk()->isFile(path_from))
+        if (disk.getDisk()->existsFile(path_from))
         {
             disk.getDisk()->moveFile(path_from, path_to);
         }
-        else if (disk.getDisk()->isDirectory(path_from))
+        else if (disk.getDisk()->existsDirectory(path_from))
         {
             auto target_location = getTargetLocation(path_from, disk, path_to);
-            if (!disk.getDisk()->exists(target_location))
+            if (!disk.getDisk()->existsDirectory(target_location) && !disk.getDisk()->existsFile(target_location))
             {
                 disk.getDisk()->createDirectory(target_location);
                 disk.getDisk()->moveDirectory(path_from, target_location);
             }
             else
             {
-                if (disk.getDisk()->isFile(target_location))
+                if (disk.getDisk()->existsFile(target_location))
                 {
                     throw Exception(
                         ErrorCodes::BAD_ARGUMENTS, "cannot overwrite non-directory '{}' with directory '{}'", target_location, path_from);
@@ -57,7 +57,7 @@ public:
                 disk.getDisk()->moveDirectory(path_from, target_location);
             }
         }
-        else if (!disk.getDisk()->exists(path_from))
+        else
         {
             throw Exception(
                 ErrorCodes::BAD_ARGUMENTS,
diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp
index c7e0cf22ab7..e6a85d4233d 100644
--- a/programs/disks/CommandRemove.cpp
+++ b/programs/disks/CommandRemove.cpp
@@ -28,11 +28,7 @@ public:
         auto disk = client.getCurrentDiskWithPath();
         const String & path = disk.getRelativeFromRoot(getValueFromCommandLineOptionsThrow<String>(options, "path"));
         bool recursive = options.count("recursive");
-        if (!disk.getDisk()->exists(path))
-        {
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} on disk {} doesn't exist", path, disk.getDisk()->getName());
-        }
-        if (disk.getDisk()->isDirectory(path))
+        if (disk.getDisk()->existsDirectory(path))
         {
             if (!recursive)
             {
@@ -41,10 +37,12 @@ public:
 
             disk.getDisk()->removeRecursive(path);
         }
-        else
+        else if (disk.getDisk()->existsFile(path))
         {
             disk.getDisk()->removeFileIfExists(path);
         }
+        else
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path {} on disk {} doesn't exist", path, disk.getDisk()->getName());
     }
 };
 
diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp
index 547ebc11459..1d61809afb6 100644
--- a/programs/disks/CommandWrite.cpp
+++ b/programs/disks/CommandWrite.cpp
@@ -1,6 +1,7 @@
 #include <Interpreters/Context.h>
 #include "ICommand.h"
 
+#include <IO/ReadBufferFromEmptyFile.h>
 #include <IO/ReadBufferFromFile.h>
 #include <IO/WriteBufferFromFile.h>
 #include <IO/copyData.h>
@@ -36,7 +37,11 @@ public:
                 return std::make_unique<ReadBufferFromFileDescriptor>(STDIN_FILENO);
 
             String relative_path_from = disk.getRelativeFromRoot(path_from.value());
-            return disk.getDisk()->readFile(relative_path_from, getReadSettings());
+            auto res = disk.getDisk()->readFileIfExists(relative_path_from, getReadSettings());
+            if (res)
+                return res;
+            /// For backward compatibility.
+            return std::make_unique<ReadBufferFromEmptyFile>();
         }();
 
         auto out = disk.getDisk()->writeFile(path_to);
diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp
index af7fa1bf41f..dcfb51c420e 100644
--- a/programs/disks/DisksClient.cpp
+++ b/programs/disks/DisksClient.cpp
@@ -29,7 +29,7 @@ DiskWithPath::DiskWithPath(DiskPtr disk_, std::optional<String> path_) : disk(di
     }
 
     String relative_path = normalizePathAndGetAsRelative(path);
-    if (disk->isDirectory(relative_path) || (relative_path.empty() && (disk->isDirectory("/"))))
+    if (disk->existsDirectory(relative_path) || (relative_path.empty() && (disk->existsDirectory("/"))))
     {
         return;
     }
diff --git a/programs/disks/DisksClient.h b/programs/disks/DisksClient.h
index 299631f1afe..52970b9d9a8 100644
--- a/programs/disks/DisksClient.h
+++ b/programs/disks/DisksClient.h
@@ -33,7 +33,7 @@ public:
 
     bool isDirectory(const String & any_path) const
     {
-        return disk->isDirectory(getRelativeFromRoot(any_path)) || (getRelativeFromRoot(any_path).empty() && (disk->isDirectory("/")));
+        return disk->existsDirectory(getRelativeFromRoot(any_path)) || (getRelativeFromRoot(any_path).empty() && (disk->existsDirectory("/")));
     }
 
     std::vector<String> listAllFilesByPath(const String & any_path) const;
diff --git a/programs/disks/ICommand.h b/programs/disks/ICommand.h
index 46eda86c2f9..f0df4bb9cf7 100644
--- a/programs/disks/ICommand.h
+++ b/programs/disks/ICommand.h
@@ -90,7 +90,7 @@ protected:
 
     String getTargetLocation(const String & path_from, DiskWithPath & disk_to, const String & path_to)
     {
-        if (!disk_to.getDisk()->isDirectory(path_to))
+        if (!disk_to.getDisk()->existsDirectory(path_to))
         {
             return path_to;
         }
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 278eb7b9181..affd75024e6 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -296,9 +296,14 @@ void LocalServer::tryInitPath()
 
     global_context->setUserFilesPath(""); /// user's files are everywhere
 
-    std::string user_scripts_path = getClientConfiguration().getString("user_scripts_path", fs::path(path) / "user_scripts/");
+    std::string user_scripts_path = getClientConfiguration().getString("user_scripts_path", fs::path(path) / "user_scripts" / "");
     global_context->setUserScriptsPath(user_scripts_path);
 
+    /// Set path for filesystem caches
+    String filesystem_caches_path(getClientConfiguration().getString("filesystem_caches_path", fs::path(path) / "cache" / ""));
+    if (!filesystem_caches_path.empty())
+        global_context->setFilesystemCachesPath(filesystem_caches_path);
+
     /// top_level_domains_lists
     const std::string & top_level_domains_path = getClientConfiguration().getString("top_level_domains_path", fs::path(path) / "top_level_domains/");
     if (!top_level_domains_path.empty())
@@ -853,6 +858,7 @@ void LocalServer::addOptions(OptionsDescription & options_description)
 {
     options_description.main_description->add_options()
         ("table,N", po::value<std::string>(), "name of the initial table")
+        ("copy", "shortcut for format conversion, equivalent to: --query 'SELECT * FROM table'")
 
         /// If structure argument is omitted then initial query is not generated
         ("structure,S", po::value<std::string>(), "structure of the initial table (list of column and type names)")
@@ -925,6 +931,12 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
         getClientConfiguration().setString("send_logs_level", options["send_logs_level"].as<std::string>());
     if (options.count("wait_for_suggestions_to_load"))
         getClientConfiguration().setBool("wait_for_suggestions_to_load", true);
+    if (options.count("copy"))
+    {
+        if (!queries.empty())
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--copy' and '--query' cannot be specified at the same time");
+        queries.emplace_back("SELECT * FROM table");
+    }
 }
 
 void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 99e6f5d900b..39d27153b1f 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1496,6 +1496,8 @@ try
 
     NamedCollectionFactory::instance().loadIfNot();
 
+    FileCacheFactory::instance().loadDefaultCaches(config());
+
     /// Initialize main config reloader.
     std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
 
diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp
index 92bf2145909..57a1cd756ff 100644
--- a/src/Access/AuthenticationData.cpp
+++ b/src/Access/AuthenticationData.cpp
@@ -117,20 +117,20 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs)
 }
 
 
-void AuthenticationData::setPassword(const String & password_)
+void AuthenticationData::setPassword(const String & password_, bool validate)
 {
     switch (type)
     {
         case AuthenticationType::PLAINTEXT_PASSWORD:
-            setPasswordHashBinary(Util::stringToDigest(password_));
+            setPasswordHashBinary(Util::stringToDigest(password_), validate);
             return;
 
         case AuthenticationType::SHA256_PASSWORD:
-            setPasswordHashBinary(Util::encodeSHA256(password_));
+            setPasswordHashBinary(Util::encodeSHA256(password_), validate);
             return;
 
         case AuthenticationType::DOUBLE_SHA1_PASSWORD:
-            setPasswordHashBinary(Util::encodeDoubleSHA1(password_));
+            setPasswordHashBinary(Util::encodeDoubleSHA1(password_), validate);
             return;
 
         case AuthenticationType::BCRYPT_PASSWORD:
@@ -149,12 +149,12 @@ void AuthenticationData::setPassword(const String & password_)
     throw Exception(ErrorCodes::NOT_IMPLEMENTED, "setPassword(): authentication type {} not supported", toString(type));
 }
 
-void AuthenticationData::setPasswordBcrypt(const String & password_, int workfactor_)
+void AuthenticationData::setPasswordBcrypt(const String & password_, int workfactor_, bool validate)
 {
     if (type != AuthenticationType::BCRYPT_PASSWORD)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot specify bcrypt password for authentication type {}", toString(type));
 
-    setPasswordHashBinary(Util::encodeBcrypt(password_, workfactor_));
+    setPasswordHashBinary(Util::encodeBcrypt(password_, workfactor_), validate);
 }
 
 String AuthenticationData::getPassword() const
@@ -165,7 +165,7 @@ String AuthenticationData::getPassword() const
 }
 
 
-void AuthenticationData::setPasswordHashHex(const String & hash)
+void AuthenticationData::setPasswordHashHex(const String & hash, bool validate)
 {
     Digest digest;
     digest.resize(hash.size() / 2);
@@ -179,7 +179,7 @@ void AuthenticationData::setPasswordHashHex(const String & hash)
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read password hash in hex, check for valid characters [0-9a-fA-F] and length");
     }
 
-    setPasswordHashBinary(digest);
+    setPasswordHashBinary(digest, validate);
 }
 
 
@@ -195,7 +195,7 @@ String AuthenticationData::getPasswordHashHex() const
 }
 
 
-void AuthenticationData::setPasswordHashBinary(const Digest & hash)
+void AuthenticationData::setPasswordHashBinary(const Digest & hash, bool validate)
 {
     switch (type)
     {
@@ -217,7 +217,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
 
         case AuthenticationType::DOUBLE_SHA1_PASSWORD:
         {
-            if (hash.size() != 20)
+            if (validate && hash.size() != 20)
                 throw Exception(ErrorCodes::BAD_ARGUMENTS,
                                 "Password hash for the 'DOUBLE_SHA1_PASSWORD' authentication type has length {} "
                                 "but must be exactly 20 bytes.", hash.size());
@@ -231,7 +231,7 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
             /// However the library we use to encode it requires hash string to be 64 characters long,
             ///  so we also allow the hash of this length.
 
-            if (hash.size() != 59 && hash.size() != 60 && hash.size() != 64)
+            if (validate && hash.size() != 59 && hash.size() != 60 && hash.size() != 64)
                 throw Exception(ErrorCodes::BAD_ARGUMENTS,
                                 "Password hash for the 'BCRYPT_PASSWORD' authentication type has length {} "
                                 "but must be 59 or 60 bytes.", hash.size());
@@ -240,10 +240,13 @@ void AuthenticationData::setPasswordHashBinary(const Digest & hash)
             resized.resize(64);
 
 #if USE_BCRYPT
-            /// Verify that it is a valid hash
-            int ret = bcrypt_checkpw("", reinterpret_cast<const char *>(resized.data()));
-            if (ret == -1)
-                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not decode the provided hash with 'bcrypt_hash'");
+            if (validate)
+            {
+                /// Verify that it is a valid hash
+                int ret = bcrypt_checkpw("", reinterpret_cast<const char *>(resized.data()));
+                if (ret == -1)
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not decode the provided hash with 'bcrypt_hash'");
+            }
 #endif
 
             password_hash = hash;
@@ -385,7 +388,7 @@ std::shared_ptr<ASTAuthenticationData> AuthenticationData::toAST() const
 }
 
 
-AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & query, ContextPtr context, bool check_password_rules)
+AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & query, ContextPtr context, bool validate)
 {
     if (query.type && query.type == AuthenticationType::NO_PASSWORD)
         return AuthenticationData();
@@ -431,7 +434,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
         if (!query.type && !context)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get default password type without context");
 
-        if (check_password_rules && !context)
+        if (validate && !context)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot check password complexity rules without context");
 
         if (query.type == AuthenticationType::BCRYPT_PASSWORD && !context)
@@ -448,13 +451,13 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
 
         AuthenticationData auth_data(current_type);
 
-        if (check_password_rules)
+        if (validate)
             context->getAccessControl().checkPasswordComplexityRules(value);
 
         if (query.type == AuthenticationType::BCRYPT_PASSWORD)
         {
             int workfactor = context->getAccessControl().getBcryptWorkfactor();
-            auth_data.setPasswordBcrypt(value, workfactor);
+            auth_data.setPasswordBcrypt(value, workfactor, validate);
             return auth_data;
         }
 
@@ -486,7 +489,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
 #endif
         }
 
-        auth_data.setPassword(value);
+        auth_data.setPassword(value, validate);
         return auth_data;
     }
 
@@ -498,11 +501,11 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que
 
         if (query.type == AuthenticationType::BCRYPT_PASSWORD)
         {
-            auth_data.setPasswordHashBinary(AuthenticationData::Util::stringToDigest(value));
+            auth_data.setPasswordHashBinary(AuthenticationData::Util::stringToDigest(value), validate);
             return auth_data;
         }
 
-        auth_data.setPasswordHashHex(value);
+        auth_data.setPasswordHashHex(value, validate);
 
 
         if (query.type == AuthenticationType::SHA256_PASSWORD && args_size == 2)
diff --git a/src/Access/AuthenticationData.h b/src/Access/AuthenticationData.h
index 8093fe1d888..a0c100264f8 100644
--- a/src/Access/AuthenticationData.h
+++ b/src/Access/AuthenticationData.h
@@ -31,17 +31,17 @@ public:
     AuthenticationType getType() const { return type; }
 
     /// Sets the password and encrypt it using the authentication type set in the constructor.
-    void setPassword(const String & password_);
+    void setPassword(const String & password_, bool validate);
 
     /// Returns the password. Allowed to use only for Type::PLAINTEXT_PASSWORD.
     String getPassword() const;
 
     /// Sets the password as a string of hexadecimal digits.
-    void setPasswordHashHex(const String & hash);
+    void setPasswordHashHex(const String & hash, bool validate);
     String getPasswordHashHex() const;
 
     /// Sets the password in binary form.
-    void setPasswordHashBinary(const Digest & hash);
+    void setPasswordHashBinary(const Digest & hash, bool validate);
     const Digest & getPasswordHashBinary() const { return password_hash; }
 
     /// Sets the salt in String form.
@@ -49,7 +49,7 @@ public:
     String getSalt() const;
 
     /// Sets the password using bcrypt hash with specified workfactor
-    void setPasswordBcrypt(const String & password_, int workfactor_);
+    void setPasswordBcrypt(const String & password_, int workfactor_, bool validate);
 
     /// Sets the server name for authentication type LDAP.
     const String & getLDAPServerName() const { return ldap_server_name; }
@@ -77,7 +77,7 @@ public:
     friend bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs);
     friend bool operator !=(const AuthenticationData & lhs, const AuthenticationData & rhs) { return !(lhs == rhs); }
 
-    static AuthenticationData fromAST(const ASTAuthenticationData & query, ContextPtr context, bool check_password_rules);
+    static AuthenticationData fromAST(const ASTAuthenticationData & query, ContextPtr context, bool validate);
     std::shared_ptr<ASTAuthenticationData> toAST() const;
 
     struct Util
diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp
index b89d5f136b3..eddc7ca1e0e 100644
--- a/src/Access/UsersConfigAccessStorage.cpp
+++ b/src/Access/UsersConfigAccessStorage.cpp
@@ -121,6 +121,7 @@ namespace
         bool allow_no_password,
         bool allow_plaintext_password)
     {
+        const bool validate = true;
         auto user = std::make_shared<User>();
         user->setName(user_name);
         String user_config = "users." + user_name;
@@ -157,17 +158,17 @@ namespace
         if (has_password_plaintext)
         {
             user->authentication_methods.emplace_back(AuthenticationType::PLAINTEXT_PASSWORD);
-            user->authentication_methods.back().setPassword(config.getString(user_config + ".password"));
+            user->authentication_methods.back().setPassword(config.getString(user_config + ".password"), validate);
         }
         else if (has_password_sha256_hex)
         {
             user->authentication_methods.emplace_back(AuthenticationType::SHA256_PASSWORD);
-            user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_sha256_hex"));
+            user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_sha256_hex"), validate);
         }
         else if (has_password_double_sha1_hex)
         {
             user->authentication_methods.emplace_back(AuthenticationType::DOUBLE_SHA1_PASSWORD);
-            user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex"));
+            user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex"), validate);
         }
         else if (has_ldap)
         {
diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp b/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp
index 44eebfc21a9..5b3190ad052 100644
--- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp
+++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.cpp
@@ -28,11 +28,36 @@ namespace ErrorCodes
 namespace
 {
 
-constexpr size_t max_events = 32;
+constexpr size_t MAX_EVENTS = 32;
+
+/// Sorts events_list, which consists of a prefix of prefix_size elements followed by a suffix; the flags tell which part is already sorted.
+template <typename T>
+void mergeEventsList(T & events_list, size_t prefix_size, bool prefix_sorted, bool suffix_sorted)
+{
+    /// Either sort the whole container at once, or sort only the unsorted part and then merge the two sorted ranges.
+    if (!prefix_sorted && !suffix_sorted)
+        std::stable_sort(std::begin(events_list), std::end(events_list));
+    else
+    {
+        const auto begin = std::begin(events_list);
+        const auto middle = std::next(begin, prefix_size); /// First element of the suffix.
+        const auto end = std::end(events_list);
+
+        if (!prefix_sorted)
+            std::stable_sort(begin, middle);
+
+        if (!suffix_sorted)
+            std::stable_sort(middle, end);
+
+        std::inplace_merge(begin, middle, end); /// Both ranges are sorted at this point.
+    }
+}
 
 template <typename T>
 struct AggregateFunctionWindowFunnelData
 {
+    static constexpr bool strict_once_enabled = false;
+
     using TimestampEvent = std::pair<T, UInt8>;
     using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
 
@@ -66,24 +91,7 @@ struct AggregateFunctionWindowFunnelData
 
         events_list.insert(std::begin(other.events_list), std::end(other.events_list));
 
-        /// either sort whole container or do so partially merging ranges afterwards
-        if (!sorted && !other.sorted)
-            std::stable_sort(std::begin(events_list), std::end(events_list));
-        else
-        {
-            const auto begin = std::begin(events_list);
-            const auto middle = std::next(begin, size);
-            const auto end = std::end(events_list);
-
-            if (!sorted)
-                std::stable_sort(begin, middle);
-
-            if (!other.sorted)
-                std::stable_sort(middle, end);
-
-            std::inplace_merge(begin, middle, end);
-        }
-
+        mergeEventsList(events_list, size, sorted, other.sorted);
         sorted = true;
     }
 
@@ -133,6 +141,131 @@ struct AggregateFunctionWindowFunnelData
     }
 };
 
+template <typename T>
+struct AggregateFunctionWindowFunnelStrictOnceData
+{
+    static constexpr bool strict_once_enabled = true; /// Makes getEventLevel() use getEventLevelStrictOnce().
+    struct TimestampEvent
+    {
+        T timestamp;
+        UInt8 event_type;
+        UInt64 unique_id; /// Assigned from next_unique_id in add(); shifted in merge().
+
+        TimestampEvent(T timestamp_, UInt8 event_type_, UInt64 unique_id_)
+            : timestamp(timestamp_), event_type(event_type_), unique_id(unique_id_) {}
+        /// Events are ordered lexicographically by (timestamp, event_type, unique_id).
+        bool operator<(const TimestampEvent & other) const
+        {
+            return std::tie(timestamp, event_type, unique_id) < std::tie(other.timestamp, other.event_type, other.unique_id);
+        }
+
+        bool operator<=(const TimestampEvent & other) const
+        {
+            return std::tie(timestamp, event_type, unique_id) <= std::tie(other.timestamp, other.event_type, other.unique_id);
+        }
+    };
+
+    /// Recorded events; guaranteed to be in ascending order only while `sorted` is true.
+    using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
+    TimestampEvents events_list;
+
+    /// Next unique identifier for events; add() stamps it on every event until advanceId() is called.
+    /// Used to distinguish events with the same timestamp that match several conditions.
+    UInt64 next_unique_id = 1;
+    bool sorted = true; /// Whether events_list is known to be in ascending order.
+
+    size_t size() const
+    {
+        return events_list.size();
+    }
+
+    void advanceId()
+    {
+        ++next_unique_id;
+    }
+
+    void add(T timestamp, UInt8 event_type)
+    {
+        TimestampEvent new_event(timestamp, event_type, next_unique_id);
+        /// Check if the new event maintains the sorted order
+        if (sorted && !events_list.empty())
+            sorted = events_list.back() <= new_event;
+        events_list.push_back(new_event);
+    }
+
+    void merge(const AggregateFunctionWindowFunnelStrictOnceData & other)
+    {
+        if (other.events_list.empty())
+            return;
+        /// Our own events end here; this is the boundary between the two ranges passed to mergeEventsList().
+        const auto current_size = events_list.size();
+
+        UInt64 new_next_unique_id = next_unique_id;
+        events_list.reserve(current_size + other.events_list.size());
+        for (auto other_event : other.events_list)
+        {
+            /// Shift the ids of the other state by our next_unique_id so that they cannot clash with ours.
+            other_event.unique_id += next_unique_id;
+            new_next_unique_id = std::max(new_next_unique_id, other_event.unique_id + 1);
+            events_list.push_back(other_event);
+        }
+        next_unique_id = new_next_unique_id;
+
+        mergeEventsList(events_list, current_size, sorted, other.sorted);
+
+        sorted = true;
+    }
+
+    void sort()
+    {
+        if (!sorted)
+        {
+            std::stable_sort(std::begin(events_list), std::end(events_list));
+            sorted = true;
+        }
+    }
+
+    void serialize(WriteBuffer & buf) const
+    {
+        writeBinary(sorted, buf);
+        writeBinary(events_list.size(), buf);
+        /// Each event is written as timestamp, event_type, unique_id.
+        for (const auto & event : events_list)
+        {
+            writeBinary(event.timestamp, buf);
+            writeBinary(event.event_type, buf);
+            writeBinary(event.unique_id, buf);
+        }
+    }
+
+    void deserialize(ReadBuffer & buf)
+    {
+        readBinary(sorted, buf);
+
+        size_t events_size;
+        readBinary(events_size, buf);
+
+        if (events_size > 100'000'000) /// Arbitrary limit to prevent excessive memory allocation
+            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large size of the state of windowFunnel");
+
+        events_list.clear();
+        events_list.reserve(events_size);
+
+        T timestamp;
+        UInt8 event_type;
+        UInt64 unique_id = 0;
+        /// Read the events back in the order serialize() wrote them, keeping next_unique_id above every id seen.
+        for (size_t i = 0; i < events_size; ++i)
+        {
+            readBinary(timestamp, buf);
+            readBinary(event_type, buf);
+            readBinary(unique_id, buf);
+            next_unique_id = std::max(next_unique_id, unique_id + 1);
+            events_list.emplace_back(timestamp, event_type, unique_id);
+        }
+    }
+};
+
 /** Calculates the max event level in a sliding window.
   * The max size of events is 32, that's enough for funnel analytics
   *
@@ -160,22 +293,15 @@ private:
     /// The level path must be 1---2---3---...---check_events_size, find the max event level that satisfied the path in the sliding window.
     /// If found, returns the max event level, else return 0.
     /// The algorithm works in O(n) time, but the overall function works in O(n * log(n)) due to sorting.
-    UInt8 getEventLevel(Data & data) const
+    UInt8 getEventLevelNonStrictOnce(const AggregateFunctionWindowFunnelData<T>::TimestampEvents & events_list) const
     {
-        if (data.size() == 0)
-            return 0;
-        if (!strict_order && events_size == 1)
-            return 1;
-
-        data.sort();
-
         /// events_timestamp stores the timestamp of the first and previous i-th level event happen within time window
         std::vector<std::optional<std::pair<UInt64, UInt64>>> events_timestamp(events_size);
         bool first_event = false;
-        for (size_t i = 0; i < data.events_list.size(); ++i)
+        for (size_t i = 0; i < events_list.size(); ++i)
         {
-            const T & timestamp = data.events_list[i].first;
-            const auto & event_idx = data.events_list[i].second - 1;
+            const T & timestamp = events_list[i].first;
+            const auto & event_idx = events_list[i].second - 1;
             if (strict_order && event_idx == -1)
             {
                 if (first_event)
@@ -189,7 +315,7 @@ private:
             }
             else if (strict_deduplication && events_timestamp[event_idx].has_value())
             {
-                return data.events_list[i - 1].second;
+                return events_list[i - 1].second;
             }
             else if (strict_order && first_event && !events_timestamp[event_idx - 1].has_value())
             {
@@ -222,6 +348,126 @@ private:
         return 0;
     }
 
+    UInt8 getEventLevelStrictOnce(const AggregateFunctionWindowFunnelStrictOnceData<T>::TimestampEvents & events_list) const
+    {
+        /// A partially matched chain of events: timestamps of its first and last event and the unique_id used at each level.
+        struct EventMatchTimeWindow
+        {
+            UInt64 first_timestamp;
+            UInt64 last_timestamp;
+            std::array<UInt64, MAX_EVENTS> event_path{};
+            /// event_path is value-initialized because whole arrays are copied below before every slot has been written.
+            EventMatchTimeWindow() = default;
+            EventMatchTimeWindow(UInt64 first_ts, UInt64 last_ts)
+                : first_timestamp(first_ts), last_timestamp(last_ts) {}
+        };
+
+        /// event_sequences[k] holds every chain that has matched conditions 1..k+1 so far.
+        /// All candidates are kept because one event (one unique_id) can meet several conditions
+        /// but may be used only once per chain. Example: events 'start', 'a', 'b', 'a', 'end'.
+        /// The second occurrence of 'a' should be counted only once in one chain, and we do not know
+        /// in advance if the next event will be 'b' or 'end', so we keep both paths.
+        std::vector<std::list<EventMatchTimeWindow>> event_sequences(events_size);
+
+        bool has_first_event = false;
+        for (size_t i = 0; i < events_list.size(); ++i)
+        {
+            const auto & current_event = events_list[i];
+            auto timestamp = current_event.timestamp;
+            Int64 event_idx = current_event.event_type - 1;
+            UInt64 unique_id = current_event.unique_id;
+            /// event_idx == -1 (event_type 0) is a row that matched no condition; it stops the scan once a first event was seen.
+            if (strict_order && event_idx == -1)
+            {
+                if (has_first_event)
+                    break;
+                else
+                    continue;
+            }
+            else if (event_idx == 0)
+            {
+                auto & event_seq = event_sequences[0].emplace_back(timestamp, timestamp);
+                event_seq.event_path[0] = unique_id;
+                has_first_event = true;
+            }
+            else if (strict_deduplication && !event_sequences[event_idx].empty())
+            {
+                return events_list[i - 1].event_type;
+            }
+            else if (strict_order && has_first_event && event_sequences[event_idx - 1].empty())
+            {
+                for (size_t event = 0; event < event_sequences.size(); ++event)
+                {
+                    if (event_sequences[event].empty())
+                        return event;
+                }
+            }
+            else if (!event_sequences[event_idx - 1].empty())
+            {
+                auto & prev_level = event_sequences[event_idx - 1];
+                for (auto it = prev_level.begin(); it != prev_level.end();)
+                {
+                    auto first_ts = it->first_timestamp;
+                    bool time_matched = timestamp <= first_ts + window;
+                    if (!time_matched && prev_level.size() > 1)
+                    {
+                        // Remove old events that are out of the window, but keep at least one
+                        it = prev_level.erase(it);
+                        continue;
+                    }
+
+                    auto prev_path = it->event_path;
+                    chassert(event_idx > 0);
+
+                    /// Ensure the unique_id hasn't been used in the path already
+                    for (size_t j = 0; j < static_cast<size_t>(event_idx); ++j)
+                    {
+                        if (!time_matched)
+                            break;
+                        time_matched = prev_path[j] != unique_id;
+                    }
+
+                    if (time_matched && strict_increase)
+                        time_matched = it->last_timestamp < timestamp;
+
+                    if (time_matched)
+                    {
+                        prev_path[event_idx] = unique_id;
+                        /// Extend the chain by one level; it keeps the first timestamp of the chain it grew from.
+                        auto & new_seq = event_sequences[event_idx].emplace_back(first_ts, timestamp);
+                        new_seq.event_path = std::move(prev_path);
+                        if (event_idx + 1 == events_size)
+                            return events_size;
+                    }
+                    ++it;
+                }
+            }
+        }
+        /// No chain reached the last condition: report the deepest level that has at least one chain.
+        for (size_t event = event_sequences.size(); event > 0; --event)
+        {
+            if (!event_sequences[event - 1].empty())
+                return event;
+        }
+        return 0;
+    }
+
+    /// Returns 0 for an empty state and 1 for a single condition without strict_order; otherwise sorts the events and computes the level.
+    UInt8 getEventLevel(Data & data) const
+    {
+        if (data.size() == 0)
+            return 0;
+        if (!strict_order && events_size == 1)
+            return 1;
+        /// Sort first: the level computation walks the events from the first to the last one.
+        data.sort();
+
+        if constexpr (Data::strict_once_enabled)
+            return getEventLevelStrictOnce(data.events_list);
+        else
+            return getEventLevelNonStrictOnce(data.events_list);
+    }
+
 public:
     String getName() const override
     {
@@ -246,6 +492,9 @@ public:
                 strict_order = true;
             else if (option == "strict_increase")
                 strict_increase = true;
+            else if (option == "strict_once")
+                /// Checked in factory
+                chassert(Data::strict_once_enabled);
             else if (option == "strict")
                 throw Exception(ErrorCodes::BAD_ARGUMENTS, "strict is replaced with strict_deduplication in Aggregate function {}", getName());
             else
@@ -272,6 +521,9 @@ public:
 
         if (strict_order && !has_event)
             this->data(place).add(timestamp, 0);
+
+        if constexpr (Data::strict_once_enabled)
+            this->data(place).advanceId();
     }
 
     void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
@@ -296,7 +548,6 @@ public:
 };
 
 
-template <template <typename> class Data>
 AggregateFunctionPtr
 createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes & arguments, const Array & params, const Settings *)
 {
@@ -309,7 +560,7 @@ createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes &
         throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                         "Aggregate function {} requires one timestamp argument and at least one event condition.", name);
 
-    if (arguments.size() > max_events + 1)
+    if (arguments.size() > MAX_EVENTS + 1)
         throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many event arguments for aggregate function {}", name);
 
     for (const auto i : collections::range(1, arguments.size()))
@@ -321,16 +572,29 @@ createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes &
                             cond_arg->getName(), toString(i + 1), name);
     }
 
-    AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionWindowFunnel, Data>(*arguments[0], arguments, params));
-    WhichDataType which(arguments.front().get());
-    if (res)
-        return res;
-    if (which.isDate())
-        return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDate::FieldType, Data<DataTypeDate::FieldType>>>(arguments, params);
-    if (which.isDateTime())
-        return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDateTime::FieldType, Data<DataTypeDateTime::FieldType>>>(
-            arguments, params);
-
+    bool strict_once = params.size() > 1 && std::any_of(params.begin() + 1, params.end(), [](const auto & f) { return f.template safeGet<String>() == "strict_once"; });
+    if (strict_once)
+    {
+        AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionWindowFunnel, AggregateFunctionWindowFunnelStrictOnceData>(*arguments[0], arguments, params));
+        WhichDataType which(arguments.front().get());
+        if (res)
+            return res;
+        if (which.isDate())
+            return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDate::FieldType, AggregateFunctionWindowFunnelStrictOnceData<DataTypeDate::FieldType>>>(arguments, params);
+        if (which.isDateTime())
+            return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDateTime::FieldType, AggregateFunctionWindowFunnelStrictOnceData<DataTypeDateTime::FieldType>>>(arguments, params);
+    }
+    else
+    {
+        AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionWindowFunnel, AggregateFunctionWindowFunnelData>(*arguments[0], arguments, params));
+        WhichDataType which(arguments.front().get());
+        if (res)
+            return res;
+        if (which.isDate())
+            return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDate::FieldType, AggregateFunctionWindowFunnelData<DataTypeDate::FieldType>>>(arguments, params);
+        if (which.isDateTime())
+            return std::make_shared<AggregateFunctionWindowFunnel<DataTypeDateTime::FieldType, AggregateFunctionWindowFunnelData<DataTypeDateTime::FieldType>>>(arguments, params);
+    }
     throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                     "Illegal type {} of first argument of aggregate function {}, must "
                     "be Unsigned Number, Date, DateTime", arguments.front().get()->getName(), name);
@@ -340,7 +604,7 @@ createAggregateFunctionWindowFunnel(const std::string & name, const DataTypes &
 
 void registerAggregateFunctionWindowFunnel(AggregateFunctionFactory & factory)
 {
-    factory.registerFunction("windowFunnel", createAggregateFunctionWindowFunnel<AggregateFunctionWindowFunnelData>);
+    factory.registerFunction("windowFunnel", createAggregateFunctionWindowFunnel);
 }
 
 }
diff --git a/src/AggregateFunctions/WindowFunction.h b/src/AggregateFunctions/WindowFunction.h
index f7fbd7389ea..be214f4c824 100644
--- a/src/AggregateFunctions/WindowFunction.h
+++ b/src/AggregateFunctions/WindowFunction.h
@@ -78,11 +78,6 @@ struct WindowFunction : public IAggregateFunctionHelper<WindowFunction>, public
     }
 
     String getName() const override { return name; }
-    void create(AggregateDataPtr __restrict) const override { }
-    void destroy(AggregateDataPtr __restrict) const noexcept override { }
-    bool hasTrivialDestructor() const override { return true; }
-    size_t sizeOfData() const override { return 0; }
-    size_t alignOfData() const override { return 1; }
     void add(AggregateDataPtr __restrict, const IColumn **, size_t, Arena *) const override { fail(); }
     void merge(AggregateDataPtr __restrict, ConstAggregateDataPtr, Arena *) const override { fail(); }
     void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t>) const override { fail(); }
@@ -90,6 +85,22 @@ struct WindowFunction : public IAggregateFunctionHelper<WindowFunction>, public
     void insertResultInto(AggregateDataPtr __restrict, IColumn &, Arena *) const override { fail(); }
 };
 
+struct StatelessWindowFunction : public WindowFunction
+{
+    StatelessWindowFunction(
+        const std::string & name_, const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_)
+        : WindowFunction(name_, argument_types_, parameters_, result_type_)
+    {
+    }
+    /// No per-aggregate state is kept: the data is zero-sized.
+    size_t sizeOfData() const override { return 0; }
+    size_t alignOfData() const override { return 1; }
+    /// Consequently there is nothing to construct or destroy.
+    void create(AggregateDataPtr __restrict) const override { }
+    void destroy(AggregateDataPtr __restrict) const noexcept override { }
+    bool hasTrivialDestructor() const override { return true; }
+};
+
 template <typename State>
 struct StatefulWindowFunction : public WindowFunction
 {
@@ -100,7 +111,7 @@ struct StatefulWindowFunction : public WindowFunction
     }
 
     size_t sizeOfData() const override { return sizeof(State); }
-    size_t alignOfData() const override { return 1; }
+    size_t alignOfData() const override { return alignof(State); }
 
     void create(AggregateDataPtr __restrict place) const override { new (place) State(); }
 
diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp
index 3603745b279..03e27c9fdd2 100644
--- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp
+++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp
@@ -34,7 +34,7 @@ namespace ErrorCodes
 namespace
 {
 
-void exctractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi_conditions, QueryTreeNodes & other)
+void extractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi_conditions, QueryTreeNodes & other)
 {
     auto * func = node->as<FunctionNode>();
     if (!func)
@@ -52,7 +52,7 @@ void exctractJoinConditions(const QueryTreeNodePtr & node, QueryTreeNodes & equi
     else if (func->getFunctionName() == "and")
     {
         for (const auto & arg : args)
-            exctractJoinConditions(arg, equi_conditions, other);
+            extractJoinConditions(arg, equi_conditions, other);
     }
     else
     {
@@ -118,7 +118,7 @@ public:
 
         QueryTreeNodes equi_conditions;
         QueryTreeNodes other_conditions;
-        exctractJoinConditions(where_condition, equi_conditions, other_conditions);
+        extractJoinConditions(where_condition, equi_conditions, other_conditions);
         bool can_convert_cross_to_inner = false;
         for (auto & condition : equi_conditions)
         {
diff --git a/src/Analyzer/Resolve/IdentifierResolver.cpp b/src/Analyzer/Resolve/IdentifierResolver.cpp
index 7efc0d2a367..15a90c52a2a 100644
--- a/src/Analyzer/Resolve/IdentifierResolver.cpp
+++ b/src/Analyzer/Resolve/IdentifierResolver.cpp
@@ -432,6 +432,14 @@ QueryTreeNodePtr IdentifierResolver::tryResolveTableIdentifierFromDatabaseCatalo
     else
         storage = DatabaseCatalog::instance().tryGetTable(storage_id, context);
 
+    if (!storage && storage_id.hasUUID())
+    {
+        // If `storage_id` has a UUID, that UUID may have been removed from `DatabaseCatalog` after `context->resolveStorageID(storage_id)`.
+        // In that case, fall back to looking the table up by database name and table name.
+        auto database = DatabaseCatalog::instance().tryGetDatabase(storage_id.getDatabaseName());
+        if (database)
+            storage = database->tryGetTable(table_name, context);
+    }
     if (!storage)
         return {};
 
diff --git a/src/Backups/BackupIO_Disk.cpp b/src/Backups/BackupIO_Disk.cpp
index 27b594f6bb8..aeb07b154f5 100644
--- a/src/Backups/BackupIO_Disk.cpp
+++ b/src/Backups/BackupIO_Disk.cpp
@@ -20,7 +20,7 @@ BackupReaderDisk::~BackupReaderDisk() = default;
 
 bool BackupReaderDisk::fileExists(const String & file_name)
 {
-    return disk->exists(root_path / file_name);
+    return disk->existsFile(root_path / file_name);
 }
 
 UInt64 BackupReaderDisk::getFileSize(const String & file_name)
@@ -68,7 +68,7 @@ BackupWriterDisk::~BackupWriterDisk() = default;
 
 bool BackupWriterDisk::fileExists(const String & file_name)
 {
-    return disk->exists(root_path / file_name);
+    return disk->existsFile(root_path / file_name);
 }
 
 UInt64 BackupWriterDisk::getFileSize(const String & file_name)
@@ -91,7 +91,7 @@ std::unique_ptr<WriteBuffer> BackupWriterDisk::writeFile(const String & file_nam
 void BackupWriterDisk::removeFile(const String & file_name)
 {
     disk->removeFileIfExists(root_path / file_name);
-    if (disk->isDirectory(root_path) && disk->isDirectoryEmpty(root_path))
+    if (disk->existsDirectory(root_path) && disk->isDirectoryEmpty(root_path))
         disk->removeDirectory(root_path);
 }
 
@@ -99,7 +99,7 @@ void BackupWriterDisk::removeFiles(const Strings & file_names)
 {
     for (const auto & file_name : file_names)
         disk->removeFileIfExists(root_path / file_name);
-    if (disk->isDirectory(root_path) && disk->isDirectoryEmpty(root_path))
+    if (disk->existsDirectory(root_path) && disk->isDirectoryEmpty(root_path))
         disk->removeDirectory(root_path);
 }
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b24e154f204..22d20fc82ce 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -244,6 +244,7 @@ add_object_library(clickhouse_storages Storages)
 add_object_library(clickhouse_storages_mysql Storages/MySQL)
 add_object_library(clickhouse_storages_distributed Storages/Distributed)
 add_object_library(clickhouse_storages_mergetree Storages/MergeTree)
+add_object_library(clickhouse_storages_mergetree_merge_selectors Storages/MergeTree/MergeSelectors)
 add_object_library(clickhouse_storages_statistics Storages/Statistics)
 add_object_library(clickhouse_storages_liveview Storages/LiveView)
 add_object_library(clickhouse_storages_windowview Storages/WindowView)
diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h
index 382bfe34a3d..85e5fcb0ce1 100644
--- a/src/Client/ConnectionParameters.h
+++ b/src/Client/ConnectionParameters.h
@@ -15,7 +15,7 @@ namespace DB
 {
 struct ConnectionParameters
 {
-    std::string host;
+    String host;
     UInt16 port{};
     std::string default_database;
     std::string user;
@@ -30,8 +30,8 @@ struct ConnectionParameters
     ConnectionTimeouts timeouts;
 
     ConnectionParameters() = default;
-    ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host);
-    ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, std::optional<UInt16> port);
+    ConnectionParameters(const Poco::Util::AbstractConfiguration & config, String host);
+    ConnectionParameters(const Poco::Util::AbstractConfiguration & config, String host, std::optional<UInt16> port);
 
     static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & connection_host);
 
diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index 658eaedbda1..64877196aef 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -291,9 +291,14 @@
     M(CacheWarmerBytesInProgress, "Total size of remote file segments waiting to be asynchronously loaded into filesystem cache.") \
     M(DistrCacheOpenedConnections, "Number of open connections to Distributed Cache") \
     M(DistrCacheUsedConnections, "Number of currently used connections to Distributed Cache") \
+    M(DistrCacheAllocatedConnections, "Number of currently allocated connections to Distributed Cache connection pool") \
+    M(DistrCacheBorrowedConnections, "Number of currently borrowed connections to Distributed Cache connection pool") \
     M(DistrCacheReadRequests, "Number of executed Read requests to Distributed Cache") \
     M(DistrCacheWriteRequests, "Number of executed Write requests to Distributed Cache") \
     M(DistrCacheServerConnections, "Number of open connections to ClickHouse server from Distributed Cache") \
+    M(DistrCacheRegisteredServers, "Number of distributed cache registered servers") \
+    M(DistrCacheRegisteredServersCurrentAZ, "Number of distributed cache registered servers in current az") \
+    M(DistrCacheServerS3CachedClients, "Number of distributed cache S3 cached clients") \
     \
     M(SchedulerIOReadScheduled, "Number of IO reads are being scheduled currently") \
     M(SchedulerIOWriteScheduled, "Number of IO writes are being scheduled currently") \
@@ -314,6 +319,20 @@
     M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \
     \
     M(DiskS3NoSuchKeyErrors, "The number of `NoSuchKey` errors that occur when reading data from S3 cloud storage through ClickHouse disks.") \
+    \
+    M(SharedCatalogStateApplicationThreads, "Number of threads in the threadpool for state application in Shared Catalog.") \
+    M(SharedCatalogStateApplicationThreadsActive, "Number of active threads in the threadpool for state application in Shared Catalog.") \
+    M(SharedCatalogStateApplicationThreadsScheduled, "Number of queued or active jobs in the threadpool for state application in Shared Catalog.") \
+    \
+    M(SharedCatalogDropLocalThreads, "Number of threads in the threadpool for drop of local tables in Shared Catalog.") \
+    M(SharedCatalogDropLocalThreadsActive, "Number of active threads in the threadpool for drop of local tables in Shared Catalog.") \
+    M(SharedCatalogDropLocalThreadsScheduled, "Number of queued or active jobs in the threadpool for drop of local tables in Shared Catalog.") \
+    \
+    M(SharedCatalogDropZooKeeperThreads, "Number of threads in the threadpool for drop of object in ZooKeeper in Shared Catalog.") \
+    M(SharedCatalogDropZooKeeperThreadsActive, "Number of active threads in the threadpool for drop of object in ZooKeeper in Shared Catalog.") \
+    M(SharedCatalogDropZooKeeperThreadsScheduled, "Number of queued or active jobs in the threadpool for drop of object in ZooKeeper in Shared Catalog.") \
+    \
+    M(SharedDatabaseCatalogTablesInLocalDropDetachQueue, "Number of tables in the queue for local drop or detach in Shared Catalog.") \
 
 #ifdef APPLY_FOR_EXTERNAL_METRICS
     #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)
diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index 09a5375191b..3f4a75fae3c 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -452,7 +452,7 @@
     M(553, LZMA_STREAM_ENCODER_FAILED) \
     M(554, LZMA_STREAM_DECODER_FAILED) \
     M(555, ROCKSDB_ERROR) \
-    M(556, SYNC_MYSQL_USER_ACCESS_ERROR)\
+    M(556, SYNC_MYSQL_USER_ACCESS_ERROR) \
     M(557, UNKNOWN_UNION) \
     M(558, EXPECTED_ALL_OR_DISTINCT) \
     M(559, INVALID_GRPC_QUERY_INFO) \
@@ -578,7 +578,7 @@
     M(697, CANNOT_RESTORE_TO_NONENCRYPTED_DISK) \
     M(698, INVALID_REDIS_STORAGE_TYPE) \
     M(699, INVALID_REDIS_TABLE_STRUCTURE) \
-    M(700, USER_SESSION_LIMIT_EXCEEDED)  \
+    M(700, USER_SESSION_LIMIT_EXCEEDED) \
     M(701, CLUSTER_DOESNT_EXIST) \
     M(702, CLIENT_INFO_DOES_NOT_MATCH) \
     M(703, INVALID_IDENTIFIER) \
@@ -610,15 +610,17 @@
     M(729, ILLEGAL_TIME_SERIES_TAGS) \
     M(730, REFRESH_FAILED) \
     M(731, QUERY_CACHE_USED_WITH_NON_THROW_OVERFLOW_MODE) \
-    \
+    M(733, TABLE_IS_BEING_RESTARTED) \
+\
     M(900, DISTRIBUTED_CACHE_ERROR) \
     M(901, CANNOT_USE_DISTRIBUTED_CACHE) \
-    \
+    M(902, PROTOCOL_VERSION_MISMATCH) \
+\
     M(999, KEEPER_EXCEPTION) \
     M(1000, POCO_EXCEPTION) \
     M(1001, STD_EXCEPTION) \
     M(1002, UNKNOWN_EXCEPTION) \
-/* See END */
+    /* See END */
 
 #ifdef APPLY_FOR_EXTERNAL_ERROR_CODES
     #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) APPLY_FOR_EXTERNAL_ERROR_CODES(M)
diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp
index 5978dbc18d2..0898bdded83 100644
--- a/src/Common/FailPoint.cpp
+++ b/src/Common/FailPoint.cpp
@@ -49,11 +49,21 @@ static struct InitFiu
     ONCE(smt_commit_write_zk_fail_before_op) \
     ONCE(smt_commit_merge_change_version_before_op) \
     ONCE(smt_merge_mutate_intention_freeze_in_destructor) \
+    ONCE(smt_add_part_sleep_after_add_before_commit) \
+    ONCE(smt_sleep_in_constructor) \
     ONCE(meta_in_keeper_create_metadata_failure) \
+    ONCE(smt_insert_retry_timeout) \
+    ONCE(smt_insert_fake_hardware_error) \
+    ONCE(smt_sleep_after_hardware_in_insert) \
+    ONCE(smt_throw_keeper_exception_after_successful_insert) \
+    REGULAR(smt_dont_merge_first_part) \
+    REGULAR(smt_sleep_in_schedule_data_processing_job) \
     REGULAR(cache_warmer_stall) \
     REGULAR(check_table_query_delay_for_part) \
     REGULAR(dummy_failpoint) \
     REGULAR(prefetched_reader_pool_failpoint) \
+    REGULAR(shared_set_sleep_during_update) \
+    REGULAR(smt_outdated_parts_exception_response) \
     PAUSEABLE_ONCE(replicated_merge_tree_insert_retry_pause) \
     PAUSEABLE_ONCE(finish_set_quorum_failed_parts) \
     PAUSEABLE_ONCE(finish_clean_quorum_failed_parts) \
diff --git a/src/Common/FileChecker.cpp b/src/Common/FileChecker.cpp
index 01fa91dd4ef..13bd4a513df 100644
--- a/src/Common/FileChecker.cpp
+++ b/src/Common/FileChecker.cpp
@@ -196,7 +196,7 @@ void FileChecker::load()
 
 bool FileChecker::fileReallyExists(const String & path_) const
 {
-    return disk ? disk->exists(path_) : fs::exists(path_);
+    return disk ? disk->existsFile(path_) : fs::exists(path_);
 }
 
 size_t FileChecker::getRealFileSize(const String & path_) const
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index eff8206e676..ec10e25f74e 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -222,6 +222,8 @@
     M(SelectedBytes, "Number of bytes (uncompressed; for columns as they stored in memory) SELECTed from all tables.", ValueType::Bytes) \
     M(RowsReadByMainReader, "Number of rows read from MergeTree tables by the main reader (after PREWHERE step).", ValueType::Number) \
     M(RowsReadByPrewhereReaders, "Number of rows read from MergeTree tables (in total) by prewhere readers.", ValueType::Number) \
+    M(LoadedDataParts, "Number of data parts loaded by MergeTree tables during initialization.", ValueType::Number) \
+    M(LoadedDataPartsMicroseconds, "Microseconds spent by MergeTree tables for loading data parts during initialization.", ValueType::Microseconds) \
     \
     M(WaitMarksLoadMicroseconds, "Time spent loading marks", ValueType::Microseconds) \
     M(BackgroundLoadingMarksTasks, "Number of background tasks for loading marks", ValueType::Number) \
@@ -241,6 +243,8 @@
     M(MergeVerticalStageExecuteMilliseconds, "Total busy time spent for execution of vertical stage of background merges", ValueType::Milliseconds) \
     M(MergeProjectionStageTotalMilliseconds, "Total time spent for projection stage of background merges", ValueType::Milliseconds) \
     M(MergeProjectionStageExecuteMilliseconds, "Total busy time spent for execution of projection stage of background merges", ValueType::Milliseconds) \
+    M(MergePrewarmStageTotalMilliseconds, "Total time spent for prewarm stage of background merges", ValueType::Milliseconds) \
+    M(MergePrewarmStageExecuteMilliseconds, "Total busy time spent for execution of prewarm stage of background merges", ValueType::Milliseconds) \
     \
     M(MergingSortedMilliseconds, "Total time spent while merging sorted columns", ValueType::Milliseconds) \
     M(AggregatingSortedMilliseconds, "Total time spent while aggregating sorted columns", ValueType::Milliseconds) \
@@ -639,6 +643,8 @@ The server successfully detected this situation and will download merged part fr
     M(MetadataFromKeeperBackgroundCleanupTransactions, "Number of times old transaction idempotency token was cleaned up by background task", ValueType::Number) \
     M(MetadataFromKeeperBackgroundCleanupErrors, "Number of times an error was encountered in background cleanup task", ValueType::Number) \
     \
+    M(SharedMergeTreeMetadataCacheHintLoadedFromCache, "Number of times metadata cache hint was found without going to Keeper", ValueType::Number) \
+    \
     M(KafkaRebalanceRevocations, "Number of partition revocations (the first stage of consumer group rebalance)", ValueType::Number) \
     M(KafkaRebalanceAssignments, "Number of partition assignments (the final stage of consumer group rebalance)", ValueType::Number) \
     M(KafkaRebalanceErrors, "Number of failed consumer group rebalances", ValueType::Number) \
@@ -742,29 +748,51 @@ The server successfully detected this situation and will download merged part fr
     M(ConnectionPoolIsFullMicroseconds, "Total time spent waiting for a slot in connection pool.", ValueType::Microseconds) \
     M(AsyncLoaderWaitMicroseconds, "Total time a query was waiting for async loader jobs.", ValueType::Microseconds) \
     \
-    M(DistrCacheServerSwitches, "Number of server switches between distributed cache servers in read/write-through cache", ValueType::Number) \
-    M(DistrCacheReadMicroseconds, "Time spent reading from distributed cache", ValueType::Microseconds) \
-    M(DistrCacheFallbackReadMicroseconds, "Time spend reading from fallback buffer instead of distribted cache", ValueType::Microseconds) \
-    M(DistrCachePrecomputeRangesMicroseconds, "Time spent to precompute read ranges", ValueType::Microseconds) \
-    M(DistrCacheNextImplMicroseconds, "Time spend in ReadBufferFromDistributedCache::nextImpl", ValueType::Microseconds) \
-    M(DistrCacheOpenedConnections, "The number of open connections to distributed cache", ValueType::Number) \
-    M(DistrCacheReusedConnections, "The number of reused connections to distributed cache", ValueType::Number) \
-    M(DistrCacheHoldConnections, "The number of used connections to distributed cache", ValueType::Number) \
+    M(DistrCacheServerSwitches, "Distributed Cache read buffer event. Number of server switches between distributed cache servers in read/write-through cache", ValueType::Number) \
+    M(DistrCacheReadMicroseconds, "Distributed Cache read buffer event. Time spent reading from distributed cache", ValueType::Microseconds) \
+    M(DistrCacheFallbackReadMicroseconds, "Distributed Cache read buffer event. Time spent reading from fallback buffer instead of distributed cache", ValueType::Microseconds) \
+    M(DistrCachePrecomputeRangesMicroseconds, "Distributed Cache read buffer event. Time spent to precompute read ranges", ValueType::Microseconds) \
+    M(DistrCacheNextImplMicroseconds, "Distributed Cache read buffer event. Time spent in ReadBufferFromDistributedCache::nextImpl", ValueType::Microseconds) \
+    M(DistrCacheStartRangeMicroseconds, "Distributed Cache read buffer event. Time spent to start a new read range with distributed cache", ValueType::Microseconds) \
+    M(DistrCacheIgnoredBytesWhileWaitingProfileEvents, "Distributed Cache read buffer event. Ignored bytes while waiting for profile events in distributed cache", ValueType::Bytes) \
+    M(DistrCacheRangeChange, "Distributed Cache read buffer event. Number of times we changed read range because of seek/last_position change", ValueType::Number) \
     \
-    M(DistrCacheGetResponseMicroseconds, "Time spend to wait for response from distributed cache", ValueType::Microseconds) \
-    M(DistrCacheStartRangeMicroseconds, "Time spent to start a new read range with distributed cache", ValueType::Microseconds) \
-    M(DistrCacheLockRegistryMicroseconds, "Time spent to take DistributedCacheRegistry lock", ValueType::Microseconds) \
-    M(DistrCacheUnusedPackets, "Number of skipped unused packets from distributed cache", ValueType::Number) \
-    M(DistrCachePackets, "Total number of packets received from distributed cache", ValueType::Number) \
-    M(DistrCacheUnusedPacketsBytes, "The number of bytes in Data packets which were ignored", ValueType::Bytes) \
-    M(DistrCacheRegistryUpdateMicroseconds, "Time spent updating distributed cache registry", ValueType::Microseconds) \
-    M(DistrCacheRegistryUpdates, "Number of distributed cache registry updates", ValueType::Number) \
+    M(DistrCacheGetResponseMicroseconds, "Distributed Cache client event. Time spent waiting for response from distributed cache", ValueType::Microseconds) \
+    M(DistrCacheReadErrors, "Distributed Cache client event. Number of distributed cache errors during read", ValueType::Number) \
+    M(DistrCacheMakeRequestErrors, "Distributed Cache client event. Number of distributed cache errors when making a request", ValueType::Number) \
+    M(DistrCacheReceiveResponseErrors, "Distributed Cache client event. Number of distributed cache errors when receiving a response to a request", ValueType::Number) \
     \
-    M(DistrCacheConnectMicroseconds, "The time spent to connect to distributed cache", ValueType::Microseconds) \
-    M(DistrCacheConnectAttempts, "The number of connection attempts to distributed cache", ValueType::Number) \
-    M(DistrCacheGetClient, "Number of client access times", ValueType::Number) \
+    M(DistrCachePackets, "Distributed Cache client event. Total number of packets received from distributed cache", ValueType::Number) \
+    M(DistrCachePacketsBytes, "Distributed Cache client event. The number of bytes in Data packets which were not ignored", ValueType::Bytes) \
+    M(DistrCacheUnusedPackets, "Distributed Cache client event. Number of skipped unused packets from distributed cache", ValueType::Number) \
+    M(DistrCacheUnusedPacketsBytes, "Distributed Cache client event. The number of bytes in Data packets which were ignored", ValueType::Bytes) \
+    M(DistrCacheUnusedPacketsBufferAllocations, "Distributed Cache client event. The number of extra buffer allocations in case we could not reuse existing buffer", ValueType::Number) \
     \
-    M(DistrCacheServerProcessRequestMicroseconds, "Time spent processing request on DistributedCache server side", ValueType::Microseconds) \
+    M(DistrCacheLockRegistryMicroseconds, "Distributed Cache registry event. Time spent to take DistributedCacheRegistry lock", ValueType::Microseconds) \
+    M(DistrCacheRegistryUpdateMicroseconds, "Distributed Cache registry event. Time spent updating distributed cache registry", ValueType::Microseconds) \
+    M(DistrCacheRegistryUpdates, "Distributed Cache registry event. Number of distributed cache registry updates", ValueType::Number) \
+    M(DistrCacheHashRingRebuilds, "Distributed Cache registry event. Number of distributed cache hash ring rebuilds", ValueType::Number) \
+    \
+    M(DistrCacheReadBytesFromCache, "Distributed Cache read buffer event. Bytes read from distributed cache", ValueType::Bytes) \
+    M(DistrCacheReadBytesFromFallbackBuffer, "Distributed Cache read buffer event. Bytes read from fallback buffer", ValueType::Bytes) \
+    \
+    M(DistrCacheRangeResetBackward, "Distributed Cache read buffer event. Number of times we reset read range backward because of seek/last_position change", ValueType::Number) \
+    M(DistrCacheRangeResetForward, "Distributed Cache read buffer event. Number of times we reset read range forward because of seek/last_position change", ValueType::Number) \
+    \
+    M(DistrCacheOpenedConnections, "Distributed Cache connection event. The number of open connections to distributed cache", ValueType::Number) \
+    M(DistrCacheReusedConnections, "Distributed Cache connection event. The number of reused connections to distributed cache", ValueType::Number) \
+    M(DistrCacheOpenedConnectionsBypassingPool, "Distributed Cache connection event. The number of open connections to distributed cache bypassing pool", ValueType::Number) \
+    M(DistrCacheConnectMicroseconds, "Distributed Cache connection event. The time spent to connect to distributed cache", ValueType::Microseconds) \
+    M(DistrCacheConnectAttempts, "Distributed Cache connection event. The number of connection attempts to distributed cache", ValueType::Number) \
+    M(DistrCacheGetClientMicroseconds, "Distributed Cache connection event. Time spent getting client for distributed cache", ValueType::Microseconds) \
+    \
+    M(DistrCacheServerProcessRequestMicroseconds, "Distributed Cache server event. Time spent processing request on DistributedCache server side", ValueType::Microseconds) \
+    M(DistrCacheServerStartRequestPackets, "Distributed Cache server event. Number of StartRequest packets in DistributedCacheServer", ValueType::Number) \
+    M(DistrCacheServerContinueRequestPackets, "Distributed Cache server event. Number of ContinueRequest packets in DistributedCacheServer", ValueType::Number) \
+    M(DistrCacheServerEndRequestPackets, "Distributed Cache server event. Number of EndRequest packets in DistributedCacheServer", ValueType::Number) \
+    M(DistrCacheServerAckRequestPackets, "Distributed Cache server event. Number of AckRequest packets in DistributedCacheServer", ValueType::Number) \
+    M(DistrCacheServerNewS3CachedClients, "Distributed Cache server event. The number of new cached s3 clients", ValueType::Number) \
+    M(DistrCacheServerReusedS3CachedClients, "Distributed Cache server event. The number of reused cached s3 clients", ValueType::Number) \
     \
     M(LogTest, "Number of log messages with level Test", ValueType::Number) \
     M(LogTrace, "Number of log messages with level Trace", ValueType::Number) \
@@ -788,15 +816,38 @@ The server successfully detected this situation and will download merged part fr
     M(InterfacePostgreSQLReceiveBytes, "Number of bytes received through PostgreSQL interfaces", ValueType::Bytes) \
     \
     M(ParallelReplicasUsedCount, "Number of replicas used to execute a query with task-based parallel replicas", ValueType::Number) \
+    M(ParallelReplicasAvailableCount, "Number of replicas available to execute a query with task-based parallel replicas", ValueType::Number) \
+    M(ParallelReplicasUnavailableCount, "Number of replicas which was chosen, but found to be unavailable during query execution with task-based parallel replicas", ValueType::Number) \
     \
+    M(SharedMergeTreeVirtualPartsUpdates, "Virtual parts update count", ValueType::Number) \
+    M(SharedMergeTreeVirtualPartsUpdatesByLeader, "Virtual parts updates by leader", ValueType::Number) \
+    M(SharedMergeTreeVirtualPartsUpdateMicroseconds, "Virtual parts update microseconds", ValueType::Microseconds) \
+    M(SharedMergeTreeVirtualPartsUpdatesFromZooKeeper, "Virtual parts updates count from ZooKeeper", ValueType::Number) \
+    M(SharedMergeTreeVirtualPartsUpdatesFromZooKeeperMicroseconds, "Virtual parts updates from ZooKeeper microseconds", ValueType::Microseconds) \
+    M(SharedMergeTreeVirtualPartsUpdatesPeerNotFound, "Virtual parts updates from peer failed because no peer was found", ValueType::Number) \
+    M(SharedMergeTreeVirtualPartsUpdatesFromPeer, "Virtual parts updates count from peer", ValueType::Number) \
+    M(SharedMergeTreeVirtualPartsUpdatesFromPeerMicroseconds, "Virtual parts updates from peer microseconds", ValueType::Microseconds) \
+    M(SharedMergeTreeVirtualPartsUpdatesForMergesOrStatus, "Virtual parts updates from non-default background job", ValueType::Number) \
+    M(SharedMergeTreeVirtualPartsUpdatesLeaderFailedElection, "Virtual parts updates leader election failed", ValueType::Number) \
+    M(SharedMergeTreeVirtualPartsUpdatesLeaderSuccessfulElection, "Virtual parts updates leader election successful", ValueType::Number) \
+    M(SharedMergeTreeMergeMutationAssignmentAttempt, "How many times we tried to assign merge or mutation", ValueType::Number) \
+    M(SharedMergeTreeMergeMutationAssignmentFailedWithNothingToDo, "How many times we tried to assign merge or mutation and failed because nothing to merge", ValueType::Number) \
+    M(SharedMergeTreeMergeMutationAssignmentFailedWithConflict, "How many times we tried to assign merge or mutation and failed because of conflict in Keeper", ValueType::Number) \
+    M(SharedMergeTreeMergeMutationAssignmentSuccessful, "How many times we successfully assigned merge or mutation", ValueType::Number) \
+    M(SharedMergeTreeMergePartsMovedToOudated, "How many parts moved to outdated directory", ValueType::Number) \
+    M(SharedMergeTreeMergePartsMovedToCondemned, "How many parts moved to condemned directory", ValueType::Number) \
+    M(SharedMergeTreeOutdatedPartsConfirmationRequest, "How many ZooKeeper requests were used to confirm outdated parts", ValueType::Number) \
+    M(SharedMergeTreeOutdatedPartsConfirmationInvocations, "How many invocations were made to confirm outdated parts", ValueType::Number) \
+    M(SharedMergeTreeOutdatedPartsHTTPRequest, "How many HTTP requests were sent to confirm outdated parts", ValueType::Number) \
+    M(SharedMergeTreeOutdatedPartsHTTPResponse, "How many HTTP responses were sent to confirm outdated parts", ValueType::Number) \
+    M(SharedMergeTreeCondemnedPartsKillRequest, "How many ZooKeeper requests were used to remove condemned parts", ValueType::Number) \
+    M(SharedMergeTreeCondemnedPartsLockConfict, "How many times we failed to acquire lock because of conflict", ValueType::Number) \
+    M(SharedMergeTreeCondemnedPartsRemoved, "How many condemned parts were removed", ValueType::Number) \
     M(KeeperLogsEntryReadFromLatestCache, "Number of log entries in Keeper being read from latest logs cache", ValueType::Number) \
     M(KeeperLogsEntryReadFromCommitCache, "Number of log entries in Keeper being read from commit logs cache", ValueType::Number) \
     M(KeeperLogsEntryReadFromFile, "Number of log entries in Keeper being read directly from the changelog file", ValueType::Number) \
     M(KeeperLogsPrefetchedEntries, "Number of log entries in Keeper being prefetched from the changelog file", ValueType::Number) \
     \
-    M(ParallelReplicasAvailableCount, "Number of replicas available to execute a query with task-based parallel replicas", ValueType::Number) \
-    M(ParallelReplicasUnavailableCount, "Number of replicas which was chosen, but found to be unavailable during query execution with task-based parallel replicas", ValueType::Number) \
-    \
     M(StorageConnectionsCreated, "Number of created connections for storages", ValueType::Number) \
     M(StorageConnectionsReused, "Number of reused connections for storages", ValueType::Number) \
     M(StorageConnectionsReset, "Number of reset connections for storages", ValueType::Number) \
@@ -828,6 +879,9 @@ The server successfully detected this situation and will download merged part fr
     M(ReadWriteBufferFromHTTPRequestsSent, "Number of HTTP requests sent by ReadWriteBufferFromHTTP", ValueType::Number) \
     M(ReadWriteBufferFromHTTPBytes, "Total size of payload bytes received and sent by ReadWriteBufferFromHTTP. Doesn't include HTTP headers.", ValueType::Bytes) \
     \
+    M(SharedDatabaseCatalogFailedToApplyState, "Number of failures to apply new state in SharedDatabaseCatalog", ValueType::Number) \
+    M(SharedDatabaseCatalogStateApplicationMicroseconds, "Total time spent on application of new state in SharedDatabaseCatalog", ValueType::Microseconds) \
+    \
     M(GWPAsanAllocateSuccess, "Number of successful allocations done by GWPAsan", ValueType::Number) \
     M(GWPAsanAllocateFailed, "Number of failed allocations done by GWPAsan (i.e. filled pool)", ValueType::Number) \
     M(GWPAsanFree, "Number of free operations done by GWPAsan", ValueType::Number) \
diff --git a/src/Common/ProfileEvents.h b/src/Common/ProfileEvents.h
index 50d6a5e1a18..94551c23ac0 100644
--- a/src/Common/ProfileEvents.h
+++ b/src/Common/ProfileEvents.h
@@ -38,6 +38,9 @@ namespace ProfileEvents
         };
         Timer(Counters & counters_, Event timer_event_, Resolution resolution_);
         Timer(Counters & counters_, Event timer_event_, Event counter_event, Resolution resolution_);
+        Timer(Timer && other) noexcept /// Takes over the counters, events and running watch of `other`.
+            : counters(other.counters), timer_event(std::move(other.timer_event)), watch(std::move(other.watch)), resolution(std::move(other.resolution))
+            { other.cancel(); } /// `other`'s destructor still calls end(); cancel it so the moved-from timer presumably reports nothing more (NOTE(review): confirm end() adds nothing after cancel()).
         ~Timer() { end(); }
         void cancel() { watch.reset(); }
         void restart() { watch.restart(); }
diff --git a/src/Common/RemoteProxyConfigurationResolver.cpp b/src/Common/RemoteProxyConfigurationResolver.cpp
index 8fd9d381ece..6fd18c2c971 100644
--- a/src/Common/RemoteProxyConfigurationResolver.cpp
+++ b/src/Common/RemoteProxyConfigurationResolver.cpp
@@ -31,7 +31,7 @@ std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const
             endpoint.toString(),
             response.getStatus(),
             response.getReason(),
-            "");
+            /* body_length = */ 0);
 
     std::string proxy_host;
     Poco::StreamCopier::copyToString(response_body_stream, proxy_host);
diff --git a/src/Common/ZooKeeper/ZooKeeperRetries.h b/src/Common/ZooKeeper/ZooKeeperRetries.h
index 39782d08b33..b5b03971385 100644
--- a/src/Common/ZooKeeper/ZooKeeperRetries.h
+++ b/src/Common/ZooKeeper/ZooKeeperRetries.h
@@ -159,6 +159,8 @@ public:
     const std::string & getLastKeeperErrorMessage() const { return keeper_error.message; }
 
     /// action will be called only once and only after latest failed retry
+    /// NOTE: it is called only when the retries finish with a Keeper exception;
+    /// if they finish with any other exception, this function is not called.
     void actionAfterLastFailedRetry(std::function<void()> f) { action_after_last_failed_retry = std::move(f); }
 
     const std::string & getName() const { return name; }
diff --git a/src/Common/proxyConfigurationToPocoProxyConfig.cpp b/src/Common/proxyConfigurationToPocoProxyConfig.cpp
index c06014ac2dc..ed4742aaaae 100644
--- a/src/Common/proxyConfigurationToPocoProxyConfig.cpp
+++ b/src/Common/proxyConfigurationToPocoProxyConfig.cpp
@@ -25,15 +25,11 @@ namespace
  * `curl` strips leading dot and accepts url gitlab.com as a match for no_proxy .gitlab.com,
  * while `wget` does an exact match.
  * */
-std::string buildPocoRegexpEntryWithoutLeadingDot(const std::string & host)
+std::string buildPocoRegexpEntryWithoutLeadingDot(std::string_view host)
 {
-    std::string_view view_without_leading_dot = host;
-    if (host[0] == '.')
-    {
-        view_without_leading_dot = std::string_view {host.begin() + 1u, host.end()};
-    }
-
-    return RE2::QuoteMeta(view_without_leading_dot);
+    if (host.starts_with('.'))
+        host.remove_prefix(1); /// Only shrinks this function's by-value view; the caller's string is untouched.
+    return RE2::QuoteMeta(host);
 }
 
 }
diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp
index c96f17d7064..ad8ff89678e 100644
--- a/src/Coordination/Changelog.cpp
+++ b/src/Coordination/Changelog.cpp
@@ -1890,7 +1890,7 @@ void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end)
     {
         auto & changelog_description = itr->second;
 
-        if (!disk->exists(timestamp_folder))
+        if (!disk->existsDirectory(timestamp_folder))
         {
             LOG_WARNING(log, "Moving broken logs to {}", timestamp_folder);
             disk->createDirectories(timestamp_folder);
diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp
index 97c57b65a05..1c19a19a6a4 100644
--- a/src/Coordination/KeeperDispatcher.cpp
+++ b/src/Coordination/KeeperDispatcher.cpp
@@ -964,7 +964,7 @@ static uint64_t getTotalSize(const DiskPtr & disk, const std::string & path = ""
     uint64_t size = 0;
     for (auto it = disk->iterateDirectory(path); it->isValid(); it->next())
     {
-        if (disk->isFile(it->path()))
+        if (disk->existsFile(it->path()))
             size += disk->getFileSize(it->path());
         else
             size += getTotalSize(disk, it->path());
diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp
index 1195b2d4150..1568b616b44 100644
--- a/src/Coordination/KeeperSnapshotManager.cpp
+++ b/src/Coordination/KeeperSnapshotManager.cpp
@@ -913,7 +913,7 @@ SnapshotFileInfoPtr KeeperSnapshotManager<Storage>::getLatestSnapshotInfo() cons
 
         try
         {
-            if (disk->exists(path))
+            if (disk->existsFile(path))
                 return std::make_shared<SnapshotFileInfo>(path, disk);
         }
         catch (...)
diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp
index 93131c09f59..87f17a67f86 100644
--- a/src/Coordination/KeeperStateManager.cpp
+++ b/src/Coordination/KeeperStateManager.cpp
@@ -334,7 +334,7 @@ void KeeperStateManager::save_state(const nuraft::srv_state & state)
 
     auto disk = getStateFileDisk();
 
-    if (disk->exists(server_state_file_name))
+    if (disk->existsFile(server_state_file_name))
     {
         auto buf = disk->writeFile(copy_lock_file);
         buf->finalize();
@@ -422,7 +422,7 @@ nuraft::ptr<nuraft::srv_state> KeeperStateManager::read_state()
         }
     };
 
-    if (disk->exists(server_state_file_name))
+    if (disk->existsFile(server_state_file_name))
     {
         auto state = try_read_file(server_state_file_name);
 
@@ -435,9 +435,9 @@ nuraft::ptr<nuraft::srv_state> KeeperStateManager::read_state()
         disk->removeFile(server_state_file_name);
     }
 
-    if (disk->exists(old_path))
+    if (disk->existsFile(old_path))
     {
-        if (disk->exists(copy_lock_file))
+        if (disk->existsFile(copy_lock_file))
         {
             disk->removeFile(old_path);
             disk->removeFile(copy_lock_file);
@@ -453,7 +453,7 @@ nuraft::ptr<nuraft::srv_state> KeeperStateManager::read_state()
             disk->removeFile(old_path);
         }
     }
-    else if (disk->exists(copy_lock_file))
+    else if (disk->existsFile(copy_lock_file))
     {
         disk->removeFile(copy_lock_file);
     }
diff --git a/src/Core/FormatFactorySettingsDeclaration.h b/src/Core/FormatFactorySettingsDeclaration.h
index 28fae8e305f..4225605c957 100644
--- a/src/Core/FormatFactorySettingsDeclaration.h
+++ b/src/Core/FormatFactorySettingsDeclaration.h
@@ -170,6 +170,9 @@ Avoid reordering rows when reading from Parquet files. Usually makes it much slo
 )", 0) \
     M(Bool, input_format_parquet_filter_push_down, true, R"(
 When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and min/max statistics in the Parquet metadata.
+)", 0) \
+    M(Bool, input_format_parquet_bloom_filter_push_down, false, R"(
+When reading Parquet files, skip whole row groups based on the WHERE expressions and bloom filter in the Parquet metadata.
 )", 0) \
     M(Bool, input_format_parquet_use_native_reader, false, R"(
 When reading Parquet files, to use native reader instead of arrow reader.
@@ -190,6 +193,9 @@ When reading ORC files, skip whole stripes or row groups based on the WHERE/PREW
 )", 0) \
     M(String, input_format_orc_reader_time_zone_name, "GMT", R"(
 The time zone name for ORC row reader, the default ORC row reader's time zone is GMT.
+)", 0) \
+    M(Bool, input_format_orc_dictionary_as_low_cardinality, true, R"(
+Treat ORC dictionary encoded columns as LowCardinality columns while reading ORC files.
 )", 0) \
     M(Bool, input_format_parquet_allow_missing_columns, true, R"(
 Allow missing columns while reading Parquet input formats
@@ -604,6 +610,9 @@ See also:
 -   [Interval](../../sql-reference/data-types/special-data-types/interval.md)
 )", 0) \
     \
+    M(Bool, date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands, false, R"(
+Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to [0, 3, 6],
+corresponding to 'seconds', 'milliseconds', and 'microseconds')", 0) \
     M(Bool, input_format_ipv4_default_on_conversion_error, false, R"(
 Deserialization of IPv4 will use default values instead of throwing exception on conversion error.
 
diff --git a/src/Core/MergeSelectorAlgorithm.h b/src/Core/MergeSelectorAlgorithm.h
new file mode 100644
index 00000000000..0f6831c1f9e
--- /dev/null
+++ b/src/Core/MergeSelectorAlgorithm.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <cstdint>
+namespace DB
+{
+
+enum class MergeSelectorAlgorithm : uint8_t  /// Identifies a merge selector algorithm; declared as a setting enum in Core/SettingsEnums.h.
+{
+    SIMPLE,             /// Mapped to the setting value "Simple" in Core/SettingsEnums.cpp.
+    STOCHASTIC_SIMPLE,  /// Mapped to the setting value "StochasticSimple" in Core/SettingsEnums.cpp.
+};
+
+}
diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp
index 01d08fa4238..cdaa305e804 100644
--- a/src/Core/Settings.cpp
+++ b/src/Core/Settings.cpp
@@ -700,6 +700,9 @@ Move more conditions from WHERE to PREWHERE and do reads from disk and filtering
 )", 0) \
     M(Bool, move_primary_key_columns_to_end_of_prewhere, true, R"(
 Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.
+)", 0) \
+    M(Bool, allow_reorder_prewhere_conditions, true, R"(
+When moving conditions from WHERE to PREWHERE, allow reordering them to optimize filtering
 )", 0) \
     \
     M(UInt64, alter_sync, 1, R"(
@@ -2700,7 +2703,7 @@ The maximum read speed in bytes per second for particular backup on server. Zero
 Log query performance statistics into the query_log, query_thread_log and query_views_log.
 )", 0) \
     M(Bool, log_query_settings, true, R"(
-Log query settings into the query_log.
+Log query settings into the query_log and OpenTelemetry span log.
 )", 0) \
     M(Bool, log_query_threads, false, R"(
 Setting up query threads logging.
@@ -4812,6 +4815,9 @@ Max attempts to read with backoff
 )", 0) \
     M(Bool, enable_filesystem_cache, true, R"(
 Use cache for remote filesystem. This setting does not turn on/off cache for disks (must be done via disk config), but allows to bypass cache for some queries if intended
+)", 0) \
+    M(String, filesystem_cache_name, "", R"(
+Filesystem cache name to use for stateless table engines or data lakes
 )", 0) \
     M(Bool, enable_filesystem_cache_on_write_operations, false, R"(
 Write into cache on write operations. To actually work this setting requires be added to disk config too
@@ -5151,7 +5157,7 @@ SELECT * FROM test_table
 Rewrite count distinct to subquery of group by
 )", 0) \
     M(Bool, throw_if_no_data_to_insert, true, R"(
-Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert)
+Allows or forbids empty INSERTs, enabled by default (throws an error on an empty insert). Only applies to INSERTs using [`clickhouse-client`](/docs/en/interfaces/cli) or using the [gRPC interface](/docs/en/interfaces/grpc).
 )", 0) \
     M(Bool, compatibility_ignore_auto_increment_in_create_table, false, R"(
 Ignore AUTO_INCREMENT keyword in column declaration if true, otherwise return error. It simplifies migration from MySQL
@@ -5376,7 +5382,7 @@ Result:
 If enabled, server will ignore all DROP table queries with specified probability (for Memory and JOIN engines it will replcase DROP to TRUNCATE). Used for testing purposes
 )", 0) \
     M(Bool, traverse_shadow_remote_data_paths, false, R"(
-Traverse shadow directory when query system.remote_data_paths
+Traverse frozen data (shadow directory) in addition to actual table data when query system.remote_data_paths
 )", 0) \
     M(Bool, geo_distance_returns_float64_on_float64_arguments, true, R"(
 If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.
@@ -5498,8 +5504,8 @@ Replace external dictionary sources to Null on restore. Useful for testing purpo
     M(Bool, create_if_not_exists, false, R"(
 Enable `IF NOT EXISTS` for `CREATE` statement by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown.
 )", 0) \
-    M(Bool, enable_secure_identifiers, false, R"(
-If enabled, only allow secure identifiers which contain only underscore and alphanumeric characters
+    M(Bool, enforce_strict_identifier_format, false, R"(
+If enabled, only allow identifiers containing alphanumeric characters and underscores.
 )", 0) \
     M(Bool, mongodb_throw_on_unsupported_query, true, R"(
 If enabled, MongoDB tables will return an error when a MongoDB query cannot be built. Otherwise, ClickHouse reads the full table and processes it locally. This option does not apply to the legacy implementation or when 'allow_experimental_analyzer=0'.
@@ -6199,6 +6205,16 @@ std::vector<std::string_view> Settings::getUnchangedNames() const
     return setting_names;
 }
 
+/// Collects the name of every setting yielded by impl->allChanged(), in iteration order.
+std::vector<std::string_view> Settings::getChangedNames() const
+{
+    std::vector<std::string_view> changed_names;
+    for (const auto & changed_setting : impl->allChanged())
+        changed_names.emplace_back(changed_setting.getName());
+
+    return changed_names;
+}
+
 void Settings::dumpToSystemSettingsColumns(MutableColumnsAndConstraints & params) const
 {
     MutableColumns & res_columns = params.res_columns;
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index ecfd4240a59..2cb5f95d71c 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -134,6 +134,7 @@ struct Settings
     std::vector<std::string_view> getAllRegisteredNames() const;
     std::vector<std::string_view> getChangedAndObsoleteNames() const;
     std::vector<std::string_view> getUnchangedNames() const;
+    std::vector<std::string_view> getChangedNames() const;
 
     void dumpToSystemSettingsColumns(MutableColumnsAndConstraints & params) const;
     void dumpToMapColumn(IColumn * column, bool changed_only = true) const;
diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp
index 34e3e9ec3f4..bfb78b36a2d 100644
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@@ -69,17 +69,18 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
     {"24.10",
         {
             {"check_table_structure_completely", true, false, "Add new setting to allow attach when source table's projections and secondary indices is a subset of those in the target table."},
+            {"enforce_strict_identifier_format", false, false, "New setting."},
             {"enable_parsing_to_custom_serialization", false, true, "New setting"},
             {"mongodb_throw_on_unsupported_query", false, true, "New setting."},
             {"enable_parallel_replicas", false, false, "Parallel replicas with read tasks became the Beta tier feature."},
             {"parallel_replicas_mode", "read_tasks", "read_tasks", "This setting was introduced as a part of making parallel replicas feature Beta"},
+            {"filesystem_cache_name", "", "", "Filesystem cache name to use for stateless table engines or data lakes"},
             {"restore_replace_external_dictionary_source_to_null", false, false, "New setting."},
             {"show_create_query_identifier_quoting_rule", "when_necessary", "when_necessary", "New setting."},
             {"show_create_query_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
             {"output_format_native_write_json_as_string", false, false, "Add new setting to allow write JSON column as single String column in Native format"},
             {"output_format_binary_write_json_as_string", false, false, "Add new setting to write values of JSON type as JSON string in RowBinary output format"},
             {"input_format_binary_read_json_as_string", false, false, "Add new setting to read values of JSON type as JSON string in RowBinary input format"},
-            {"enable_secure_identifiers", false, false, "New setting."},
             {"min_free_disk_bytes_to_perform_insert", 0, 0, "New setting."},
             {"min_free_disk_ratio_to_perform_insert", 0.0, 0.0, "New setting."},
             {"cloud_mode_database_engine", 1, 1, "A setting for ClickHouse Cloud"},
@@ -98,8 +99,12 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
             {"distributed_cache_read_alignment", 0, 0, "A setting for ClickHouse Cloud"},
             {"distributed_cache_max_unacked_inflight_packets", 10, 10, "A setting for ClickHouse Cloud"},
             {"distributed_cache_data_packet_ack_window", 5, 5, "A setting for ClickHouse Cloud"},
+            {"input_format_orc_dictionary_as_low_cardinality", false, true, "Treat ORC dictionary encoded columns as LowCardinality columns while reading ORC files"},
             {"allow_experimental_refreshable_materialized_view", false, true, "Not experimental anymore"},
             {"max_parts_to_move", 1000, 1000, "New setting"},
+            {"allow_reorder_prewhere_conditions", false, true, "New setting"},
+            {"input_format_parquet_bloom_filter_push_down", false, false, "When reading Parquet files, skip whole row groups based on the WHERE/PREWHERE expressions and bloom filter in the Parquet metadata."},
+            {"date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands", false, false, "Dynamically trim the trailing zeros of datetime64 values to adjust the output scale to (0, 3, 6), corresponding to 'seconds', 'milliseconds', and 'microseconds'."}
         }
     },
     {"24.9",
@@ -112,7 +117,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
             {"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
             {"parallel_replicas_mark_segment_size", 128, 0, "Value for this setting now determined automatically"},
             {"database_replicated_allow_replicated_engine_arguments", 1, 0, "Don't allow explicit arguments by default"},
-            {"database_replicated_allow_explicit_uuid", 0, 0, "Added a new setting to disallow explicitly specifying table UUID"},
+            {"database_replicated_allow_explicit_uuid", 1, 0, "Added a new setting to disallow explicitly specifying table UUID"},
             {"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"},
             {"join_to_sort_minimum_perkey_rows", 0, 40, "The lower limit of per-key average rows in the right table to determine whether to rerange the right table by key in left or inner join. This setting ensures that the optimization is not applied for sparse table keys"},
             {"join_to_sort_maximum_table_rows", 0, 10000, "The maximum number of rows in the right table to determine whether to rerange the right table by key in left or inner join"},
diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp
index 99f9162867b..7c0d2bf9aa1 100644
--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@@ -1,7 +1,6 @@
 #include <Core/SettingsEnums.h>
 #include <magic_enum.hpp>
 #include <Access/Common/SQLSecurityDefs.h>
-
 #include <boost/range/adaptor/map.hpp>
 
 
@@ -273,4 +272,11 @@ IMPLEMENT_SETTING_ENUM(
     {{"user_display", IdentifierQuotingRule::UserDisplay},
      {"when_necessary", IdentifierQuotingRule::WhenNecessary},
      {"always", IdentifierQuotingRule::Always}})
+
+IMPLEMENT_SETTING_ENUM(
+    MergeSelectorAlgorithm,
+    ErrorCodes::BAD_ARGUMENTS,
+    {{"Simple", MergeSelectorAlgorithm::SIMPLE},
+     {"StochasticSimple", MergeSelectorAlgorithm::STOCHASTIC_SIMPLE}})
+
 }
diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h
index 0ed0e2aef0e..35d6e14a632 100644
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@@ -14,6 +14,7 @@
 #include <Parsers/IdentifierQuotingStyle.h>
 #include <QueryPipeline/SizeLimits.h>
 #include <Common/ShellCommandSettings.h>
+#include <Core/MergeSelectorAlgorithm.h>
 
 
 namespace DB
@@ -363,4 +364,6 @@ enum class GroupArrayActionWhenLimitReached : uint8_t
 };
 DECLARE_SETTING_ENUM(GroupArrayActionWhenLimitReached)
 
+DECLARE_SETTING_ENUM(MergeSelectorAlgorithm)
+
 }
diff --git a/src/DataTypes/EnumValues.h b/src/DataTypes/EnumValues.h
index 161ca2425c3..ec5991abff1 100644
--- a/src/DataTypes/EnumValues.h
+++ b/src/DataTypes/EnumValues.h
@@ -36,8 +36,8 @@ public:
 
     auto findByValue(const T & value) const
     {
-        const auto it = value_to_name_map.find(value);
-        if (it == std::end(value_to_name_map))
+        auto it = value_to_name_map.find(value);
+        if (it == value_to_name_map.end())
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value {} in enum", toString(value));
 
         return it;
@@ -58,7 +58,7 @@ public:
     bool getNameForValue(const T & value, StringRef & result) const
     {
         const auto it = value_to_name_map.find(value);
-        if (it == std::end(value_to_name_map))
+        if (it == value_to_name_map.end())
             return false;
 
         result = it->second;
diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp
index 945d36dbb92..7165ad18999 100644
--- a/src/DataTypes/IDataType.cpp
+++ b/src/DataTypes/IDataType.cpp
@@ -321,6 +321,8 @@ bool isUInt8(TYPE data_type) { return WhichDataType(data_type).isUInt8(); } \
 bool isUInt16(TYPE data_type) { return WhichDataType(data_type).isUInt16(); } \
 bool isUInt32(TYPE data_type) { return WhichDataType(data_type).isUInt32(); } \
 bool isUInt64(TYPE data_type) { return WhichDataType(data_type).isUInt64(); } \
+bool isUInt128(TYPE data_type) { return WhichDataType(data_type).isUInt128(); } \
+bool isUInt256(TYPE data_type) { return WhichDataType(data_type).isUInt256(); } \
 bool isNativeUInt(TYPE data_type) { return WhichDataType(data_type).isNativeUInt(); } \
 bool isUInt(TYPE data_type) { return WhichDataType(data_type).isUInt(); } \
 \
@@ -328,6 +330,8 @@ bool isInt8(TYPE data_type) { return WhichDataType(data_type).isInt8(); } \
 bool isInt16(TYPE data_type) { return WhichDataType(data_type).isInt16(); } \
 bool isInt32(TYPE data_type) { return WhichDataType(data_type).isInt32(); } \
 bool isInt64(TYPE data_type) { return WhichDataType(data_type).isInt64(); } \
+bool isInt128(TYPE data_type) { return WhichDataType(data_type).isInt128(); } \
+bool isInt256(TYPE data_type) { return WhichDataType(data_type).isInt256(); } \
 bool isNativeInt(TYPE data_type) { return WhichDataType(data_type).isNativeInt(); } \
 bool isInt(TYPE data_type) { return WhichDataType(data_type).isInt(); } \
 \
diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h
index b9b0029fd94..2d1e1b9bc76 100644
--- a/src/DataTypes/IDataType.h
+++ b/src/DataTypes/IDataType.h
@@ -457,7 +457,9 @@ struct WhichDataType
 bool isUInt8(TYPE data_type); \
 bool isUInt16(TYPE data_type); \
 bool isUInt32(TYPE data_type); \
-bool isUInt64(TYPE data_type); \
+bool isUInt64(TYPE data_type);\
+bool isUInt128(TYPE data_type);\
+bool isUInt256(TYPE data_type); \
 bool isNativeUInt(TYPE data_type); \
 bool isUInt(TYPE data_type); \
 \
@@ -465,6 +467,8 @@ bool isInt8(TYPE data_type); \
 bool isInt16(TYPE data_type); \
 bool isInt32(TYPE data_type); \
 bool isInt64(TYPE data_type); \
+bool isInt128(TYPE data_type); \
+bool isInt256(TYPE data_type); \
 bool isNativeInt(TYPE data_type); \
 bool isInt(TYPE data_type); \
 \
diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp
index 442e29edd52..de8cb4bd0d6 100644
--- a/src/DataTypes/Serializations/SerializationDateTime64.cpp
+++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp
@@ -26,7 +26,10 @@ void SerializationDateTime64::serializeText(const IColumn & column, size_t row_n
     switch (settings.date_time_output_format)
     {
         case FormatSettings::DateTimeOutputFormat::Simple:
-            writeDateTimeText(value, scale, ostr, time_zone);
+            if (settings.date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands)
+                writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(value, scale, ostr, time_zone);
+            else
+                writeDateTimeText(value, scale, ostr, time_zone);
             return;
         case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
             writeDateTimeUnixTimestamp(value, scale, ostr);
diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp
index 5adf7351428..f914d9024e1 100644
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@@ -370,7 +370,7 @@ void DatabaseOnDisk::dropTable(ContextPtr local_context, const String & table_na
 
     for (const auto & [disk_name, disk] : getContext()->getDisksMap())
     {
-        if (disk->isReadOnly() || !disk->exists(table_data_path_relative))
+        if (disk->isReadOnly() || !disk->existsDirectory(table_data_path_relative))
             continue;
 
         LOG_INFO(log, "Removing data directory from disk {} with path {} for dropped table {} ", disk_name, table_data_path_relative, table_name);
diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h
index 990ce7ae0b0..caba4184a73 100644
--- a/src/Disks/DiskEncrypted.h
+++ b/src/Disks/DiskEncrypted.h
@@ -31,22 +31,22 @@ public:
 
     ReservationPtr reserve(UInt64 bytes) override;
 
-    bool exists(const String & path) const override
+    bool existsFile(const String & path) const override
     {
         auto wrapped_path = wrappedPath(path);
-        return delegate->exists(wrapped_path);
+        return delegate->existsFile(wrapped_path);
     }
 
-    bool isFile(const String & path) const override
+    bool existsDirectory(const String & path) const override
     {
         auto wrapped_path = wrappedPath(path);
-        return delegate->isFile(wrapped_path);
+        return delegate->existsDirectory(wrapped_path);
     }
 
-    bool isDirectory(const String & path) const override
+    bool existsFileOrDirectory(const String & path) const override
     {
         auto wrapped_path = wrappedPath(path);
-        return delegate->isDirectory(wrapped_path);
+        return delegate->existsFileOrDirectory(wrapped_path);
     }
 
     size_t getFileSize(const String & path) const override;
diff --git a/src/Disks/DiskEncryptedTransaction.cpp b/src/Disks/DiskEncryptedTransaction.cpp
index 827b42cf0cd..2660051e1d3 100644
--- a/src/Disks/DiskEncryptedTransaction.cpp
+++ b/src/Disks/DiskEncryptedTransaction.cpp
@@ -71,7 +71,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskEncryptedTransaction::writeFile( //
     FileEncryption::Header header;
     String key;
     UInt64 old_file_size = 0;
-    if (mode == WriteMode::Append && delegate_disk->exists(wrapped_path))
+    if (mode == WriteMode::Append && delegate_disk->existsFile(wrapped_path))
     {
         size_t size = delegate_disk->getFileSize(wrapped_path);
         old_file_size = size > FileEncryption::Header::kSize ? (size - FileEncryption::Header::kSize) : 0;
diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp
index 5363c36e7f8..732d2552628 100644
--- a/src/Disks/DiskLocal.cpp
+++ b/src/Disks/DiskLocal.cpp
@@ -262,17 +262,17 @@ std::optional<UInt64> DiskLocal::getUnreservedSpace() const
     return available_space;
 }
 
-bool DiskLocal::exists(const String & path) const
+bool DiskLocal::existsFileOrDirectory(const String & path) const
 {
     return fs::exists(fs::path(disk_path) / path);
 }
 
-bool DiskLocal::isFile(const String & path) const
+bool DiskLocal::existsFile(const String & path) const
 {
     return fs::is_regular_file(fs::path(disk_path) / path);
 }
 
-bool DiskLocal::isDirectory(const String & path) const
+bool DiskLocal::existsDirectory(const String & path) const
 {
     return fs::is_directory(fs::path(disk_path) / path);
 }
@@ -369,8 +369,11 @@ void DiskLocal::removeFile(const String & path)
 void DiskLocal::removeFileIfExists(const String & path)
 {
     auto fs_path = fs::path(disk_path) / path;
-    if (0 != unlink(fs_path.c_str()) && errno != ENOENT)
-        ErrnoException::throwFromPath(ErrorCodes::CANNOT_UNLINK, fs_path, "Cannot unlink file {}", fs_path);
+    if (0 != unlink(fs_path.c_str()))
+    {
+        if (errno != ENOENT)
+            ErrnoException::throwFromPath(ErrorCodes::CANNOT_UNLINK, fs_path, "Cannot unlink file {}", fs_path);
+    }
 }
 
 void DiskLocal::removeDirectory(const String & path)
@@ -638,7 +641,7 @@ void DiskLocal::setup()
 
     try
     {
-        if (exists(disk_checker_path))
+        if (existsFile(disk_checker_path))
         {
             auto magic_number = readDiskCheckerMagicNumber();
             if (magic_number)
diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h
index e6088e21a3b..1edff4cec34 100644
--- a/src/Disks/DiskLocal.h
+++ b/src/Disks/DiskLocal.h
@@ -42,11 +42,9 @@ public:
 
     UInt64 getKeepingFreeSpace() const override { return keep_free_space_bytes; }
 
-    bool exists(const String & path) const override;
-
-    bool isFile(const String & path) const override;
-
-    bool isDirectory(const String & path) const override;
+    bool existsFile(const String & path) const override;
+    bool existsDirectory(const String & path) const override;
+    bool existsFileOrDirectory(const String & path) const override;
 
     size_t getFileSize(const String & path) const override;
 
diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp
index baeb2197aad..0c3ba9dcff2 100644
--- a/src/Disks/IDisk.cpp
+++ b/src/Disks/IDisk.cpp
@@ -43,6 +43,18 @@ void IDisk::copyFile( /// NOLINT
     out->finalize();
 }
 
+/// Default implementation: probes with existsFile() and only then delegates to readFile(); an empty pointer means "no such file".
+std::unique_ptr<ReadBufferFromFileBase> IDisk::readFileIfExists( /// NOLINT
+    const String & path,
+    const ReadSettings & settings,
+    std::optional<size_t> read_hint,
+    std::optional<size_t> file_size) const
+{
+    if (!existsFile(path))
+        return {};
+    return readFile(path, settings, read_hint, file_size);
+}
+
 DiskTransactionPtr IDisk::createTransaction()
 {
     return std::make_shared<FakeDiskTransaction>(*this);
@@ -96,7 +108,7 @@ void asyncCopy(
     const WriteSettings & write_settings,
     const std::function<void()> & cancellation_hook)
 {
-    if (from_disk.isFile(from_path))
+    if (from_disk.existsFile(from_path))
     {
         runner(
             [&from_disk, from_path, &to_disk, to_path, &read_settings, &write_settings, &cancellation_hook] {
@@ -149,7 +161,7 @@ void IDisk::copyDirectoryContent(
     const WriteSettings & write_settings,
     const std::function<void()> & cancellation_hook)
 {
-    if (!to_disk->exists(to_dir))
+    if (!to_disk->existsDirectory(to_dir))
         to_disk->createDirectories(to_dir);
 
     copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir= */ false, read_settings, write_settings, cancellation_hook);
diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h
index fbe1cad9912..59f58a816e9 100644
--- a/src/Disks/IDisk.h
+++ b/src/Disks/IDisk.h
@@ -154,14 +154,12 @@ public:
     /// Amount of bytes which should be kept free on the disk.
     virtual UInt64 getKeepingFreeSpace() const { return 0; }
 
-    /// Return `true` if the specified file exists.
-    virtual bool exists(const String & path) const = 0;
+    /// Return `true` if the specified file/directory exists.
+    virtual bool existsFile(const String & path) const = 0;
+    virtual bool existsDirectory(const String & path) const = 0;
 
-    /// Return `true` if the specified file exists and it's a regular file (not a directory or special file type).
-    virtual bool isFile(const String & path) const = 0;
-
-    /// Return `true` if the specified file exists and it's a directory.
-    virtual bool isDirectory(const String & path) const = 0;
+    /// This method can be less efficient than the above.
+    virtual bool existsFileOrDirectory(const String & path) const = 0;
 
     /// Return size of the specified file.
     virtual size_t getFileSize(const String & path) const = 0;
@@ -223,6 +221,14 @@ public:
         std::optional<size_t> read_hint = {},
         std::optional<size_t> file_size = {}) const = 0;
 
+    /// Opens the file for reading, or returns nullptr if it does not exist; the arguments are the same as for `readFile`.
+    /// Overrides can save a request. The default implementation does a separate `existsFile` check before calling `readFile`.
+    virtual std::unique_ptr<ReadBufferFromFileBase> readFileIfExists( /// NOLINT
+        const String & path,
+        const ReadSettings & settings = ReadSettings{},
+        std::optional<size_t> read_hint = {},
+        std::optional<size_t> file_size = {}) const;
+
     /// Open the file for write and return WriteBufferFromFileBase object.
     virtual std::unique_ptr<WriteBufferFromFileBase> writeFile( /// NOLINT
         const String & path,
@@ -308,6 +314,13 @@ public:
             getDataSourceDescription().toString());
     }
 
+    virtual std::optional<StoredObjects> getStorageObjectsIfExist(const String & path) const
+    {
+        if (!existsFile(path))  /// No such file: report nothing instead of calling getStorageObjects().
+            return std::nullopt;
+        return getStorageObjects(path);
+    }
+
     /// For one local path there might be multiple remote paths in case of Log family engines.
     struct LocalPathWithObjectStoragePaths
     {
@@ -385,8 +398,8 @@ public:
 
     /// Check file exists and ClickHouse has an access to it
     /// Overrode in remote FS disks (s3/hdfs)
-    /// Required for remote disk to ensure that replica has access to data written by other node
-    virtual bool checkUniqueId(const String & id) const { return exists(id); }
+    /// Required for remote disk to ensure that the replica has access to data written by another node
+    virtual bool checkUniqueId(const String & id) const { return existsFile(id); }
 
     /// Invoked on partitions freeze query.
     virtual void onFreeze(const String &) { }
diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp
index a7d567930bd..d0677c4d7be 100644
--- a/src/Disks/IO/ThreadPoolReader.cpp
+++ b/src/Disks/IO/ThreadPoolReader.cpp
@@ -111,9 +111,9 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
     /// RWF_NOWAIT flag may return 0 even when not at end of file.
     /// It can't be distinguished from the real eof, so we have to
     /// disable pread with nowait.
-    static std::atomic<bool> has_pread_nowait_support = !hasBugInPreadV2();
+    static const bool has_pread_nowait_support = !hasBugInPreadV2();
 
-    if (has_pread_nowait_support.load(std::memory_order_relaxed))
+    if (has_pread_nowait_support)
     {
         /// It reports real time spent including the time spent while thread was preempted doing nothing.
         /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables).
@@ -161,7 +161,8 @@ std::future<IAsynchronousReader::Result> ThreadPoolReader::submit(Request reques
                 if (errno == ENOSYS || errno == EOPNOTSUPP)
                 {
                     /// No support for the syscall or the flag in the Linux kernel.
-                    has_pread_nowait_support.store(false, std::memory_order_relaxed);
+                    /// It shouldn't happen because we check the kernel version but let's
+                    /// fallback to the thread pool.
                     break;
                 }
                 if (errno == EAGAIN)
diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
index a59ee615454..163ff3a9c68 100644
--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
@@ -31,7 +31,7 @@ CachedObjectStorage::CachedObjectStorage(
 
 FileCache::Key CachedObjectStorage::getCacheKey(const std::string & path) const
 {
-    return cache->createKeyForPath(path);
+    return FileCacheKey::fromPath(path);
 }
 
 ObjectStorageKey
@@ -71,7 +71,7 @@ std::unique_ptr<ReadBufferFromFileBase> CachedObjectStorage::readObject( /// NOL
     {
         if (cache->isInitialized())
         {
-            auto cache_key = cache->createKeyForPath(object.remote_path);
+            auto cache_key = FileCacheKey::fromPath(object.remote_path);
             auto global_context = Context::getGlobalContextInstance();
             auto modified_read_settings = read_settings.withNestedBuffer();
 
diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp
index 2330833c70b..fbab25490c1 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp
@@ -91,15 +91,19 @@ StoredObjects DiskObjectStorage::getStorageObjects(const String & local_path) co
 }
 
 
-bool DiskObjectStorage::exists(const String & path) const
+bool DiskObjectStorage::existsFile(const String & path) const
 {
-    return metadata_storage->exists(path);
+    return metadata_storage->existsFile(path);
 }
 
-
-bool DiskObjectStorage::isFile(const String & path) const
+bool DiskObjectStorage::existsDirectory(const String & path) const
 {
-    return metadata_storage->isFile(path);
+    return metadata_storage->existsDirectory(path);
+}
+
+bool DiskObjectStorage::existsFileOrDirectory(const String & path) const
+{
+    return metadata_storage->existsFileOrDirectory(path);
 }
 
 
@@ -175,7 +179,7 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat
 
 void DiskObjectStorage::replaceFile(const String & from_path, const String & to_path)
 {
-    if (exists(to_path))
+    if (existsFile(to_path))
     {
         auto transaction = createObjectStorageTransaction();
         transaction->replaceFile(from_path, to_path);
@@ -258,12 +262,6 @@ void DiskObjectStorage::setReadOnly(const String & path)
 }
 
 
-bool DiskObjectStorage::isDirectory(const String & path) const
-{
-    return metadata_storage->isDirectory(path);
-}
-
-
 void DiskObjectStorage::createDirectory(const String & path)
 {
     auto transaction = createObjectStorageTransaction();
@@ -554,6 +552,18 @@ std::unique_ptr<ReadBufferFromFileBase> DiskObjectStorage::readFile(
     return impl;
 }
 
+std::unique_ptr<ReadBufferFromFileBase> DiskObjectStorage::readFileIfExists( /// Like readFile(), but yields an empty pointer when the metadata storage has no objects for `path`.
+    const String & path,
+    const ReadSettings & settings,
+    std::optional<size_t> read_hint,
+    std::optional<size_t> file_size) const
+{
+    if (auto storage_objects = metadata_storage->getStorageObjectsIfExist(path)) /// Only the presence of a value is used; the objects themselves are not passed on.
+        return readFile(path, settings, read_hint, file_size);
+    else
+        return {}; /// getStorageObjectsIfExist() returned no value for `path`.
+}
+
 std::unique_ptr<WriteBufferFromFileBase> DiskObjectStorage::writeFile(
     const String & path,
     size_t buf_size,
diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h
index 602a61c9a99..b4cdf620555 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorage.h
+++ b/src/Disks/ObjectStorages/DiskObjectStorage.h
@@ -58,9 +58,9 @@ public:
 
     UInt64 getKeepingFreeSpace() const override { return 0; }
 
-    bool exists(const String & path) const override;
-
-    bool isFile(const String & path) const override;
+    bool existsFile(const String & path) const override;
+    bool existsDirectory(const String & path) const override;
+    bool existsFileOrDirectory(const String & path) const override;
 
     void createFile(const String & path) override;
 
@@ -108,8 +108,6 @@ public:
 
     void setReadOnly(const String & path) override;
 
-    bool isDirectory(const String & path) const override;
-
     void createDirectory(const String & path) override;
 
     void createDirectories(const String & path) override;
@@ -142,6 +140,12 @@ public:
         std::optional<size_t> read_hint,
         std::optional<size_t> file_size) const override;
 
+    std::unique_ptr<ReadBufferFromFileBase> readFileIfExists(
+        const String & path,
+        const ReadSettings & settings,
+        std::optional<size_t> read_hint,
+        std::optional<size_t> file_size) const override;
+
     std::unique_ptr<WriteBufferFromFileBase> writeFile(
         const String & path,
         size_t buf_size,
diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
index 0eb58cba4bd..b9f963c4590 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
@@ -117,7 +117,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecu
     bool dir_contains_only_files = true;
     for (auto it = disk->iterateDirectory(path); it->isValid(); it->next())
     {
-        if (disk->isDirectory(it->path()))
+        if (disk->existsDirectory(it->path()))
         {
             dir_contains_only_files = false;
             break;
@@ -138,7 +138,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecu
     {
         for (auto it = disk->iterateDirectory(path); it->isValid(); it->next())
         {
-            if (disk->isDirectory(it->path()))
+            if (disk->existsDirectory(it->path()))
             {
                 migrateToRestorableSchemaRecursive(it->path(), pool);
             }
@@ -161,7 +161,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchema()
         ThreadPool pool{CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled};
 
         for (const auto & root : data_roots)
-            if (disk->exists(root))
+            if (disk->existsDirectory(root))
                 migrateToRestorableSchemaRecursive(root + '/', pool);
 
         pool.wait();
@@ -180,7 +180,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restore(const Poco::Util::Abs
 {
     LOG_INFO(disk->log, "Restore operation for disk {} called", disk->name);
 
-    if (!disk->exists(RESTORE_FILE_NAME))
+    if (!disk->existsFile(RESTORE_FILE_NAME))
     {
         LOG_INFO(disk->log, "No restore file '{}' exists, finishing restore", RESTORE_FILE_NAME);
         return;
@@ -228,7 +228,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restore(const Poco::Util::Abs
 
         bool cleanup_s3 = information.source_path != disk->object_key_prefix;
         for (const auto & root : data_roots)
-            if (disk->exists(root))
+            if (disk->existsDirectory(root))
                 disk->removeSharedRecursive(root + '/', !cleanup_s3, {});
 
         LOG_INFO(disk->log, "Old metadata removed, restoring new one");
@@ -326,7 +326,7 @@ static std::tuple<UInt64, String> extractRevisionAndOperationFromKey(const Strin
 
 void DiskObjectStorageRemoteMetadataRestoreHelper::moveRecursiveOrRemove(const String & from_path, const String & to_path, bool send_metadata)
 {
-    if (disk->exists(to_path))
+    if (disk->existsFileOrDirectory(to_path))
     {
         if (send_metadata)
         {
@@ -337,7 +337,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::moveRecursiveOrRemove(const S
             };
             createFileOperationObject("rename", revision, object_metadata);
         }
-        if (disk->isDirectory(from_path))
+        if (disk->existsDirectory(from_path))
         {
             for (auto it = disk->iterateDirectory(from_path); it->isValid(); it->next())
                 moveRecursiveOrRemove(it->path(), fs::path(to_path) / it->name(), false);
@@ -490,13 +490,13 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject
             {
                 auto from_path = object_attributes["from_path"];
                 auto to_path = object_attributes["to_path"];
-                if (disk->exists(from_path))
+                if (disk->existsFileOrDirectory(from_path))
                 {
                     moveRecursiveOrRemove(from_path, to_path, send_metadata);
 
                     LOG_TRACE(disk->log, "Revision {}. Restored rename {} -> {}", revision, from_path, to_path);
 
-                    if (restore_information.detached && disk->isDirectory(to_path))
+                    if (restore_information.detached && disk->existsDirectory(to_path))
                     {
                         /// Sometimes directory paths are passed without trailing '/'. We should keep them in one consistent way.
                         if (!from_path.ends_with('/'))
@@ -517,7 +517,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject
             {
                 auto src_path = object_attributes["src_path"];
                 auto dst_path = object_attributes["dst_path"];
-                if (disk->exists(src_path))
+                if (disk->existsFile(src_path))
                 {
                     disk->createDirectories(directoryPath(dst_path));
                     disk->createHardLink(src_path, dst_path, send_metadata);
@@ -564,7 +564,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject
                 to_path /= from_path.filename();
 
             /// to_path may exist and non-empty in case for example abrupt restart, so remove it before rename
-            if (disk->metadata_storage->exists(to_path))
+            if (disk->metadata_storage->existsFileOrDirectory(to_path))
                 tx->removeRecursive(to_path);
 
             disk->createDirectories(directoryPath(to_path));
diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
index 880911b9958..64323fb6f3c 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
@@ -22,7 +22,6 @@ namespace ErrorCodes
     extern const int CANNOT_READ_ALL_DATA;
     extern const int CANNOT_OPEN_FILE;
     extern const int FILE_DOESNT_EXIST;
-    extern const int BAD_FILE_TYPE;
     extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
     extern const int LOGICAL_ERROR;
 }
@@ -126,17 +125,14 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation
 
     void execute(MetadataTransactionPtr tx) override
     {
-        if (!metadata_storage.exists(path))
+        if (!metadata_storage.existsFile(path))
         {
             if (if_exists)
                 return;
 
-            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata path '{}' doesn't exist", path);
+            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata path '{}' doesn't exist or isn't a regular file", path);
         }
 
-        if (!metadata_storage.isFile(path))
-            throw Exception(ErrorCodes::BAD_FILE_TYPE, "Path '{}' is not a regular file", path);
-
         try
         {
             auto objects = metadata_storage.getStorageObjects(path);
@@ -211,17 +207,14 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
     {
         for (const auto & [path, if_exists] : remove_paths)
         {
-            if (!metadata_storage.exists(path))
+            if (!metadata_storage.existsFile(path))
             {
                 if (if_exists)
                     continue;
 
-                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata path '{}' doesn't exist", path);
+                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Metadata path '{}' doesn't exist or isn't a regular file", path);
             }
 
-            if (!metadata_storage.isFile(path))
-                throw Exception(ErrorCodes::BAD_FILE_TYPE, "Path '{}' is not a regular file", path);
-
             try
             {
                 auto objects = metadata_storage.getStorageObjects(path);
@@ -318,7 +311,7 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp
     {
         checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks.
 
-        if (metadata_storage.isFile(path_to_remove))
+        if (metadata_storage.existsFile(path_to_remove))
         {
             try
             {
@@ -367,7 +360,7 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp
     void execute(MetadataTransactionPtr tx) override
     {
         /// Similar to DiskLocal and https://en.cppreference.com/w/cpp/filesystem/remove
-        if (metadata_storage.exists(path))
+        if (metadata_storage.existsFileOrDirectory(path))
             removeMetadataRecursive(tx, path);
     }
 
@@ -433,7 +426,7 @@ struct ReplaceFileObjectStorageOperation final : public IDiskObjectStorageOperat
 
     void execute(MetadataTransactionPtr tx) override
     {
-        if (metadata_storage.exists(path_to))
+        if (metadata_storage.existsFile(path_to))
         {
             objects_to_remove = metadata_storage.getStorageObjects(path_to);
             tx->replaceFile(path_from, path_to);
@@ -583,13 +576,8 @@ struct TruncateFileObjectStorageOperation final : public IDiskObjectStorageOpera
 
     void execute(MetadataTransactionPtr tx) override
     {
-        if (metadata_storage.exists(path))
-        {
-            if (!metadata_storage.isFile(path))
-                throw Exception(ErrorCodes::LOGICAL_ERROR, "Path {} is not a file", path);
-
+        if (metadata_storage.existsFile(path)) /// NOTE(review): a path for which existsFile() is false is skipped without any error; confirm that silently doing nothing is intended.
             truncate_outcome = tx->truncateFile(path, size);
-        }
     }
 
     void undo() override
@@ -663,7 +651,7 @@ void DiskObjectStorageTransaction::clearDirectory(const std::string & path)
 {
     for (auto it = metadata_storage.iterateDirectory(path); it->isValid(); it->next())
     {
-        if (metadata_storage.isFile(it->path()))
+        if (metadata_storage.existsFile(it->path()))
             removeFile(it->path());
     }
 }
@@ -769,7 +757,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskObjectStorageTransaction::writeFile
             {
                 /// Otherwise we will produce lost blobs which nobody points to
                 /// WriteOnce storages are not affected by the issue
-                if (!tx->object_storage.isPlain() && tx->metadata_storage.exists(path))
+                if (!tx->object_storage.isPlain() && tx->metadata_storage.existsFile(path))
                     tx->object_storage.removeObjectsIfExist(tx->metadata_storage.getStorageObjects(path));
 
                 tx->metadata_transaction->createMetadataFile(path, key_, count);
@@ -802,7 +790,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskObjectStorageTransaction::writeFile
                 {
                     /// Otherwise we will produce lost blobs which nobody points to
                     /// WriteOnce storages are not affected by the issue
-                    if (!object_storage_tx->object_storage.isPlain() && object_storage_tx->metadata_storage.exists(path))
+                    if (!object_storage_tx->object_storage.isPlain() && object_storage_tx->metadata_storage.existsFile(path))
                     {
                         object_storage_tx->object_storage.removeObjectsIfExist(object_storage_tx->metadata_storage.getStorageObjects(path));
                     }
@@ -876,7 +864,7 @@ void DiskObjectStorageTransaction::writeFileUsingBlobWritingFunction(
     {
         /// Otherwise we will produce lost blobs which nobody points to
         /// WriteOnce storages are not affected by the issue
-        if (!object_storage.isPlain() && metadata_storage.exists(path))
+        if (!object_storage.isPlain() && metadata_storage.existsFile(path))
             object_storage.removeObjectsIfExist(metadata_storage.getStorageObjects(path));
 
         metadata_transaction->createMetadataFile(path, std::move(object_key), object_size);
diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h
index bed24849ed6..bc3d1ff7849 100644
--- a/src/Disks/ObjectStorages/IMetadataStorage.h
+++ b/src/Disks/ObjectStorages/IMetadataStorage.h
@@ -182,14 +182,19 @@ public:
 
     /// ==== General purpose methods. Define properties of object storage file based on metadata files ====
 
-    virtual bool exists(const std::string & path) const = 0;
-
-    virtual bool isFile(const std::string & path) const = 0;
-
-    virtual bool isDirectory(const std::string & path) const = 0;
+    virtual bool existsFile(const std::string & path) const = 0;
+    virtual bool existsDirectory(const std::string & path) const = 0;
+    virtual bool existsFileOrDirectory(const std::string & path) const = 0;
 
     virtual uint64_t getFileSize(const std::string & path) const = 0;
 
+    virtual std::optional<uint64_t> getFileSizeIfExists(const std::string & path) const /// Default implementation: an existsFile() check followed by getFileSize().
+    {
+        if (existsFile(path))
+            return getFileSize(path);
+        return std::nullopt; /// existsFile() reported false for `path`.
+    }
+
     virtual Poco::Timestamp getLastModified(const std::string & path) const = 0;
 
     virtual time_t getLastChanged(const std::string & /* path */) const
@@ -242,6 +247,13 @@ public:
     /// object_storage_path is absolute.
     virtual StoredObjects getStorageObjects(const std::string & path) const = 0;
 
+    virtual std::optional<StoredObjects> getStorageObjectsIfExist(const std::string & path) const /// Default implementation: an existsFile() check followed by getStorageObjects().
+    {
+        if (existsFile(path))
+            return getStorageObjects(path);
+        return std::nullopt; /// existsFile() reported false for `path`.
+    }
+
 protected:
     [[noreturn]] static void throwNotImplemented()
     {
diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp
index d371f862334..0fdd927a4a8 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp
+++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp
@@ -116,7 +116,7 @@ void registerPlainMetadataStorage(MetadataStorageFactory & factory)
         ObjectStoragePtr object_storage) -> MetadataStoragePtr
     {
         auto key_compatibility_prefix = getObjectKeyCompatiblePrefix(*object_storage, config, config_prefix);
-        return std::make_shared<MetadataStorageFromPlainObjectStorage>(object_storage, key_compatibility_prefix);
+        return std::make_shared<MetadataStorageFromPlainObjectStorage>(object_storage, key_compatibility_prefix, config.getUInt64(config_prefix + ".file_sizes_cache_size", 0));
     });
 }
 
@@ -130,7 +130,7 @@ void registerPlainRewritableMetadataStorage(MetadataStorageFactory & factory)
            ObjectStoragePtr object_storage) -> MetadataStoragePtr
         {
             auto key_compatibility_prefix = getObjectKeyCompatiblePrefix(*object_storage, config, config_prefix);
-            return std::make_shared<MetadataStorageFromPlainRewritableObjectStorage>(object_storage, key_compatibility_prefix);
+            return std::make_shared<MetadataStorageFromPlainRewritableObjectStorage>(object_storage, key_compatibility_prefix, config.getUInt64(config_prefix + ".file_sizes_cache_size", 0));
         });
 }
 
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp
index e5240334fa8..7e7ca4e6981 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp
+++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp
@@ -20,19 +20,19 @@ const std::string & MetadataStorageFromDisk::getPath() const
     return disk->getPath();
 }
 
-bool MetadataStorageFromDisk::exists(const std::string & path) const
+bool MetadataStorageFromDisk::existsFile(const std::string & path) const
 {
-    return disk->exists(path);
+    return disk->existsFile(path);
 }
 
-bool MetadataStorageFromDisk::isFile(const std::string & path) const
+bool MetadataStorageFromDisk::existsDirectory(const std::string & path) const
 {
-    return disk->isFile(path);
+    return disk->existsDirectory(path);
 }
 
-bool MetadataStorageFromDisk::isDirectory(const std::string & path) const
+bool MetadataStorageFromDisk::existsFileOrDirectory(const std::string & path) const
 {
-    return disk->isDirectory(path);
+    return disk->existsFileOrDirectory(path);
 }
 
 Poco::Timestamp MetadataStorageFromDisk::getLastModified(const std::string & path) const
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h
index 8096b3b4565..5d56580a57b 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h
+++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h
@@ -15,7 +15,7 @@ namespace DB
 struct UnlinkMetadataFileOperationOutcome;
 using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFileOperationOutcome>;
 
-/// Store metadata on a separate disk
+/// Stores metadata on a separate disk
 /// (used for object storages, like S3 and related).
 class MetadataStorageFromDisk final : public IMetadataStorage
 {
@@ -35,11 +35,9 @@ public:
 
     MetadataStorageType getType() const override { return MetadataStorageType::Local; }
 
-    bool exists(const std::string & path) const override;
-
-    bool isFile(const std::string & path) const override;
-
-    bool isDirectory(const std::string & path) const override;
+    bool existsFile(const std::string & path) const override;
+    bool existsDirectory(const std::string & path) const override;
+    bool existsFileOrDirectory(const std::string & path) const override;
 
     uint64_t getFileSize(const String & path) const override;
 
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
index 935d126f3b8..83248004339 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
+++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
@@ -109,7 +109,7 @@ void CreateDirectoryRecursiveOperation::execute(std::unique_lock<SharedMutex> &)
 {
     namespace fs = std::filesystem;
     fs::path p(path);
-    while (!disk.exists(p))
+    while (!disk.existsFileOrDirectory(p))
     {
         paths_created.push_back(p);
         if (!p.has_parent_path())
@@ -151,26 +151,26 @@ RemoveRecursiveOperation::RemoveRecursiveOperation(const std::string & path_, ID
 
 void RemoveRecursiveOperation::execute(std::unique_lock<SharedMutex> &)
 {
-    if (disk.isFile(path))
+    if (disk.existsFile(path))
         disk.moveFile(path, temp_path);
-    else if (disk.isDirectory(path))
+    else if (disk.existsDirectory(path))
         disk.moveDirectory(path, temp_path);
 }
 
 void RemoveRecursiveOperation::undo(std::unique_lock<SharedMutex> &)
 {
-    if (disk.isFile(temp_path))
+    if (disk.existsFile(temp_path))
         disk.moveFile(temp_path, path);
-    else if (disk.isDirectory(temp_path))
+    else if (disk.existsDirectory(temp_path))
         disk.moveDirectory(temp_path, path);
 }
 
 void RemoveRecursiveOperation::finalize()
 {
-    if (disk.exists(temp_path))
+    if (disk.existsFileOrDirectory(temp_path))
         disk.removeRecursive(temp_path);
 
-    if (disk.exists(path))
+    if (disk.existsFileOrDirectory(path))
         disk.removeRecursive(path);
 }
 
@@ -246,7 +246,7 @@ ReplaceFileOperation::ReplaceFileOperation(const std::string & path_from_, const
 
 void ReplaceFileOperation::execute(std::unique_lock<SharedMutex> &)
 {
-    if (disk.exists(path_to))
+    if (disk.existsFile(path_to))
         disk.moveFile(path_to, temp_path_to);
 
     disk.replaceFile(path_from, path_to);
@@ -272,10 +272,9 @@ WriteFileOperation::WriteFileOperation(const std::string & path_, IDisk & disk_,
 
 void WriteFileOperation::execute(std::unique_lock<SharedMutex> &)
 {
-    if (disk.exists(path))
+    if (auto buf = disk.readFileIfExists(path, ReadSettings{}))
     {
         existed = true;
-        auto buf = disk.readFile(path, ReadSettings{});
         readStringUntilEOF(prev_data, *buf);
     }
     auto buf = disk.writeFile(path);
@@ -299,7 +298,7 @@ void WriteFileOperation::undo(std::unique_lock<SharedMutex> &)
 void AddBlobOperation::execute(std::unique_lock<SharedMutex> & metadata_lock)
 {
     DiskObjectStorageMetadataPtr metadata;
-    if (metadata_storage.exists(path))
+    if (metadata_storage.existsFile(path))
         metadata = metadata_storage.readMetadataUnlocked(path, metadata_lock);
     else
         metadata = std::make_unique<DiskObjectStorageMetadata>(disk.getPath(), path);
@@ -351,7 +350,7 @@ void UnlinkMetadataFileOperation::undo(std::unique_lock<SharedMutex> & lock)
 
 void TruncateMetadataFileOperation::execute(std::unique_lock<SharedMutex> & metadata_lock)
 {
-    if (metadata_storage.exists(path))
+    if (metadata_storage.existsFile(path))
     {
         auto metadata = metadata_storage.readMetadataUnlocked(path, metadata_lock);
         while (metadata->getTotalSizeBytes() > target_size)
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp
index 2036208c389..ea5d46041f5 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp
@@ -3,6 +3,7 @@
 #include <Disks/ObjectStorages/InMemoryPathMap.h>
 #include <Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.h>
 #include <Disks/ObjectStorages/StaticDirectoryIterator.h>
+#include <Disks/ObjectStorages/StoredObject.h>
 
 #include <Common/filesystemHelpers.h>
 
@@ -10,9 +11,15 @@
 #include <tuple>
 #include <unordered_set>
 
+
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int FILE_DOESNT_EXIST;
+}
+
 namespace
 {
 
@@ -23,10 +30,12 @@ std::filesystem::path normalizeDirectoryPath(const std::filesystem::path & path)
 
 }
 
-MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_)
+MetadataStorageFromPlainObjectStorage::MetadataStorageFromPlainObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_, size_t file_sizes_cache_size)
     : object_storage(object_storage_)
     , storage_path_prefix(std::move(storage_path_prefix_))
 {
+    if (file_sizes_cache_size) /// A size of zero leaves file_sizes_cache empty, i.e. file sizes are not cached.
+        file_sizes_cache.emplace(file_sizes_cache_size);
 }
 
 MetadataTransactionPtr MetadataStorageFromPlainObjectStorage::createTransaction()
@@ -39,35 +48,61 @@ const std::string & MetadataStorageFromPlainObjectStorage::getPath() const
     return storage_path_prefix;
 }
 
-bool MetadataStorageFromPlainObjectStorage::exists(const std::string & path) const
+bool MetadataStorageFromPlainObjectStorage::existsFile(const std::string & path) const
 {
-    /// NOTE: exists() cannot be used here since it works only for existing
-    /// key, and does not work for some intermediate path.
-    auto object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
-    return object_storage->existsOrHasAnyChild(object_key.serialize());
+    ObjectStorageKey object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
+    StoredObject object(object_key.serialize(), path);
+    if (!object_storage->exists(object))
+        return false;
+
+    /// The object exists. Report a file only if the same key in its directory form (an empty component appended) is not found by existsOrHasAnyChild().
+    auto directory = std::filesystem::path(object_key.serialize()) / "";
+    return !object_storage->existsOrHasAnyChild(directory);
 }
 
-bool MetadataStorageFromPlainObjectStorage::isFile(const std::string & path) const
-{
-    /// NOTE: This check is inaccurate and has excessive API calls
-    return exists(path) && !isDirectory(path);
-}
-
-bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path) const
+bool MetadataStorageFromPlainObjectStorage::existsDirectory(const std::string & path) const /// Checks the key generated for `path` in its directory form, i.e. with an empty path component appended.
 {
     auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize();
     auto directory = std::filesystem::path(std::move(key_prefix)) / "";
-
     return object_storage->existsOrHasAnyChild(directory);
 }
 
+bool MetadataStorageFromPlainObjectStorage::existsFileOrDirectory(const std::string & path) const /// The generated key is checked as is, with no empty component appended.
+{
+    /// NOTE: exists() cannot be used here since it works only for an existing
+    /// key and does not work for an intermediate path.
+    auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize();
+    return object_storage->existsOrHasAnyChild(key_prefix);
+}
+
+
 uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const
 {
-    auto object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
-    auto metadata = object_storage->tryGetObjectMetadata(object_key.serialize());
-    if (metadata)
-        return metadata->size_bytes;
-    return 0;
+    if (auto res = getFileSizeIfExists(path))
+        return *res;
+    throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} does not exist on plain object storage", path); /// Unlike getFileSizeIfExists(), a missing file is an error here rather than an absent value.
 }
+
+std::optional<uint64_t> MetadataStorageFromPlainObjectStorage::getFileSizeIfExists(const String & path) const /// Size taken from the object's metadata, or std::nullopt when no metadata is obtained.
+{
+    auto get = [&] -> std::shared_ptr<uint64_t> /// Loader: asks the object storage for the metadata of the key generated for `path`.
+    {
+        auto object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
+        auto metadata = object_storage->tryGetObjectMetadata(object_key.serialize());
+        if (metadata)
+            return std::make_shared<uint64_t>(metadata->size_bytes);
+        return nullptr; /// tryGetObjectMetadata() returned nothing for this key.
+    };
+
+    std::shared_ptr<uint64_t> res;
+    if (file_sizes_cache) /// Set only when the constructor received a non-zero cache size.
+        res = file_sizes_cache->getOrSet(path, get).first; /// NOTE(review): confirm how getOrSet() treats a nullptr result from `get` (whether misses are cached).
+    else
+        res = get();
+
+    if (res)
+        return *res;
+    return std::nullopt;
+}
 
 std::vector<std::string> MetadataStorageFromPlainObjectStorage::listDirectory(const std::string & path) const
@@ -75,18 +110,18 @@ std::vector<std::string> MetadataStorageFromPlainObjectStorage::listDirectory(co
     auto key_prefix = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */).serialize();
 
     RelativePathsWithMetadata files;
-    std::string abs_key = key_prefix;
-    if (!abs_key.ends_with('/'))
-        abs_key += '/';
+    std::string absolute_key = key_prefix;
+    if (!absolute_key.ends_with('/'))
+        absolute_key += '/';
 
-    object_storage->listObjects(abs_key, files, 0);
+    object_storage->listObjects(absolute_key, files, 0);
 
     std::unordered_set<std::string> result;
     for (const auto & elem : files)
     {
         const auto & p = elem->relative_path;
-        chassert(p.find(abs_key) == 0);
-        const auto child_pos = abs_key.size();
+        chassert(p.find(absolute_key) == 0);
+        const auto child_pos = absolute_key.size();
         /// string::npos is ok.
         const auto slash_pos = p.find('/', child_pos);
         if (slash_pos == std::string::npos)
@@ -114,6 +149,16 @@ StoredObjects MetadataStorageFromPlainObjectStorage::getStorageObjects(const std
     return {StoredObject(object_key.serialize(), path, object_size)};
 }
 
+std::optional<StoredObjects> MetadataStorageFromPlainObjectStorage::getStorageObjectsIfExist(const std::string & path) const /// One-element list for `path`, or std::nullopt when getFileSizeIfExists() yields no size.
+{
+    if (auto object_size = getFileSizeIfExists(path))
+    {
+        auto object_key = object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */);
+        return StoredObjects{StoredObject(object_key.serialize(), path, *object_size)}; /// The size obtained above is passed to the StoredObject.
+    }
+    return std::nullopt;
+}
+
 const IMetadataStorage & MetadataStorageFromPlainObjectStorageTransaction::getStorageForNonTransactionalReads() const
 {
     return metadata_storage;
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h
index 2aac7158bd5..131f2dc099c 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h
+++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h
@@ -5,11 +5,13 @@
 #include <Disks/ObjectStorages/InMemoryPathMap.h>
 #include <Disks/ObjectStorages/MetadataOperationsHolder.h>
 #include <Disks/ObjectStorages/MetadataStorageTransactionState.h>
+#include <Common/CacheBase.h>
 
 #include <map>
 #include <string>
 #include <unordered_set>
 
+
 namespace DB
 {
 
@@ -31,6 +33,7 @@ class MetadataStorageFromPlainObjectStorage : public IMetadataStorage
 {
 private:
     friend class MetadataStorageFromPlainObjectStorageTransaction;
+    mutable std::optional<CacheBase<String, uint64_t>> file_sizes_cache; /// File sizes keyed by path; left empty when the cache size passed to the constructor is zero.
 
 protected:
     ObjectStoragePtr object_storage;
@@ -39,7 +42,7 @@ protected:
     mutable SharedMutex metadata_mutex;
 
 public:
-    MetadataStorageFromPlainObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_);
+    MetadataStorageFromPlainObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_, size_t file_sizes_cache_size);
 
     MetadataTransactionPtr createTransaction() override;
 
@@ -47,13 +50,12 @@ public:
 
     MetadataStorageType getType() const override { return MetadataStorageType::Plain; }
 
-    bool exists(const std::string & path) const override;
-
-    bool isFile(const std::string & path) const override;
-
-    bool isDirectory(const std::string & path) const override;
+    bool existsFile(const std::string & path) const override;
+    bool existsDirectory(const std::string & path) const override;
+    bool existsFileOrDirectory(const std::string & path) const override;
 
     uint64_t getFileSize(const String & path) const override;
+    std::optional<uint64_t> getFileSizeIfExists(const String & path) const override;
 
     std::vector<std::string> listDirectory(const std::string & path) const override;
 
@@ -62,6 +64,7 @@ public:
     DiskPtr getDisk() const { return {}; }
 
     StoredObjects getStorageObjects(const std::string & path) const override;
+    std::optional<StoredObjects> getStorageObjectsIfExist(const std::string & path) const override;
 
     Poco::Timestamp getLastModified(const std::string & /* path */) const override
     {
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp
index ef12615b2d8..7fd0251e7bd 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.cpp
@@ -7,9 +7,8 @@
 #include <IO/ReadHelpers.h>
 #include <IO/S3Common.h>
 #include <IO/SharedThreadPools.h>
-#include "Common/SharedLockGuard.h"
-#include "Common/SharedMutex.h"
-#include <Common/ErrorCodes.h>
+#include <Common/SharedLockGuard.h>
+#include <Common/SharedMutex.h>
 #include <Common/logger_useful.h>
 #include "CommonPathPrefixKeyGenerator.h"
 
@@ -181,8 +180,8 @@ void getDirectChildrenOnDiskImpl(
 }
 
 MetadataStorageFromPlainRewritableObjectStorage::MetadataStorageFromPlainRewritableObjectStorage(
-    ObjectStoragePtr object_storage_, String storage_path_prefix_)
-    : MetadataStorageFromPlainObjectStorage(object_storage_, storage_path_prefix_)
+    ObjectStoragePtr object_storage_, String storage_path_prefix_, size_t file_sizes_cache_size)
+    : MetadataStorageFromPlainObjectStorage(object_storage_, storage_path_prefix_, file_sizes_cache_size)
     , metadata_key_prefix(DB::getMetadataKeyPrefix(object_storage))
     , path_map(loadPathPrefixMap(metadata_key_prefix, object_storage))
 {
@@ -211,9 +210,9 @@ MetadataStorageFromPlainRewritableObjectStorage::~MetadataStorageFromPlainRewrit
     CurrentMetrics::sub(metric, path_map->map.size());
 }
 
-bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string & path) const
+bool MetadataStorageFromPlainRewritableObjectStorage::existsFileOrDirectory(const std::string & path) const
 {
-    if (MetadataStorageFromPlainObjectStorage::exists(path))
+    if (MetadataStorageFromPlainObjectStorage::existsFileOrDirectory(path))
         return true;
 
     if (useSeparateLayoutForMetadata())
@@ -225,14 +224,19 @@ bool MetadataStorageFromPlainRewritableObjectStorage::exists(const std::string &
     return false;
 }
 
-bool MetadataStorageFromPlainRewritableObjectStorage::isDirectory(const std::string & path) const
+bool MetadataStorageFromPlainRewritableObjectStorage::existsFile(const std::string & path) const
+{
+    return MetadataStorageFromPlainObjectStorage::existsFile(path); /// No extra handling here: the file check is delegated to the base class as is.
+}
+
+bool MetadataStorageFromPlainRewritableObjectStorage::existsDirectory(const std::string & path) const
 {
     if (useSeparateLayoutForMetadata())
     {
         auto directory = std::filesystem::path(object_storage->generateObjectKeyForPath(path, getMetadataKeyPrefix()).serialize()) / "";
         return object_storage->existsOrHasAnyChild(directory);
     }
-    return MetadataStorageFromPlainObjectStorage::isDirectory(path);
+    return MetadataStorageFromPlainObjectStorage::existsDirectory(path);
 }
 
 std::vector<std::string> MetadataStorageFromPlainRewritableObjectStorage::listDirectory(const std::string & path) const
@@ -240,21 +244,12 @@ std::vector<std::string> MetadataStorageFromPlainRewritableObjectStorage::listDi
     auto key_prefix = object_storage->generateObjectKeyForPath(path, "" /* key_prefix */).serialize();
 
     RelativePathsWithMetadata files;
-    auto abs_key = std::filesystem::path(object_storage->getCommonKeyPrefix()) / key_prefix / "";
+    auto absolute_key = std::filesystem::path(object_storage->getCommonKeyPrefix()) / key_prefix / "";
 
-    object_storage->listObjects(abs_key, files, 0);
+    object_storage->listObjects(absolute_key, files, 0);
 
     std::unordered_set<std::string> directories;
-    getDirectChildrenOnDisk(abs_key, files, std::filesystem::path(path) / "", directories);
-    /// List empty directories that are identified by the `prefix.path` metadata files. This is required to, e.g., remove
-    /// metadata along with regular files.
-    if (useSeparateLayoutForMetadata())
-    {
-        auto metadata_key = std::filesystem::path(getMetadataKeyPrefix()) / key_prefix / "";
-        RelativePathsWithMetadata metadata_files;
-        object_storage->listObjects(metadata_key, metadata_files, 0);
-        getDirectChildrenOnDisk(metadata_key, metadata_files, std::filesystem::path(path) / "", directories);
-    }
+    getDirectChildrenOnDisk(absolute_key, files, std::filesystem::path(path) / "", directories);
 
     return std::vector<std::string>(std::make_move_iterator(directories.begin()), std::make_move_iterator(directories.end()));
 }
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h
index 82d93e3e7ae..c76baf93245 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h
+++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainRewritableObjectStorage.h
@@ -16,12 +16,13 @@ private:
     std::shared_ptr<InMemoryPathMap> path_map;
 
 public:
-    MetadataStorageFromPlainRewritableObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_);
+    MetadataStorageFromPlainRewritableObjectStorage(ObjectStoragePtr object_storage_, String storage_path_prefix_, size_t file_sizes_cache_size);
     ~MetadataStorageFromPlainRewritableObjectStorage() override;
 
     MetadataStorageType getType() const override { return MetadataStorageType::PlainRewritable; }
-    bool exists(const std::string & path) const override;
-    bool isDirectory(const std::string & path) const override;
+    bool existsFile(const std::string & path) const override;
+    bool existsDirectory(const std::string & path) const override;
+    bool existsFileOrDirectory(const std::string & path) const override;
     std::vector<std::string> listDirectory(const std::string & path) const override;
 
 protected:
diff --git a/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h b/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h
index dcea5964fc5..7ddf046797d 100644
--- a/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h
+++ b/src/Disks/ObjectStorages/PlainRewritableObjectStorage.h
@@ -17,7 +17,7 @@ template <typename BaseObjectStorage>
 class PlainRewritableObjectStorage : public BaseObjectStorage
 {
 public:
-    template <class... Args>
+    template <typename... Args>
     explicit PlainRewritableObjectStorage(MetadataStorageMetrics && metadata_storage_metrics_, Args &&... args)
         : BaseObjectStorage(std::forward<Args>(args)...)
         , metadata_storage_metrics(std::move(metadata_storage_metrics_))
diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp
index f91c2afe416..490f9769b54 100644
--- a/src/Disks/ObjectStorages/S3/diskSettings.cpp
+++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp
@@ -92,12 +92,26 @@ std::unique_ptr<S3::Client> getClient(
             "Region should be explicitly specified for directory buckets");
     }
 
+    const Settings & local_settings = context->getSettingsRef();
+
+    int s3_max_redirects = static_cast<int>(global_settings[Setting::s3_max_redirects]);
+    if (!for_disk_s3 && local_settings.isChanged("s3_max_redirects"))
+        s3_max_redirects = static_cast<int>(local_settings[Setting::s3_max_redirects]);
+
+    int s3_retry_attempts = static_cast<int>(global_settings[Setting::s3_retry_attempts]);
+    if (!for_disk_s3 && local_settings.isChanged("s3_retry_attempts"))
+        s3_retry_attempts = static_cast<int>(local_settings[Setting::s3_retry_attempts]);
+
+    bool enable_s3_requests_logging = global_settings[Setting::enable_s3_requests_logging];
+    if (!for_disk_s3 && local_settings.isChanged("enable_s3_requests_logging"))
+        enable_s3_requests_logging = local_settings[Setting::enable_s3_requests_logging];
+
     S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
         auth_settings.region,
         context->getRemoteHostFilter(),
-        static_cast<int>(global_settings[Setting::s3_max_redirects]),
-        static_cast<int>(global_settings[Setting::s3_retry_attempts]),
-        global_settings[Setting::enable_s3_requests_logging],
+        s3_max_redirects,
+        s3_retry_attempts,
+        enable_s3_requests_logging,
         for_disk_s3,
         request_settings.get_request_throttler,
         request_settings.put_request_throttler,
diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp
index 5ab9d3f3631..d44cd319b4c 100644
--- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp
+++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.cpp
@@ -32,14 +32,14 @@ const std::string & MetadataStorageFromStaticFilesWebServer::getPath() const
     return no_root;
 }
 
-bool MetadataStorageFromStaticFilesWebServer::exists(const std::string & path) const
+bool MetadataStorageFromStaticFilesWebServer::existsFileOrDirectory(const std::string & path) const
 {
     return object_storage.exists(path);
 }
 
 void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & path) const
 {
-    if (!exists(path))
+    if (!existsFileOrDirectory(path))
 #ifdef NDEBUG
         throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no path {}", path);
 #else
@@ -57,18 +57,16 @@ void MetadataStorageFromStaticFilesWebServer::assertExists(const std::string & p
 #endif
 }
 
-bool MetadataStorageFromStaticFilesWebServer::isFile(const std::string & path) const
+bool MetadataStorageFromStaticFilesWebServer::existsFile(const std::string & path) const
 {
-    assertExists(path);
-    auto file_info = object_storage.getFileInfo(path);
-    return file_info->type == WebObjectStorage::FileType::File;
+    auto file_info = object_storage.tryGetFileInfo(path);
+    return file_info && file_info->type == WebObjectStorage::FileType::File;
 }
 
-bool MetadataStorageFromStaticFilesWebServer::isDirectory(const std::string & path) const
+bool MetadataStorageFromStaticFilesWebServer::existsDirectory(const std::string & path) const
 {
-    assertExists(path);
-    auto file_info = object_storage.getFileInfo(path);
-    return file_info->type == WebObjectStorage::FileType::Directory;
+    auto file_info = object_storage.tryGetFileInfo(path);
+    return file_info && file_info->type == WebObjectStorage::FileType::Directory;
 }
 
 uint64_t MetadataStorageFromStaticFilesWebServer::getFileSize(const String & path) const
@@ -78,6 +76,15 @@ uint64_t MetadataStorageFromStaticFilesWebServer::getFileSize(const String & pat
     return file_info->size;
 }
 
+std::optional<uint64_t> MetadataStorageFromStaticFilesWebServer::getFileSizeIfExists(const String & path) const
+{
+    /// Returns the size recorded for `path`, or std::nullopt when tryGetFileInfo() finds nothing.
+    auto info = object_storage.tryGetFileInfo(path);
+    if (!info)
+        return std::nullopt;
+    return info->size;
+}
+
 StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const std::string & path) const
 {
     assertExists(path);
@@ -90,6 +97,17 @@ StoredObjects MetadataStorageFromStaticFilesWebServer::getStorageObjects(const s
     return {StoredObject(remote_path, path, file_info->size)};
 }
 
+std::optional<StoredObjects> MetadataStorageFromStaticFilesWebServer::getStorageObjectsIfExist(const std::string & path) const
+{
+    auto fs_path = fs::path(object_storage.url) / path;
+    std::string remote_path = fs_path.parent_path() / (escapeForFileName(fs_path.stem()) + fs_path.extension().string()); /// Only the stem is escaped; the extension is appended unchanged.
+    remote_path = remote_path.substr(object_storage.url.size()); /// Drop the first url.size() characters, i.e. the URL part fs_path was built from.
+
+    if (auto file_info = object_storage.tryGetFileInfo(path)) /// Unlike getStorageObjects(), there is no assertExists() call: a missing entry maps to std::nullopt.
+        return StoredObjects{StoredObject(remote_path, path, file_info->size)};
+    return std::nullopt;
+}
+
 std::vector<std::string> MetadataStorageFromStaticFilesWebServer::listDirectory(const std::string & path) const
 {
     std::vector<std::string> result;
@@ -106,7 +124,7 @@ DirectoryIteratorPtr MetadataStorageFromStaticFilesWebServer::iterateDirectory(c
 {
     std::vector<fs::path> dir_file_paths;
 
-    if (!exists(path))
+    if (!existsDirectory(path))
         return std::make_unique<StaticDirectoryIterator>(std::move(dir_file_paths));
 
     dir_file_paths = object_storage.listDirectory(path);
diff --git a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h
index 35271d7192c..5eb4152d41b 100644
--- a/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h
+++ b/src/Disks/ObjectStorages/Web/MetadataStorageFromStaticFilesWebServer.h
@@ -28,19 +28,19 @@ public:
 
     MetadataStorageType getType() const override { return MetadataStorageType::StaticWeb; }
 
-    bool exists(const std::string & path) const override;
-
-    bool isFile(const std::string & path) const override;
-
-    bool isDirectory(const std::string & path) const override;
+    bool existsFile(const std::string & path) const override;
+    bool existsDirectory(const std::string & path) const override;
+    bool existsFileOrDirectory(const std::string & path) const override;
 
     uint64_t getFileSize(const String & path) const override;
+    std::optional<uint64_t> getFileSizeIfExists(const String & path) const override;
 
     std::vector<std::string> listDirectory(const std::string & path) const override;
 
     DirectoryIteratorPtr iterateDirectory(const std::string & path) const override;
 
     StoredObjects getStorageObjects(const std::string & path) const override;
+    std::optional<StoredObjects> getStorageObjectsIfExist(const std::string & path) const override;
 
     struct stat stat(const String & /* path */) const override { return {}; }
 
diff --git a/src/Disks/TemporaryFileOnDisk.cpp b/src/Disks/TemporaryFileOnDisk.cpp
index 9a6e562ff65..b6238eee99c 100644
--- a/src/Disks/TemporaryFileOnDisk.cpp
+++ b/src/Disks/TemporaryFileOnDisk.cpp
@@ -57,7 +57,7 @@ TemporaryFileOnDisk::~TemporaryFileOnDisk()
         if (!disk || relative_path.empty())
             return;
 
-        if (!disk->exists(relative_path))
+        if (!disk->existsFileOrDirectory(relative_path))
         {
             if (show_warning_if_removed)
                 LOG_WARNING(getLogger("TemporaryFileOnDisk"), "Temporary path '{}' does not exist in '{}'", relative_path, disk->getPath());
diff --git a/src/Disks/tests/gtest_disk.cpp b/src/Disks/tests/gtest_disk.cpp
index 374dcc1d474..aed8e193a9d 100644
--- a/src/Disks/tests/gtest_disk.cpp
+++ b/src/Disks/tests/gtest_disk.cpp
@@ -33,10 +33,10 @@ public:
 TEST_F(DiskTest, createDirectories)
 {
     disk->createDirectories("test_dir1/");
-    EXPECT_TRUE(disk->isDirectory("test_dir1/"));
+    EXPECT_TRUE(disk->existsDirectory("test_dir1/"));
 
     disk->createDirectories("test_dir2/nested_dir/");
-    EXPECT_TRUE(disk->isDirectory("test_dir2/nested_dir/"));
+    EXPECT_TRUE(disk->existsDirectory("test_dir2/nested_dir/"));
 }
 
 
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index f2142b857cf..f5f1d616e0f 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -142,6 +142,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
     format_settings.custom.allow_variable_number_of_columns = settings[Setting::input_format_custom_allow_variable_number_of_columns];
     format_settings.date_time_input_format = settings[Setting::date_time_input_format];
     format_settings.date_time_output_format = settings[Setting::date_time_output_format];
+    format_settings.date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = settings[Setting::date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands];
     format_settings.interval.output_format = settings[Setting::interval_output_format];
     format_settings.input_format_ipv4_default_on_conversion_error = settings[Setting::input_format_ipv4_default_on_conversion_error];
     format_settings.input_format_ipv6_default_on_conversion_error = settings[Setting::input_format_ipv6_default_on_conversion_error];
@@ -191,6 +192,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
     format_settings.parquet.case_insensitive_column_matching = settings[Setting::input_format_parquet_case_insensitive_column_matching];
     format_settings.parquet.preserve_order = settings[Setting::input_format_parquet_preserve_order];
     format_settings.parquet.filter_push_down = settings[Setting::input_format_parquet_filter_push_down];
+    format_settings.parquet.bloom_filter_push_down = settings[Setting::input_format_parquet_bloom_filter_push_down];
     format_settings.parquet.use_native_reader = settings[Setting::input_format_parquet_use_native_reader];
     format_settings.parquet.allow_missing_columns = settings[Setting::input_format_parquet_allow_missing_columns];
     format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings[Setting::input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference];
@@ -265,9 +267,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
     format_settings.orc.allow_missing_columns = settings[Setting::input_format_orc_allow_missing_columns];
     format_settings.orc.row_batch_size = settings[Setting::input_format_orc_row_batch_size];
     format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings[Setting::input_format_orc_skip_columns_with_unsupported_types_in_schema_inference];
-    format_settings.orc.allow_missing_columns = settings[Setting::input_format_orc_allow_missing_columns];
-    format_settings.orc.row_batch_size = settings[Setting::input_format_orc_row_batch_size];
-    format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings[Setting::input_format_orc_skip_columns_with_unsupported_types_in_schema_inference];
+    format_settings.orc.dictionary_as_low_cardinality = settings[Setting::input_format_orc_dictionary_as_low_cardinality];
     format_settings.orc.case_insensitive_column_matching = settings[Setting::input_format_orc_case_insensitive_column_matching];
     format_settings.orc.output_string_as_string = settings[Setting::output_format_orc_string_as_string];
     format_settings.orc.output_compression_method = settings[Setting::output_format_orc_compression_method];
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index 8f551f54e7f..b112649323d 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -99,6 +99,8 @@ struct FormatSettings
         Saturate
     };
 
+    bool date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = false;
+
     DateTimeOverflowBehavior date_time_overflow_behavior = DateTimeOverflowBehavior::Ignore;
 
     bool input_format_ipv4_default_on_conversion_error = false;
@@ -274,6 +276,7 @@ struct FormatSettings
         bool skip_columns_with_unsupported_types_in_schema_inference = false;
         bool case_insensitive_column_matching = false;
         bool filter_push_down = true;
+        bool bloom_filter_push_down = true;
         bool use_native_reader = false;
         std::unordered_set<int> skip_row_groups = {};
         bool output_string_as_string = false;
@@ -417,6 +420,7 @@ struct FormatSettings
         bool filter_push_down = true;
         UInt64 output_row_index_stride = 10'000;
         String reader_time_zone_name = "GMT";
+        bool dictionary_as_low_cardinality = true;
         double output_dictionary_key_size_threshold = 0.0;
     } orc{};
 
diff --git a/src/Formats/ProtobufSchemas.cpp b/src/Formats/ProtobufSchemas.cpp
index 8da5b53253a..4dd24086466 100644
--- a/src/Formats/ProtobufSchemas.cpp
+++ b/src/Formats/ProtobufSchemas.cpp
@@ -110,7 +110,7 @@ private:
 };
 
 
-const google::protobuf::Descriptor *
+ProtobufSchemas::DescriptorHolder
 ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path)
 {
     std::lock_guard lock(mutex);
@@ -119,10 +119,10 @@ ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info, Wi
         it = importers
                  .emplace(
                      info.schemaDirectory(),
-                     std::make_unique<ImporterWithSourceTree>(info.schemaDirectory(), google_protos_path, with_envelope))
+                     std::make_shared<ImporterWithSourceTree>(info.schemaDirectory(), google_protos_path, with_envelope))
                  .first;
     auto * importer = it->second.get();
-    return importer->import(info.schemaPath(), info.messageName());
+    return DescriptorHolder(it->second, importer->import(info.schemaPath(), info.messageName()));
 }
 
 }
diff --git a/src/Formats/ProtobufSchemas.h b/src/Formats/ProtobufSchemas.h
index 066f89d5176..378d53eb489 100644
--- a/src/Formats/ProtobufSchemas.h
+++ b/src/Formats/ProtobufSchemas.h
@@ -57,14 +57,31 @@ public:
     // Clear cached protobuf schemas
     void clear();
 
-    /// Parses the format schema, then parses the corresponding proto file, and returns the descriptor of the message type.
-    /// The function never returns nullptr, it throws an exception if it cannot load or parse the file.
-    const google::protobuf::Descriptor *
+    class ImporterWithSourceTree;
+    struct DescriptorHolder /// Pairs a message descriptor with shared ownership of the importer it was obtained from.
+    {
+        DescriptorHolder(std::shared_ptr<ImporterWithSourceTree> importer_, const google::protobuf::Descriptor * message_descriptor_)
+            : importer(std::move(importer_))
+            , message_descriptor(message_descriptor_)
+        {}
+    private:
+        std::shared_ptr<ImporterWithSourceTree> importer; /// Never read; stored only so the importer outlives this holder.
+    public:
+        const google::protobuf::Descriptor * message_descriptor; /// Non-owning pointer to the descriptor of the message type.
+    };
+
+    /// Parses the format schema, then parses the corresponding proto file, and
+    /// returns a holder (since the descriptor is only valid while the
+    /// ImporterWithSourceTree is alive):
+    ///
+    ///     {ImporterWithSourceTree, protobuf::Descriptor - descriptor of the message type}.
+    ///
+    /// The function always returns a valid message descriptor; it throws an exception if it cannot load or parse the file.
+    DescriptorHolder
     getMessageTypeForFormatSchema(const FormatSchemaInfo & info, WithEnvelope with_envelope, const String & google_protos_path);
 
 private:
-    class ImporterWithSourceTree;
-    std::unordered_map<String, std::unique_ptr<ImporterWithSourceTree>> importers;
+    std::unordered_map<String, std::shared_ptr<ImporterWithSourceTree>> importers;
     std::mutex mutex;
 };
 
diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp
index 8aee284f42c..2224af6579f 100644
--- a/src/Formats/ProtobufSerializer.cpp
+++ b/src/Formats/ProtobufSerializer.cpp
@@ -3735,20 +3735,14 @@ namespace
         const google::protobuf::FieldDescriptor * field_descriptor,
         bool skip_unsupported_fields,
         bool allow_repeat,
-        std::unordered_set<const google::protobuf::FieldDescriptor *> & pending_resolution)
+        std::unordered_set<const google::protobuf::FieldDescriptor *> & unresolved_descriptors)
     {
-        if (pending_resolution.contains(field_descriptor))
-        {
-            if (skip_unsupported_fields)
-                return std::nullopt;
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "ClickHouse doesn't support type recursion ({})", field_descriptor->full_name());
-        }
-        pending_resolution.emplace(field_descriptor);
-        SCOPE_EXIT({ pending_resolution.erase(field_descriptor); });
-
+        chassert(unresolved_descriptors.contains(field_descriptor));
         if (allow_repeat && field_descriptor->is_map())
         {
-            auto name_and_type = getNameAndDataTypeFromField(field_descriptor, skip_unsupported_fields, false);
+            /// We don't add the same unresolved descriptor again since we are trying to re-resolve it and put it under a Tuple
+            auto name_and_type
+                = getNameAndDataTypeFromFieldRecursive(field_descriptor, skip_unsupported_fields, false, unresolved_descriptors);
             if (!name_and_type)
                 return std::nullopt;
             const auto * tuple_type = assert_cast<const DataTypeTuple *>(name_and_type->type.get());
@@ -3757,7 +3751,9 @@ namespace
 
         if (allow_repeat && field_descriptor->is_repeated())
         {
-            auto name_and_type = getNameAndDataTypeFromField(field_descriptor, skip_unsupported_fields, false);
+            /// We don't add the same unresolved descriptor again since we are trying to re-resolve it and put it under an Array
+            auto name_and_type
+                = getNameAndDataTypeFromFieldRecursive(field_descriptor, skip_unsupported_fields, false, unresolved_descriptors);
             if (!name_and_type)
                 return std::nullopt;
             return NameAndTypePair{name_and_type->name, std::make_shared<DataTypeArray>(name_and_type->type)};
@@ -3822,10 +3818,21 @@ namespace
                 if (message_descriptor->field_count() == 1)
                 {
                     const auto * nested_field_descriptor = message_descriptor->field(0);
-                    auto nested_name_and_type
-                        = getNameAndDataTypeFromFieldRecursive(nested_field_descriptor, skip_unsupported_fields, true, pending_resolution);
+                    if (auto p = unresolved_descriptors.emplace(nested_field_descriptor); !p.second)
+                    {
+                        if (skip_unsupported_fields)
+                            return std::nullopt;
+                        throw Exception(
+                            ErrorCodes::BAD_ARGUMENTS,
+                            "ClickHouse doesn't support type recursion ({})",
+                            nested_field_descriptor->full_name());
+                    }
+
+                    auto nested_name_and_type = getNameAndDataTypeFromFieldRecursive(
+                        nested_field_descriptor, skip_unsupported_fields, true, unresolved_descriptors);
                     if (!nested_name_and_type)
                         return std::nullopt;
+                    unresolved_descriptors.erase(nested_field_descriptor);
                     return NameAndTypePair{field_descriptor->name() + "_" + nested_name_and_type->name, nested_name_and_type->type};
                 }
 
@@ -3833,16 +3840,27 @@ namespace
                 Strings nested_names;
                 for (int i = 0; i != message_descriptor->field_count(); ++i)
                 {
+                    if (auto p = unresolved_descriptors.emplace(message_descriptor->field(i)); !p.second)
+                    {
+                        if (skip_unsupported_fields)
+                            continue;
+                        throw Exception(
+                            ErrorCodes::BAD_ARGUMENTS,
+                            "ClickHouse doesn't support type recursion ({})",
+                            message_descriptor->field(i)->full_name());
+                    }
                     auto nested_name_and_type = getNameAndDataTypeFromFieldRecursive(
-                        message_descriptor->field(i), skip_unsupported_fields, true, pending_resolution);
+                        message_descriptor->field(i), skip_unsupported_fields, true, unresolved_descriptors);
                     if (!nested_name_and_type)
                         continue;
+                    unresolved_descriptors.erase(message_descriptor->field(i));
                     nested_types.push_back(nested_name_and_type->type);
                     nested_names.push_back(nested_name_and_type->name);
                 }
 
                 if (nested_types.empty())
                     return std::nullopt;
+
                 return NameAndTypePair{
                     field_descriptor->name(), std::make_shared<DataTypeTuple>(std::move(nested_types), std::move(nested_names))};
             }
@@ -3855,8 +3873,9 @@ namespace
         const google::protobuf::FieldDescriptor * field_descriptor, bool skip_unsupported_fields, bool allow_repeat = true)
     {
         /// Keep track of the fields that are pending resolution to avoid recursive types, which are unsupported
-        std::unordered_set<const google::protobuf::FieldDescriptor *> pending_resolution{};
-        return getNameAndDataTypeFromFieldRecursive(field_descriptor, skip_unsupported_fields, allow_repeat, pending_resolution);
+        std::unordered_set<const google::protobuf::FieldDescriptor *> unresolved_descriptors{};
+        unresolved_descriptors.emplace(field_descriptor);
+        return getNameAndDataTypeFromFieldRecursive(field_descriptor, skip_unsupported_fields, allow_repeat, unresolved_descriptors);
     }
 }
 
@@ -3864,26 +3883,32 @@ std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
     const Strings & column_names,
     const DataTypes & data_types,
     std::vector<size_t> & missing_column_indices,
-    const google::protobuf::Descriptor & message_descriptor,
+    const ProtobufSchemas::DescriptorHolder & descriptor,
     bool with_length_delimiter,
     bool with_envelope,
     bool flatten_google_wrappers,
     ProtobufReader & reader)
 {
-    return ProtobufSerializerBuilder(reader).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter, with_envelope, flatten_google_wrappers);
+    return ProtobufSerializerBuilder(reader).buildMessageSerializer(
+        column_names, data_types, missing_column_indices,
+        *descriptor.message_descriptor,
+        with_length_delimiter, with_envelope, flatten_google_wrappers);
 }
 
 std::unique_ptr<ProtobufSerializer> ProtobufSerializer::create(
     const Strings & column_names,
     const DataTypes & data_types,
-    const google::protobuf::Descriptor & message_descriptor,
+    const ProtobufSchemas::DescriptorHolder & descriptor,
     bool with_length_delimiter,
     bool with_envelope,
     bool defaults_for_nullable_google_wrappers,
     ProtobufWriter & writer)
 {
     std::vector<size_t> missing_column_indices;
-    return ProtobufSerializerBuilder(writer).buildMessageSerializer(column_names, data_types, missing_column_indices, message_descriptor, with_length_delimiter, with_envelope, defaults_for_nullable_google_wrappers);
+    return ProtobufSerializerBuilder(writer).buildMessageSerializer(
+        column_names, data_types, missing_column_indices,
+        *descriptor.message_descriptor,
+        with_length_delimiter, with_envelope, defaults_for_nullable_google_wrappers);
 }
 
 NamesAndTypesList protobufSchemaToCHSchema(const google::protobuf::Descriptor * message_descriptor, bool skip_unsupported_fields)
diff --git a/src/Formats/ProtobufSerializer.h b/src/Formats/ProtobufSerializer.h
index d50f7e4956e..f40deb0c6d1 100644
--- a/src/Formats/ProtobufSerializer.h
+++ b/src/Formats/ProtobufSerializer.h
@@ -4,7 +4,8 @@
 
 #if USE_PROTOBUF
 #   include <Columns/IColumn.h>
-#include <Core/NamesAndTypes.h>
+#   include <Core/NamesAndTypes.h>
+#   include <Formats/ProtobufSchemas.h>
 
 
 namespace google::protobuf { class Descriptor; }
@@ -39,7 +40,7 @@ public:
         const Strings & column_names,
         const DataTypes & data_types,
         std::vector<size_t> & missing_column_indices,
-        const google::protobuf::Descriptor & message_descriptor,
+        const ProtobufSchemas::DescriptorHolder & descriptor,
         bool with_length_delimiter,
         bool with_envelope,
         bool flatten_google_wrappers,
@@ -48,7 +49,7 @@ public:
     static std::unique_ptr<ProtobufSerializer> create(
         const Strings & column_names,
         const DataTypes & data_types,
-        const google::protobuf::Descriptor & message_descriptor,
+        const ProtobufSchemas::DescriptorHolder & descriptor,
         bool with_length_delimiter,
         bool with_envelope,
         bool defaults_for_nullable_google_wrappers,
diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp
index 615589a7d43..0f6311c9716 100644
--- a/src/Functions/FunctionsConversion.cpp
+++ b/src/Functions/FunctionsConversion.cpp
@@ -83,6 +83,7 @@ namespace Setting
     extern const SettingsBool input_format_ipv4_default_on_conversion_error;
     extern const SettingsBool input_format_ipv6_default_on_conversion_error;
     extern const SettingsBool precise_float_parsing;
+    extern const SettingsBool date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands;
 }
 
 namespace ErrorCodes
@@ -1397,10 +1398,19 @@ struct ConvertImpl
                 offsets_to.resize(size);
 
                 WriteBufferFromVector<ColumnString::Chars> write_buffer(data_to);
-                const auto & type = static_cast<const FromDataType &>(*col_with_type_and_name.type);
+                const FromDataType & type = static_cast<const FromDataType &>(*col_with_type_and_name.type);
 
                 ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column);
 
+                bool cut_trailing_zeros_align_to_groups_of_thousands = false;
+                if (DB::CurrentThread::isInitialized())
+                {
+                    const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext();
+
+                    if (query_context)
+                        cut_trailing_zeros_align_to_groups_of_thousands = query_context->getSettingsRef()[Setting::date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands];
+                }
+
                 if (!null_map && arguments.size() > 1)
                     null_map = copyNullMap(arguments[1].column->convertToFullColumnIfConst());
 
@@ -1415,7 +1425,18 @@ struct ConvertImpl
                             else
                                 throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty");
                         }
-                        bool is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
+                        bool is_ok = true;
+                        if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
+                        {
+                            if (cut_trailing_zeros_align_to_groups_of_thousands)
+                                writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(DateTime64(vec_from[i]), type.getScale(), write_buffer, *time_zone);
+                            else
+                                is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
+                        }
+                        else
+                        {
+                            is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
+                        }
                         null_map->getData()[i] |= !is_ok;
                         writeChar(0, write_buffer);
                         offsets_to[i] = write_buffer.count();
@@ -1432,7 +1453,17 @@ struct ConvertImpl
                             else
                                 throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty");
                         }
-                        FormatImpl<FromDataType>::template execute<void>(vec_from[i], write_buffer, &type, time_zone);
+                        if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>) /// Only DateTime64 has a fractional part that can be cut.
+                        {
+                            if (cut_trailing_zeros_align_to_groups_of_thousands) /// Value of the query setting read before the loop.
+                                writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(DateTime64(vec_from[i]), type.getScale(), write_buffer, *time_zone);
+                            else
+                                FormatImpl<FromDataType>::template execute<void>(vec_from[i], write_buffer, &type, time_zone); /// No status is recorded in this branch, so use the void form as before.
+                        }
+                        else
+                        {
+                            FormatImpl<FromDataType>::template execute<void>(vec_from[i], write_buffer, &type, time_zone); /// Same call as before the setting was introduced.
+                        }
                         writeChar(0, write_buffer);
                         offsets_to[i] = write_buffer.count();
                     }
diff --git a/src/Functions/array/FunctionsMapMiscellaneous.cpp b/src/Functions/array/FunctionsMapMiscellaneous.cpp
index c3586a57161..368c0ad620f 100644
--- a/src/Functions/array/FunctionsMapMiscellaneous.cpp
+++ b/src/Functions/array/FunctionsMapMiscellaneous.cpp
@@ -349,14 +349,19 @@ struct MapKeyLikeAdapter
     }
 };
 
+struct FunctionIdentityMap : public FunctionIdentity /// FunctionIdentity variant used below as the inner function of mapKeys and mapValues.
+{
+    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } /// Opts out of the default LowCardinality handling for this function.
+};
+
 struct NameMapConcat { static constexpr auto name = "mapConcat"; };
 using FunctionMapConcat = FunctionMapToArrayAdapter<FunctionArrayConcat, MapToNestedAdapter<NameMapConcat>, NameMapConcat>;
 
 struct NameMapKeys { static constexpr auto name = "mapKeys"; };
-using FunctionMapKeys = FunctionMapToArrayAdapter<FunctionIdentity, MapToSubcolumnAdapter<NameMapKeys, 0>, NameMapKeys>;
+using FunctionMapKeys = FunctionMapToArrayAdapter<FunctionIdentityMap, MapToSubcolumnAdapter<NameMapKeys, 0>, NameMapKeys>;
 
 struct NameMapValues { static constexpr auto name = "mapValues"; };
-using FunctionMapValues = FunctionMapToArrayAdapter<FunctionIdentity, MapToSubcolumnAdapter<NameMapValues, 1>, NameMapValues>;
+using FunctionMapValues = FunctionMapToArrayAdapter<FunctionIdentityMap, MapToSubcolumnAdapter<NameMapValues, 1>, NameMapValues>;
 
 struct NameMapContains { static constexpr auto name = "mapContains"; };
 using FunctionMapContains = FunctionMapToArrayAdapter<FunctionArrayIndex<HasAction, NameMapContains>, MapToSubcolumnAdapter<NameMapContains, 0>, NameMapContains>;
diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp
index fcd96e97b4e..b2cccfe92aa 100644
--- a/src/IO/HTTPCommon.cpp
+++ b/src/IO/HTTPCommon.cpp
@@ -84,11 +84,9 @@ void assertResponseIsOk(const String & uri, Poco::Net::HTTPResponse & response,
             ? ErrorCodes::RECEIVED_ERROR_TOO_MANY_REQUESTS
             : ErrorCodes::RECEIVED_ERROR_FROM_REMOTE_IO_SERVER;
 
-        std::stringstream body; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
-        body.exceptions(std::ios::failbit);
-        body << istr.rdbuf();
-
-        throw HTTPException(code, uri, status, response.getReason(), body.str());
+        istr.seekg(0, std::ios::end); /// Try to find the body size without reading the body into memory.
+        const std::streamoff end_offset = istr.tellg(); /// tellg() yields -1 when the stream has failed or cannot seek.
+        throw HTTPException(code, uri, status, response.getReason(), end_offset < 0 ? 0 : static_cast<size_t>(end_offset)); /// Report 0 rather than a wrapped-around huge value when the length is unknown.
     }
 }
 
@@ -97,13 +95,13 @@ Exception HTTPException::makeExceptionMessage(
     const std::string & uri,
     Poco::Net::HTTPResponse::HTTPStatus http_status,
     const std::string & reason,
-    const std::string & body)
+    size_t body_length)
 {
     return Exception(code,
         "Received error from remote server {}. "
-        "HTTP status code: {} {}, "
-        "body: {}",
-        uri, static_cast<int>(http_status), reason, body);
+        "HTTP status code: {} '{}', "
+        "body length: {} bytes",
+        uri, static_cast<int>(http_status), reason, body_length);
 }
 
 }
diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h
index 4d0580acaba..309aebd4b9f 100644
--- a/src/IO/HTTPCommon.h
+++ b/src/IO/HTTPCommon.h
@@ -27,9 +27,9 @@ public:
         const std::string & uri,
         Poco::Net::HTTPResponse::HTTPStatus http_status_,
         const std::string & reason,
-        const std::string & body
+        size_t body_length = 0
     )
-        : Exception(makeExceptionMessage(code, uri, http_status_, reason, body))
+        : Exception(makeExceptionMessage(code, uri, http_status_, reason, body_length))
         , http_status(http_status_)
     {}
 
@@ -46,7 +46,7 @@ private:
         const std::string & uri,
         Poco::Net::HTTPResponse::HTTPStatus http_status,
         const std::string & reason,
-        const std::string & body);
+        size_t body_length);
 
     const char * name() const noexcept override { return "DB::HTTPException"; }
     const char * className() const noexcept override { return "DB::HTTPException"; }
diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp
index eb2d5128454..3bad6a80786 100644
--- a/src/IO/ReadWriteBufferFromHTTP.cpp
+++ b/src/IO/ReadWriteBufferFromHTTP.cpp
@@ -423,8 +423,7 @@ std::unique_ptr<ReadBuffer> ReadWriteBufferFromHTTP::initialize()
                     ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
                     current_uri.toString(),
                     Poco::Net::HTTPResponse::HTTP_REQUESTED_RANGE_NOT_SATISFIABLE,
-                    reason,
-                    "");
+                    reason);
             }
             throw Exception(
                 ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
@@ -549,8 +548,7 @@ size_t ReadWriteBufferFromHTTP::readBigAt(char * to, size_t n, size_t offset, co
                     ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
                     current_uri.toString(),
                     Poco::Net::HTTPResponse::HTTP_REQUESTED_RANGE_NOT_SATISFIABLE,
-                    reason,
-                    "");
+                    reason);
             }
 
             copyFromIStreamWithProgressCallback(*result.response_stream, to, n, progress_callback, &bytes_copied, &is_canceled);
diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h
index a890248b3f4..27363093549 100644
--- a/src/IO/WriteHelpers.h
+++ b/src/IO/WriteHelpers.h
@@ -811,7 +811,7 @@ inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf)
 void writeIPv4Text(const IPv4 & ip, WriteBuffer & buf);
 void writeIPv6Text(const IPv6 & ip, WriteBuffer & buf);
 
-template <typename DecimalType>
+template <typename DecimalType, bool cut_trailing_zeros_align_to_groups_of_thousands = false>
 inline void writeDateTime64FractionalText(typename DecimalType::NativeType fractional, UInt32 scale, WriteBuffer & buf)
 {
     static constexpr UInt32 MaxScale = DecimalUtils::max_precision<DecimalType>;
@@ -822,7 +822,23 @@ inline void writeDateTime64FractionalText(typename DecimalType::NativeType fract
     for (Int32 pos = scale - 1; pos >= 0 && fractional; --pos, fractional /= DateTime64(10))
         data[pos] += fractional % DateTime64(10);
 
-    writeString(&data[0], static_cast<size_t>(scale), buf);
+    if constexpr (cut_trailing_zeros_align_to_groups_of_thousands) /// Compile-time switch: print the fraction as exactly 3 or 6 digits.
+    {
+        UInt32 last_none_zero_pos = 0; /// Index of the last digit that is not '0'; stays 0 when every digit is '0'.
+        for (UInt32 pos = 0; pos < scale; ++pos)
+        {
+            if (data[pos] != '0')
+            {
+                last_none_zero_pos = pos;
+            }
+        }
+        size_t new_scale = (last_none_zero_pos >= 3 ? 6 : 3); /// NOTE(review): digits at index >= 6 are dropped when scale > 6, and for scale < new_scale this relies on data being '0'-filled past scale - confirm both are intended.
+        writeString(&data[0], new_scale, buf);
+    }
+    else
+    {
+        writeString(&data[0], static_cast<size_t>(scale), buf); /// Default mode: print exactly `scale` digits.
+    }
 }
 
 static const char digits100[201] =
@@ -935,7 +951,12 @@ inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTI
 }
 
 /// In the format YYYY-MM-DD HH:MM:SS.NNNNNNNNN, according to the specified time zone.
-template <char date_delimeter = '-', char time_delimeter = ':', char between_date_time_delimiter = ' ', char fractional_time_delimiter = '.'>
+template <
+    char date_delimeter = '-',
+    char time_delimeter = ':',
+    char between_date_time_delimiter = ' ',
+    char fractional_time_delimiter = '.',
+    bool cut_trailing_zeros_align_to_groups_of_thousands = false>
 inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance())
 {
     static constexpr UInt32 MaxScale = DecimalUtils::max_precision<DateTime64>;
@@ -960,12 +981,27 @@ inline void writeDateTimeText(DateTime64 datetime64, UInt32 scale, WriteBuffer &
     }
 
     writeDateTimeText<date_delimeter, time_delimeter, between_date_time_delimiter>(LocalDateTime(components.whole, time_zone), buf);
-
-    if (scale > 0)
+    if constexpr (cut_trailing_zeros_align_to_groups_of_thousands)
     {
-        buf.write(fractional_time_delimiter);
-        writeDateTime64FractionalText<DateTime64>(components.fractional, scale, buf);
+        if (scale > 0 && components.fractional != 0)
+        {
+            buf.write(fractional_time_delimiter);
+            writeDateTime64FractionalText<DateTime64, true>(components.fractional, scale, buf);
+        }
     }
+    else
+    {
+        if (scale > 0)
+        {
+            buf.write(fractional_time_delimiter);
+            writeDateTime64FractionalText<DateTime64, false>(components.fractional, scale, buf);
+        }
+    }
+}
+
+inline void writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(DateTime64 datetime64, UInt32 scale, WriteBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) /// Convenience wrapper over the DateTime64 writeDateTimeText overload.
+{
+    writeDateTimeText<'-', ':', ' ', '.', true>(datetime64, scale, buf, time_zone); /// Same delimiters as the template defaults; the trailing `true` turns on trailing-zero cutting.
 }
 
 /// In the RFC 1123 format: "Tue, 03 Dec 2019 00:11:50 GMT". You must provide GMT DateLUT.
diff --git a/src/IO/tests/gtest_DateTimeToString.cpp b/src/IO/tests/gtest_DateTimeToString.cpp
index 078ca655d95..0b66959cf43 100644
--- a/src/IO/tests/gtest_DateTimeToString.cpp
+++ b/src/IO/tests/gtest_DateTimeToString.cpp
@@ -78,7 +78,7 @@ TEST(DateTimeToStringTest, RFC1123)
     ASSERT_EQ(out.str(), "Fri, 18 Mar 2005 01:58:31 GMT");
 }
 
-template <typename ValueType>
+template <typename ValueType, bool date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = false>
 class DateTimeToStringParamTestBase : public ::testing::TestWithParam<DateTimeToStringParamTestCase<ValueType>>
 {
 public:
@@ -99,7 +99,10 @@ public:
         }
         else if constexpr (std::is_same_v<ValueType, DateTime64WithScale>)
         {
-            writeDateTimeText(input.value, input.scale, out, DateLUT::instance(timezone_name));
+            if constexpr (date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands)
+                writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands(input.value, input.scale, out, DateLUT::instance(timezone_name));
+            else
+                writeDateTimeText(input.value, input.scale, out, DateLUT::instance(timezone_name));
         }
 
         ASSERT_EQ(expected, out.str());
@@ -130,6 +133,14 @@ TEST_P(DateTimeToStringParamTestDateTime64, writeDateText)
     ASSERT_NO_FATAL_FAILURE(test(GetParam()));
 }
 
+class DateTimeToStringParamTestDateTime64TrimZeros : public DateTimeToStringParamTestBase<DateTime64WithScale, true> /// `true` makes test() call writeDateTimeTextCutTrailingZerosAlignToGroupOfThousands.
+{};
+
+TEST_P(DateTimeToStringParamTestDateTime64TrimZeros, writeDateText) /// Runs the shared test() body once per instantiated parameter.
+{
+    ASSERT_NO_FATAL_FAILURE(test(GetParam()));
+}
+
 static const Int32 NON_ZERO_TIME_T = 10 * 365 * 3600 * 24 + 123456; /// NOTE This arithmetic is obviously wrong but it's ok for test.
 
 INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDayNum,
@@ -212,3 +223,36 @@ INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64,
 //        },
     })
 );
+
+
+INSTANTIATE_TEST_SUITE_P(DateTimeToString, DateTimeToStringParamTestDateTime64TrimZeros,
+    ::testing::ValuesIn(std::initializer_list<DateTimeToStringParamTestCase<DateTime64WithScale>>
+    {
+         /// Inside basic LUT boundaries
+         {
+             "Zero DateTime64 with scale 0",
+             DateTime64WithScale{0, 0},
+             "1970-01-01 00:00:00"
+         },
+         {
+             "Zero DateTime64 with scale 6, fractional is trimmed",
+             DateTime64WithScale{0, 6}, /// Zero fractional part: neither the delimiter nor any digits are written.
+             "1970-01-01 00:00:00"
+         },
+         {
+             "DateTime64 with scale 3, fractional is trimmed",
+             DateTime64WithScale{NON_ZERO_TIME_T * 1000LL, 3}, /// Whole seconds scaled by 1000, so the fractional part is zero.
+             "1979-12-31 10:17:36"
+         },
+         {
+             "DateTime64 with scale 6, fractional is partially trimmed",
+             DateTime64WithScale{120000, 6}, /// Fraction 0.120000: the last non-zero digit is at index 1, so 3 digits are written.
+             "1970-01-01 00:00:00.120"
+         },
+         {
+             "DateTime64 with scale 6, fractional is kept",
+             DateTime64WithScale{123456, 6}, /// The last non-zero digit is at index 5, so 6 digits are written.
+             "1970-01-01 00:00:00.123456"
+         },
+    })
+ );
diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index ffe9a611014..f7b7ffc5aea 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -122,11 +122,6 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s
         query_limit = std::make_unique<FileCacheQueryLimit>();
 }
 
-FileCache::Key FileCache::createKeyForPath(const String & path)
-{
-    return Key(path);
-}
-
 const FileCache::UserInfo & FileCache::getCommonUser()
 {
     static UserInfo user(getCommonUserID(), 0);
@@ -1083,7 +1078,7 @@ void FileCache::freeSpaceRatioKeepingThreadFunc()
         if (eviction_candidates.size() > 0)
         {
             LOG_TRACE(log, "Current usage {}/{} in size, {}/{} in elements count "
-                    "(trying to keep size ration at {} and elements ratio at {}). "
+                    "(trying to keep size ratio at {} and elements ratio at {}). "
                     "Collected {} eviction candidates, "
                     "skipped {} candidates while iterating",
                     main_priority->getSize(lock), size_limit,
@@ -1168,7 +1163,7 @@ void FileCache::removeFileSegment(const Key & key, size_t offset, const UserID &
 
 void FileCache::removePathIfExists(const String & path, const UserID & user_id)
 {
-    removeKeyIfExists(createKeyForPath(path), user_id);
+    removeKeyIfExists(Key::fromPath(path), user_id);
 }
 
 void FileCache::removeAllReleasable(const UserID & user_id)
diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h
index 8e8f01ff39e..a25c945cdf7 100644
--- a/src/Interpreters/Cache/FileCache.h
+++ b/src/Interpreters/Cache/FileCache.h
@@ -88,8 +88,6 @@ public:
 
     const String & getBasePath() const;
 
-    static Key createKeyForPath(const String & path);
-
     static const UserInfo & getCommonUser();
 
     static const UserInfo & getInternalUser();
diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp
index 6f0ba7bffaf..c638f445c36 100644
--- a/src/Interpreters/Cache/FileCacheFactory.cpp
+++ b/src/Interpreters/Cache/FileCacheFactory.cpp
@@ -1,5 +1,6 @@
 #include "FileCacheFactory.h"
 #include "FileCache.h"
+#include <Poco/Util/AbstractConfiguration.h>
 
 namespace DB
 {
@@ -43,6 +44,16 @@ FileCacheFactory::CacheByName FileCacheFactory::getAll()
     return caches_by_name;
 }
 
+FileCachePtr FileCacheFactory::get(const std::string & cache_name) /// Returns the already registered cache named cache_name; never creates one.
+{
+    std::lock_guard lock(mutex); /// Held for the whole lookup in caches_by_name.
+
+    auto it = caches_by_name.find(cache_name);
+    if (it == caches_by_name.end())
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no cache by name `{}`", cache_name); /// Unknown name is reported as BAD_ARGUMENTS.
+    return it->second->cache;
+}
+
 FileCachePtr FileCacheFactory::getOrCreate(
     const std::string & cache_name,
     const FileCacheSettings & file_cache_settings,
@@ -202,4 +213,20 @@ void FileCacheFactory::clear()
     caches_by_name.clear();
 }
 
+void FileCacheFactory::loadDefaultCaches(const Poco::Util::AbstractConfiguration & config) /// Creates and initializes one cache per child key of FILECACHE_DEFAULT_CONFIG_PATH in config.
+{
+    Poco::Util::AbstractConfiguration::Keys cache_names;
+    config.keys(FILECACHE_DEFAULT_CONFIG_PATH, cache_names); /// Each child key is used as a cache name.
+    auto * log = &Poco::Logger::get("FileCacheFactory");
+    LOG_DEBUG(log, "Will load {} caches from default cache config", cache_names.size());
+    for (const auto & name : cache_names)
+    {
+        FileCacheSettings settings;
+        const auto & config_path = fmt::format("{}.{}", FILECACHE_DEFAULT_CONFIG_PATH, name); /// Per-cache section path; the const reference extends the temporary's lifetime.
+        settings.loadFromConfig(config, config_path);
+        auto cache = getOrCreate(name, settings, config_path); /// Registers the cache under `name` (or returns the existing one).
+        cache->initialize();
+        LOG_DEBUG(log, "Loaded cache `{}` from default cache config", name);
+    }
+}
 }
diff --git a/src/Interpreters/Cache/FileCacheFactory.h b/src/Interpreters/Cache/FileCacheFactory.h
index 350932dce36..d770823782e 100644
--- a/src/Interpreters/Cache/FileCacheFactory.h
+++ b/src/Interpreters/Cache/FileCacheFactory.h
@@ -44,6 +44,8 @@ public:
         const FileCacheSettings & file_cache_settings,
         const std::string & config_path);
 
+    FileCachePtr get(const std::string & cache_name);
+
     FileCachePtr create(
         const std::string & cache_name,
         const FileCacheSettings & file_cache_settings,
@@ -53,8 +55,12 @@ public:
 
     FileCacheDataPtr getByName(const std::string & cache_name);
 
+    void loadDefaultCaches(const Poco::Util::AbstractConfiguration & config);
+
     void updateSettingsFromConfig(const Poco::Util::AbstractConfiguration & config);
+
     void remove(FileCachePtr cache);
+
     void clear();
 
 private:
diff --git a/src/Interpreters/Cache/FileCacheKey.cpp b/src/Interpreters/Cache/FileCacheKey.cpp
index 75a8ac2934e..ffa1bb4b96a 100644
--- a/src/Interpreters/Cache/FileCacheKey.cpp
+++ b/src/Interpreters/Cache/FileCacheKey.cpp
@@ -12,11 +12,6 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }
 
-FileCacheKey::FileCacheKey(const std::string & path)
-    : key(sipHash128(path.data(), path.size()))
-{
-}
-
 FileCacheKey::FileCacheKey(const UInt128 & key_)
     : key(key_)
 {
@@ -32,6 +27,16 @@ FileCacheKey FileCacheKey::random()
     return FileCacheKey(UUIDHelpers::generateV4().toUnderType());
 }
 
+FileCacheKey FileCacheKey::fromPath(const std::string & path) /// Named replacement for the removed FileCacheKey(const std::string &) constructor.
+{
+    return FileCacheKey(sipHash128(path.data(), path.size())); /// The key is the 128-bit SipHash of the path bytes.
+}
+
+FileCacheKey FileCacheKey::fromKey(const UInt128 & key) /// Wraps an already computed 128-bit key value without hashing it.
+{
+    return FileCacheKey(key); /// Uses the UInt128 constructor, which this patch makes private in the header.
+}
+
 FileCacheKey FileCacheKey::fromKeyString(const std::string & key_str)
 {
     if (key_str.size() != 32)
diff --git a/src/Interpreters/Cache/FileCacheKey.h b/src/Interpreters/Cache/FileCacheKey.h
index 7de2a8f7800..71b8a8745ee 100644
--- a/src/Interpreters/Cache/FileCacheKey.h
+++ b/src/Interpreters/Cache/FileCacheKey.h
@@ -14,16 +14,16 @@ struct FileCacheKey
 
     FileCacheKey() = default;
 
-    explicit FileCacheKey(const std::string & path);
-
-    explicit FileCacheKey(const UInt128 & key_);
-
     static FileCacheKey random();
+    static FileCacheKey fromPath(const std::string & path);
+    static FileCacheKey fromKey(const UInt128 & key);
+    static FileCacheKey fromKeyString(const std::string & key_str);
 
     bool operator==(const FileCacheKey & other) const { return key == other.key; }
     bool operator<(const FileCacheKey & other) const { return key < other.key; }
 
-    static FileCacheKey fromKeyString(const std::string & key_str);
+private:
+    explicit FileCacheKey(const UInt128 & key_);
 };
 
 using FileCacheKeyAndOffset = std::pair<FileCacheKey, size_t>;
diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h
index 3bb334dbf05..bdd591d75da 100644
--- a/src/Interpreters/Cache/FileCache_fwd.h
+++ b/src/Interpreters/Cache/FileCache_fwd.h
@@ -15,10 +15,12 @@ static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;
 static constexpr double FILECACHE_DEFAULT_FREE_SPACE_SIZE_RATIO = 0; /// Disabled.
 static constexpr double FILECACHE_DEFAULT_FREE_SPACE_ELEMENTS_RATIO = 0; /// Disabled.
 static constexpr int FILECACHE_DEFAULT_FREE_SPACE_REMOVE_BATCH = 10;
+static constexpr auto FILECACHE_DEFAULT_CONFIG_PATH = "filesystem_caches";
 
 class FileCache;
 using FileCachePtr = std::shared_ptr<FileCache>;
 
 struct FileCacheSettings;
+struct FileCacheKey;
 
 }
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index e34bf9138ac..cc5d8fc255a 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -451,12 +451,12 @@ void executeQuery(
         return;
     }
 
-    DataStreams input_streams;
-    input_streams.reserve(plans.size());
+    Headers input_headers;
+    input_headers.reserve(plans.size());
     for (auto & plan : plans)
-        input_streams.emplace_back(plan->getCurrentDataStream());
+        input_headers.emplace_back(plan->getCurrentHeader());
 
-    auto union_step = std::make_unique<UnionStep>(std::move(input_streams));
+    auto union_step = std::make_unique<UnionStep>(std::move(input_headers));
     query_plan.unitePlans(std::move(union_step), std::move(plans));
 }
 
@@ -653,16 +653,16 @@ void executeQueryWithParallelReplicas(
         auto remote_plan = std::make_unique<QueryPlan>();
         remote_plan->addStep(std::move(read_from_remote));
 
-        DataStreams input_streams;
-        input_streams.reserve(2);
-        input_streams.emplace_back(local_plan->getCurrentDataStream());
-        input_streams.emplace_back(remote_plan->getCurrentDataStream());
+        Headers input_headers;
+        input_headers.reserve(2);
+        input_headers.emplace_back(local_plan->getCurrentHeader());
+        input_headers.emplace_back(remote_plan->getCurrentHeader());
 
         std::vector<QueryPlanPtr> plans;
         plans.emplace_back(std::move(local_plan));
         plans.emplace_back(std::move(remote_plan));
 
-        auto union_step = std::make_unique<UnionStep>(std::move(input_streams));
+        auto union_step = std::make_unique<UnionStep>(std::move(input_headers));
         query_plan.unitePlans(std::move(union_step), std::move(plans));
     }
     else
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 8402dc74c1e..8962be59f86 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1192,7 +1192,7 @@ void Context::setFilesystemCachesPath(const String & path)
 {
     std::lock_guard lock(shared->mutex);
 
-    if (!fs::path(path).is_absolute())
+    if (getApplicationType() != ApplicationType::LOCAL && !fs::path(path).is_absolute())
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filesystem caches path must be absolute: {}", path);
 
     shared->filesystem_caches_path = path;
diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp
index ee20c9452f6..f1a2b2a3f8b 100644
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@@ -1380,7 +1380,7 @@ void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table)
     for (const auto & [disk_name, disk] : getContext()->getDisksMap())
     {
         String data_path = "store/" + getPathForUUID(table.table_id.uuid);
-        if (disk->isReadOnly() || !disk->exists(data_path))
+        if (disk->isReadOnly() || !disk->existsDirectory(data_path))
             continue;
 
         LOG_INFO(log, "Removing data directory {} of dropped table {} from disk {}", data_path, table.table_id.getNameForLogs(), disk_name);
@@ -1663,7 +1663,7 @@ void DatabaseCatalog::cleanupStoreDirectoryTask()
         for (auto it = disk->iterateDirectory("store"); it->isValid(); it->next())
         {
             String prefix = it->name();
-            bool expected_prefix_dir = disk->isDirectory(it->path()) && prefix.size() == 3 && isHexDigit(prefix[0]) && isHexDigit(prefix[1])
+            bool expected_prefix_dir = disk->existsDirectory(it->path()) && prefix.size() == 3 && isHexDigit(prefix[0]) && isHexDigit(prefix[1])
                 && isHexDigit(prefix[2]);
 
             if (!expected_prefix_dir)
@@ -1680,7 +1680,7 @@ void DatabaseCatalog::cleanupStoreDirectoryTask()
                 UUID uuid;
                 bool parsed = tryParse(uuid, uuid_str);
 
-                bool expected_dir = disk->isDirectory(jt->path()) && parsed && uuid != UUIDHelpers::Nil && uuid_str.starts_with(prefix);
+                bool expected_dir = disk->existsDirectory(jt->path()) && parsed && uuid != UUIDHelpers::Nil && uuid_str.starts_with(prefix);
 
                 if (!expected_dir)
                 {
diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp
index 70b734f4b9f..6bf5e1d5845 100644
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@@ -1056,7 +1056,7 @@ static std::shared_ptr<IJoin> tryCreateJoin(
 static std::shared_ptr<IJoin> chooseJoinAlgorithm(
     std::shared_ptr<TableJoin> analyzed_join, const ColumnsWithTypeAndName & left_sample_columns, std::unique_ptr<QueryPlan> & joined_plan, ContextPtr context)
 {
-    Block right_sample_block = joined_plan->getCurrentDataStream().header;
+    Block right_sample_block = joined_plan->getCurrentHeader();
     const auto & join_algorithms = analyzed_join->getEnabledJoinAlgorithms();
     for (const auto alg : join_algorithms)
     {
@@ -1109,13 +1109,13 @@ static std::unique_ptr<QueryPlan> buildJoinedPlan(
                 rename_dag.getOutputs()[pos] = &alias;
             }
         }
-        rename_dag.appendInputsForUnusedColumns(joined_plan->getCurrentDataStream().header);
-        auto rename_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentDataStream(), std::move(rename_dag));
+        rename_dag.appendInputsForUnusedColumns(joined_plan->getCurrentHeader());
+        auto rename_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentHeader(), std::move(rename_dag));
         rename_step->setStepDescription("Rename joined columns");
         joined_plan->addStep(std::move(rename_step));
     }
 
-    auto joined_actions_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentDataStream(), std::move(joined_block_actions));
+    auto joined_actions_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentHeader(), std::move(joined_block_actions));
     joined_actions_step->setStepDescription("Joined actions");
     joined_plan->addStep(std::move(joined_actions_step));
 
@@ -1197,11 +1197,11 @@ JoinPtr SelectQueryExpressionAnalyzer::makeJoin(
 
     joined_plan = buildJoinedPlan(getContext(), join_element, *analyzed_join, query_options);
 
-    const ColumnsWithTypeAndName & right_columns = joined_plan->getCurrentDataStream().header.getColumnsWithTypeAndName();
+    const ColumnsWithTypeAndName & right_columns = joined_plan->getCurrentHeader().getColumnsWithTypeAndName();
     std::tie(left_convert_actions, right_convert_actions) = analyzed_join->createConvertingActions(left_columns, right_columns);
     if (right_convert_actions)
     {
-        auto converting_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentDataStream(), std::move(*right_convert_actions));
+        auto converting_step = std::make_unique<ExpressionStep>(joined_plan->getCurrentHeader(), std::move(*right_convert_actions));
         converting_step->setStepDescription("Convert joined columns");
         joined_plan->addStep(std::move(converting_step));
     }
diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp
index 602c79ee332..a4fb7615120 100644
--- a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp
+++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp
@@ -182,10 +182,10 @@ void IInterpreterUnionOrSelectQuery::addAdditionalPostFilter(QueryPlan & plan) c
     if (!ast)
         return;
 
-    auto dag = makeAdditionalPostFilter(ast, context, plan.getCurrentDataStream().header);
+    auto dag = makeAdditionalPostFilter(ast, context, plan.getCurrentHeader());
     std::string filter_name = dag.getOutputs().back()->result_name;
     auto filter_step = std::make_unique<FilterStep>(
-        plan.getCurrentDataStream(), std::move(dag), std::move(filter_name), true);
+        plan.getCurrentHeader(), std::move(dag), std::move(filter_name), true);
     filter_step->setStepDescription("Additional result filter");
     plan.addStep(std::move(filter_step));
 }
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index e804da8710f..ab712f8d55d 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -16,6 +16,7 @@
 #include <Common/StringUtils.h>
 #include <Common/atomicRename.h>
 #include <Common/escapeForFileName.h>
+#include <Common/getRandomASCIIString.h>
 #include <Common/logger_useful.h>
 #include <Common/randomSeed.h>
 #include <Common/typeid_cast.h>
@@ -1663,7 +1664,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
     if (need_add_to_database && !database)
         throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} does not exist", backQuoteIfNeed(database_name));
 
-    if (create.replace_table)
+    if (create.replace_table /// NOTE: `database` is only verified to be non-null when need_add_to_database is set, so guard it below.
+        || (create.replace_view && database && (database->getEngineName() == "Atomic" || database->getEngineName() == "Replicated")))
     {
         chassert(!ddl_guard);
         return doCreateOrReplaceTable(create, properties, mode);
@@ -1973,15 +1975,19 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
 
 
         UInt64 name_hash = sipHash64(create.getDatabase() + create.getTable());
-        UInt16 random_suffix = thread_local_rng();
+        String random_suffix;
         if (auto txn = current_context->getZooKeeperMetadataTransaction())
         {
             /// Avoid different table name on database replicas
-            random_suffix = sipHash64(txn->getTaskZooKeeperPath());
+            UInt16 hashed_zk_path = sipHash64(txn->getTaskZooKeeperPath());
+            random_suffix = getHexUIntLowercase(hashed_zk_path);
         }
-        create.setTable(fmt::format("_tmp_replace_{}_{}",
-                            getHexUIntLowercase(name_hash),
-                            getHexUIntLowercase(random_suffix)));
+        else
+        {
+            random_suffix = getRandomASCIIString(/*length=*/4);
+        }
+
+        create.setTable(fmt::format("_tmp_replace_{}_{}", getHexUIntLowercase(name_hash), random_suffix));
 
         ast_drop->setTable(create.getTable());
         ast_drop->is_dictionary = create.is_dictionary;
@@ -2024,16 +2030,16 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
 
         auto ast_rename = std::make_shared<ASTRenameQuery>(ASTRenameQuery::Elements{std::move(elem)});
         ast_rename->dictionary = create.is_dictionary;
-        if (create.create_or_replace)
+        if (create.create_or_replace || create.replace_view)
         {
-            /// CREATE OR REPLACE TABLE
+            /// CREATE OR REPLACE TABLE/VIEW
             /// Will execute ordinary RENAME instead of EXCHANGE if the target table does not exist
             ast_rename->rename_if_cannot_exchange = true;
             ast_rename->exchange = false;
         }
         else
         {
-            /// REPLACE TABLE
+            /// REPLACE TABLE (queries with replace_view set always take the branch above)
             /// Will execute EXCHANGE query and fail if the target table does not exist
             ast_rename->exchange = true;
         }
diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h
index 25ff32a09d3..cb7af25383e 100644
--- a/src/Interpreters/InterpreterCreateQuery.h
+++ b/src/Interpreters/InterpreterCreateQuery.h
@@ -85,7 +85,8 @@ public:
     void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const override;
 
     /// Check access right, validate definer statement and replace `CURRENT USER` with actual name.
-    static void processSQLSecurityOption(ContextPtr context_, ASTSQLSecurity & sql_security, bool is_materialized_view = false, bool skip_check_permissions = false);
+    static void processSQLSecurityOption(
+        ContextPtr context_, ASTSQLSecurity & sql_security, bool is_materialized_view = false, bool skip_check_permissions = false);
 
 private:
     struct TableProperties
diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp
index 6bccd1d3cf7..f420411995c 100644
--- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp
+++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp
@@ -128,29 +128,29 @@ void InterpreterSelectIntersectExceptQuery::buildQueryPlan(QueryPlan & query_pla
 
     size_t num_plans = nested_interpreters.size();
     std::vector<std::unique_ptr<QueryPlan>> plans(num_plans);
-    DataStreams data_streams(num_plans);
+    Headers headers(num_plans);
 
     for (size_t i = 0; i < num_plans; ++i)
     {
         plans[i] = std::make_unique<QueryPlan>();
         nested_interpreters[i]->buildQueryPlan(*plans[i]);
 
-        if (!blocksHaveEqualStructure(plans[i]->getCurrentDataStream().header, result_header))
+        if (!blocksHaveEqualStructure(plans[i]->getCurrentHeader(), result_header))
         {
             auto actions_dag = ActionsDAG::makeConvertingActions(
-                    plans[i]->getCurrentDataStream().header.getColumnsWithTypeAndName(),
+                    plans[i]->getCurrentHeader().getColumnsWithTypeAndName(),
                     result_header.getColumnsWithTypeAndName(),
                     ActionsDAG::MatchColumnsMode::Position);
-            auto converting_step = std::make_unique<ExpressionStep>(plans[i]->getCurrentDataStream(), std::move(actions_dag));
+            auto converting_step = std::make_unique<ExpressionStep>(plans[i]->getCurrentHeader(), std::move(actions_dag));
             converting_step->setStepDescription("Conversion before UNION");
             plans[i]->addStep(std::move(converting_step));
         }
 
-        data_streams[i] = plans[i]->getCurrentDataStream();
+        headers[i] = plans[i]->getCurrentHeader();
     }
 
     const Settings & settings = context->getSettingsRef();
-    auto step = std::make_unique<IntersectOrExceptStep>(std::move(data_streams), final_operator, settings[Setting::max_threads]);
+    auto step = std::make_unique<IntersectOrExceptStep>(std::move(headers), final_operator, settings[Setting::max_threads]);
     query_plan.unitePlans(std::move(step), std::move(plans));
 
     const auto & query = query_ptr->as<ASTSelectIntersectExceptQuery &>();
@@ -161,7 +161,7 @@ void InterpreterSelectIntersectExceptQuery::buildQueryPlan(QueryPlan & query_pla
         SizeLimits limits(settings[Setting::max_rows_in_distinct], settings[Setting::max_bytes_in_distinct], settings[Setting::distinct_overflow_mode]);
 
         auto distinct_step = std::make_unique<DistinctStep>(
-            query_plan.getCurrentDataStream(),
+            query_plan.getCurrentHeader(),
             limits,
             0,
             result_header.getNames(),
diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 30260ebbea9..fe851ed7261 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1087,15 +1087,15 @@ void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan)
     executeImpl(query_plan, std::move(input_pipe));
 
     /// We must guarantee that result structure is the same as in getSampleBlock()
-    if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header))
+    if (!blocksHaveEqualStructure(query_plan.getCurrentHeader(), result_header))
     {
         auto convert_actions_dag = ActionsDAG::makeConvertingActions(
-            query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
+            query_plan.getCurrentHeader().getColumnsWithTypeAndName(),
             result_header.getColumnsWithTypeAndName(),
             ActionsDAG::MatchColumnsMode::Name,
             true);
 
-        auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(convert_actions_dag));
+        auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(convert_actions_dag));
         query_plan.addStep(std::move(converting));
     }
 
@@ -1606,7 +1606,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
         if (expressions.filter_info)
         {
             auto row_level_security_step = std::make_unique<FilterStep>(
-                query_plan.getCurrentDataStream(),
+                query_plan.getCurrentHeader(),
                 expressions.filter_info->actions.clone(),
                 expressions.filter_info->column_name,
                 expressions.filter_info->do_remove_column);
@@ -1620,7 +1620,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
             if (expressions.prewhere_info->row_level_filter)
             {
                 auto row_level_filter_step = std::make_unique<FilterStep>(
-                    query_plan.getCurrentDataStream(),
+                    query_plan.getCurrentHeader(),
                     expressions.prewhere_info->row_level_filter->clone(),
                     expressions.prewhere_info->row_level_column_name,
                     true);
@@ -1630,7 +1630,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
             }
 
             auto prewhere_step = std::make_unique<FilterStep>(
-                query_plan.getCurrentDataStream(),
+                query_plan.getCurrentHeader(),
                 expressions.prewhere_info->prewhere_actions.clone(),
                 expressions.prewhere_info->prewhere_column_name,
                 expressions.prewhere_info->remove_prewhere_column);
@@ -1732,7 +1732,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
             if (expressions.filter_info)
             {
                 auto row_level_security_step = std::make_unique<FilterStep>(
-                    query_plan.getCurrentDataStream(),
+                    query_plan.getCurrentHeader(),
                     expressions.filter_info->actions.clone(),
                     expressions.filter_info->column_name,
                     expressions.filter_info->do_remove_column);
@@ -1744,7 +1744,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
             const auto add_filter_step = [&](auto & new_filter_info, const std::string & description)
             {
                 auto filter_step = std::make_unique<FilterStep>(
-                    query_plan.getCurrentDataStream(),
+                    query_plan.getCurrentHeader(),
                     std::move(new_filter_info->actions),
                     new_filter_info->column_name,
                     new_filter_info->do_remove_column);
@@ -1766,7 +1766,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
             {
                 QueryPlanStepPtr array_join_step
                     = std::make_unique<ArrayJoinStep>(
-                        query_plan.getCurrentDataStream(),
+                        query_plan.getCurrentHeader(),
                         *expressions.array_join,
                         settings[Setting::enable_unaligned_array_join],
                         settings[Setting::max_block_size]);
@@ -1787,7 +1787,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
                 if (expressions.join->isFilled())
                 {
                     QueryPlanStepPtr filled_join_step
-                        = std::make_unique<FilledJoinStep>(query_plan.getCurrentDataStream(), expressions.join, settings[Setting::max_block_size]);
+                        = std::make_unique<FilledJoinStep>(query_plan.getCurrentHeader(), expressions.join, settings[Setting::max_block_size]);
 
                     filled_join_step->setStepDescription("JOIN");
                     query_plan.addStep(std::move(filled_join_step));
@@ -1809,7 +1809,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
                         SortingStep::Settings sort_settings(*context);
 
                         auto sorting_step = std::make_unique<SortingStep>(
-                            plan.getCurrentDataStream(),
+                            plan.getCurrentHeader(),
                             std::move(order_descr),
                             0 /* LIMIT */, sort_settings);
                         sorting_step->setStepDescription(fmt::format("Sort {} before JOIN", join_pos));
@@ -1821,7 +1821,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
                         = [&settings, crosswise_connection](QueryPlan & plan, const Names & key_names, JoinTableSide join_pos)
                     {
                         auto creating_set_step = std::make_unique<CreateSetAndFilterOnTheFlyStep>(
-                            plan.getCurrentDataStream(),
+                            plan.getCurrentHeader(),
                             key_names,
                             settings[Setting::max_rows_in_set_to_optimize_join],
                             crosswise_connection,
@@ -1858,8 +1858,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
                         /// It doesn't hold such a guarantee for streams with const keys.
                         /// Note: it's also doesn't work with the read-in-order optimization.
                         /// No checks here because read in order is not applied if we have `CreateSetAndFilterOnTheFlyStep` in the pipeline between the reading and sorting steps.
-                        bool has_non_const_keys = has_non_const(query_plan.getCurrentDataStream().header, join_clause.key_names_left)
-                            && has_non_const(joined_plan->getCurrentDataStream().header, join_clause.key_names_right);
+                        bool has_non_const_keys = has_non_const(query_plan.getCurrentHeader(), join_clause.key_names_left)
+                            && has_non_const(joined_plan->getCurrentHeader(), join_clause.key_names_right);
 
                         if (settings[Setting::max_rows_in_set_to_optimize_join] > 0 && join_type_allows_filtering && has_non_const_keys)
                         {
@@ -1878,8 +1878,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
                     }
 
                     QueryPlanStepPtr join_step = std::make_unique<JoinStep>(
-                        query_plan.getCurrentDataStream(),
-                        joined_plan->getCurrentDataStream(),
+                        query_plan.getCurrentHeader(),
+                        joined_plan->getCurrentHeader(),
                         expressions.join,
                         settings[Setting::max_block_size],
                         max_streams,
@@ -2152,7 +2152,7 @@ static void executeMergeAggregatedImpl(
     auto grouping_sets_params = getAggregatorGroupingSetsParams(aggregation_keys_list, keys);
 
     auto merging_aggregated = std::make_unique<MergingAggregatedStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         params,
         grouping_sets_params,
         final,
@@ -2710,7 +2710,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
     /// Aliases in table declaration.
     if (processing_stage == QueryProcessingStage::FetchColumns && alias_actions)
     {
-        auto table_aliases = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), alias_actions->clone());
+        auto table_aliases = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), alias_actions->clone());
         table_aliases->setStepDescription("Add table aliases");
         query_plan.addStep(std::move(table_aliases));
     }
@@ -2720,10 +2720,10 @@ void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsA
 {
     auto dag = expression->dag.clone();
     if (expression->project_input)
-        dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
+        dag.appendInputsForUnusedColumns(query_plan.getCurrentHeader());
 
     auto where_step = std::make_unique<FilterStep>(
-        query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().where()->getColumnName(), remove_filter);
+        query_plan.getCurrentHeader(), std::move(dag), getSelectQuery().where()->getColumnName(), remove_filter);
 
     where_step->setStepDescription("WHERE");
     query_plan.addStep(std::move(where_step));
@@ -2826,7 +2826,7 @@ void InterpreterSelectQuery::executeAggregation(
         && (settings[Setting::distributed_aggregation_memory_efficient] || settings[Setting::enable_memory_bound_merging_of_aggregation_results]);
 
     auto aggregating_step = std::make_unique<AggregatingStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         std::move(aggregator_params),
         std::move(grouping_sets_params),
         final,
@@ -2870,10 +2870,10 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions
 {
     auto dag = expression->dag.clone();
     if (expression->project_input)
-        dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
+        dag.appendInputsForUnusedColumns(query_plan.getCurrentHeader());
 
     auto having_step
-        = std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), std::move(dag), getSelectQuery().having()->getColumnName(), remove_filter);
+        = std::make_unique<FilterStep>(query_plan.getCurrentHeader(), std::move(dag), getSelectQuery().having()->getColumnName(), remove_filter);
 
     having_step->setStepDescription("HAVING");
     query_plan.addStep(std::move(having_step));
@@ -2893,13 +2893,13 @@ void InterpreterSelectQuery::executeTotalsAndHaving(
     {
         dag = expression->dag.clone();
         if (expression->project_input)
-            dag->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
+            dag->appendInputsForUnusedColumns(query_plan.getCurrentHeader());
     }
 
     const Settings & settings = context->getSettingsRef();
 
     auto totals_having_step = std::make_unique<TotalsHavingStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         query_analyzer->aggregates(),
         overflow_row,
         std::move(dag),
@@ -2928,9 +2928,9 @@ void InterpreterSelectQuery::executeRollupOrCube(QueryPlan & query_plan, Modific
 
     QueryPlanStepPtr step;
     if (modificator == Modificator::ROLLUP)
-        step = std::make_unique<RollupStep>(query_plan.getCurrentDataStream(), std::move(params), final, settings[Setting::group_by_use_nulls]);
+        step = std::make_unique<RollupStep>(query_plan.getCurrentHeader(), std::move(params), final, settings[Setting::group_by_use_nulls]);
     else if (modificator == Modificator::CUBE)
-        step = std::make_unique<CubeStep>(query_plan.getCurrentDataStream(), std::move(params), final, settings[Setting::group_by_use_nulls]);
+        step = std::make_unique<CubeStep>(query_plan.getCurrentHeader(), std::move(params), final, settings[Setting::group_by_use_nulls]);
 
     query_plan.addStep(std::move(step));
 }
@@ -2942,9 +2942,9 @@ void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const Act
 
     auto dag = expression->dag.clone();
     if (expression->project_input)
-        dag.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
+        dag.appendInputsForUnusedColumns(query_plan.getCurrentHeader());
 
-    auto expression_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(dag));
+    auto expression_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(dag));
 
     expression_step->setStepDescription(description);
     query_plan.addStep(std::move(expression_step));
@@ -3028,7 +3028,7 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
             SortingStep::Settings sort_settings(*context);
 
             auto sorting_step = std::make_unique<SortingStep>(
-                query_plan.getCurrentDataStream(),
+                query_plan.getCurrentHeader(),
                 window.full_sort_description,
                 window.partition_by,
                 0 /* LIMIT */,
@@ -3042,7 +3042,7 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan)
         const bool streams_fan_out
             = settings[Setting::query_plan_enable_multithreading_after_window_functions] && ((i + 1) == windows_sorted.size());
 
-        auto window_step = std::make_unique<WindowStep>(query_plan.getCurrentDataStream(), window, window.window_functions, streams_fan_out);
+        auto window_step = std::make_unique<WindowStep>(query_plan.getCurrentHeader(), window, window.window_functions, streams_fan_out);
         window_step->setStepDescription("Window step for window '" + window.window_name + "'");
 
         query_plan.addStep(std::move(window_step));
@@ -3055,7 +3055,7 @@ void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, Input
     const Settings & settings = context->getSettingsRef();
 
     auto finish_sorting_step = std::make_unique<SortingStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         input_sorting_info->sort_description_for_merging,
         output_order_descr,
         settings[Setting::max_block_size],
@@ -3086,7 +3086,7 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo
 
     /// Merge the sorted blocks.
     auto sorting_step = std::make_unique<SortingStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         output_order_descr,
         limit,
         sort_settings);
@@ -3105,7 +3105,7 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const st
     const auto exact_rows_before_limit = context->getSettingsRef()[Setting::exact_rows_before_limit];
 
     auto merging_sorted = std::make_unique<SortingStep>(
-        query_plan.getCurrentDataStream(), std::move(sort_description), max_block_size, limit, exact_rows_before_limit);
+        query_plan.getCurrentHeader(), std::move(sort_description), max_block_size, limit, exact_rows_before_limit);
     merging_sorted->setStepDescription("Merge sorted streams " + description);
     query_plan.addStep(std::move(merging_sorted));
 }
@@ -3140,7 +3140,7 @@ void InterpreterSelectQuery::executeDistinct(QueryPlan & query_plan, bool before
         SizeLimits limits(settings[Setting::max_rows_in_distinct], settings[Setting::max_bytes_in_distinct], settings[Setting::distinct_overflow_mode]);
 
         auto distinct_step = std::make_unique<DistinctStep>(
-            query_plan.getCurrentDataStream(),
+            query_plan.getCurrentHeader(),
             limits,
             limit_for_distinct,
             columns,
@@ -3175,7 +3175,7 @@ void InterpreterSelectQuery::executePreLimit(QueryPlan & query_plan, bool do_not
         const Settings & settings = context->getSettingsRef();
 
         auto limit
-            = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), limit_length, limit_offset, settings[Setting::exact_rows_before_limit]);
+            = std::make_unique<LimitStep>(query_plan.getCurrentHeader(), limit_length, limit_offset, settings[Setting::exact_rows_before_limit]);
         if (do_not_skip_offset)
             limit->setStepDescription("preliminary LIMIT (with OFFSET)");
         else
@@ -3199,7 +3199,7 @@ void InterpreterSelectQuery::executeLimitBy(QueryPlan & query_plan)
     UInt64 length = getLimitUIntValue(query.limitByLength(), context, "LIMIT");
     UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context, "OFFSET") : 0);
 
-    auto limit_by = std::make_unique<LimitByStep>(query_plan.getCurrentDataStream(), length, offset, columns);
+    auto limit_by = std::make_unique<LimitByStep>(query_plan.getCurrentHeader(), length, offset, columns);
     query_plan.addStep(std::move(limit_by));
 }
 
@@ -3224,7 +3224,7 @@ void InterpreterSelectQuery::executeWithFill(QueryPlan & query_plan)
 
         const Settings & settings = context->getSettingsRef();
         auto filling_step = std::make_unique<FillingStep>(
-            query_plan.getCurrentDataStream(),
+            query_plan.getCurrentHeader(),
             std::move(sort_description),
             std::move(fill_description),
             interpolate_descr,
@@ -3271,7 +3271,7 @@ void InterpreterSelectQuery::executeLimit(QueryPlan & query_plan)
         }
 
         auto limit = std::make_unique<LimitStep>(
-                query_plan.getCurrentDataStream(),
+                query_plan.getCurrentHeader(),
                 limit_length, limit_offset, always_read_till_end, query.limit_with_ties, order_descr);
 
         if (query.limit_with_ties)
@@ -3292,7 +3292,7 @@ void InterpreterSelectQuery::executeOffset(QueryPlan & query_plan)
         UInt64 limit_offset;
         std::tie(limit_length, limit_offset) = getLimitLengthAndOffset(query, context);
 
-        auto offsets_step = std::make_unique<OffsetStep>(query_plan.getCurrentDataStream(), limit_offset);
+        auto offsets_step = std::make_unique<OffsetStep>(query_plan.getCurrentHeader(), limit_offset);
         query_plan.addStep(std::move(offsets_step));
     }
 }
@@ -3302,7 +3302,7 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan)
     if (!context->getSettingsRef()[Setting::extremes])
         return;
 
-    auto extremes_step = std::make_unique<ExtremesStep>(query_plan.getCurrentDataStream());
+    auto extremes_step = std::make_unique<ExtremesStep>(query_plan.getCurrentHeader());
     query_plan.addStep(std::move(extremes_step));
 }
 
@@ -3313,7 +3313,7 @@ void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_p
     if (!subqueries.empty())
     {
         auto step = std::make_unique<DelayedCreatingSetsStep>(
-                query_plan.getCurrentDataStream(),
+                query_plan.getCurrentHeader(),
                 std::move(subqueries),
                 context);
 
diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp
index d4af111eec0..c6170f6e7e2 100644
--- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp
+++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp
@@ -213,7 +213,7 @@ Block InterpreterSelectQueryAnalyzer::getSampleBlock(const QueryTreeNodePtr & qu
 Block InterpreterSelectQueryAnalyzer::getSampleBlock()
 {
     planner.buildQueryPlanIfNeeded();
-    return planner.getQueryPlan().getCurrentDataStream().header;
+    return planner.getQueryPlan().getCurrentHeader();
 }
 
 BlockIO InterpreterSelectQueryAnalyzer::execute()
diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
index eec0e1f27c9..a6df05f3ab5 100644
--- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
+++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
@@ -316,29 +316,29 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
     else
     {
         std::vector<std::unique_ptr<QueryPlan>> plans(num_plans);
-        DataStreams data_streams(num_plans);
+        Headers headers(num_plans);
 
         for (size_t i = 0; i < num_plans; ++i)
         {
             plans[i] = std::make_unique<QueryPlan>();
             nested_interpreters[i]->buildQueryPlan(*plans[i]);
 
-            if (!blocksHaveEqualStructure(plans[i]->getCurrentDataStream().header, result_header))
+            if (!blocksHaveEqualStructure(plans[i]->getCurrentHeader(), result_header))
             {
                 auto actions_dag = ActionsDAG::makeConvertingActions(
-                        plans[i]->getCurrentDataStream().header.getColumnsWithTypeAndName(),
+                        plans[i]->getCurrentHeader().getColumnsWithTypeAndName(),
                         result_header.getColumnsWithTypeAndName(),
                         ActionsDAG::MatchColumnsMode::Position);
-                auto converting_step = std::make_unique<ExpressionStep>(plans[i]->getCurrentDataStream(), std::move(actions_dag));
+                auto converting_step = std::make_unique<ExpressionStep>(plans[i]->getCurrentHeader(), std::move(actions_dag));
                 converting_step->setStepDescription("Conversion before UNION");
                 plans[i]->addStep(std::move(converting_step));
             }
 
-            data_streams[i] = plans[i]->getCurrentDataStream();
+            headers[i] = plans[i]->getCurrentHeader();
         }
 
         auto max_threads = settings[Setting::max_threads];
-        auto union_step = std::make_unique<UnionStep>(std::move(data_streams), max_threads);
+        auto union_step = std::make_unique<UnionStep>(std::move(headers), max_threads);
 
         query_plan.unitePlans(std::move(union_step), std::move(plans));
 
@@ -349,7 +349,7 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
             SizeLimits limits(settings[Setting::max_rows_in_distinct], settings[Setting::max_bytes_in_distinct], settings[Setting::distinct_overflow_mode]);
 
             auto distinct_step = std::make_unique<DistinctStep>(
-                query_plan.getCurrentDataStream(),
+                query_plan.getCurrentHeader(),
                 limits,
                 0,
                 result_header.getNames(),
@@ -364,13 +364,13 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
         if (settings[Setting::limit] > 0)
         {
             auto limit = std::make_unique<LimitStep>(
-                query_plan.getCurrentDataStream(), settings[Setting::limit], settings[Setting::offset], settings[Setting::exact_rows_before_limit]);
+                query_plan.getCurrentHeader(), settings[Setting::limit], settings[Setting::offset], settings[Setting::exact_rows_before_limit]);
             limit->setStepDescription("LIMIT OFFSET for SETTINGS");
             query_plan.addStep(std::move(limit));
         }
         else
         {
-            auto offset = std::make_unique<OffsetStep>(query_plan.getCurrentDataStream(), settings[Setting::offset]);
+            auto offset = std::make_unique<OffsetStep>(query_plan.getCurrentHeader(), settings[Setting::offset]);
             offset->setStepDescription("OFFSET for SETTINGS");
             query_plan.addStep(std::move(offset));
         }
diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp
index 676133e8328..b9c60874a89 100644
--- a/src/Interpreters/MutationsInterpreter.cpp
+++ b/src/Interpreters/MutationsInterpreter.cpp
@@ -1313,17 +1313,17 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v
             {
                 auto dag = step->actions()->dag.clone();
                 if (step->actions()->project_input)
-                    dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header);
+                    dag.appendInputsForUnusedColumns(plan.getCurrentHeader());
                 /// Execute DELETEs.
-                plan.addStep(std::make_unique<FilterStep>(plan.getCurrentDataStream(), std::move(dag), stage.filter_column_names[i], false));
+                plan.addStep(std::make_unique<FilterStep>(plan.getCurrentHeader(), std::move(dag), stage.filter_column_names[i], false));
             }
             else
             {
                 auto dag = step->actions()->dag.clone();
                 if (step->actions()->project_input)
-                    dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header);
+                    dag.appendInputsForUnusedColumns(plan.getCurrentHeader());
                 /// Execute UPDATE or final projection.
-                plan.addStep(std::make_unique<ExpressionStep>(plan.getCurrentDataStream(), std::move(dag)));
+                plan.addStep(std::make_unique<ExpressionStep>(plan.getCurrentHeader(), std::move(dag)));
             }
         }
 
diff --git a/src/Interpreters/PreparedSets.cpp b/src/Interpreters/PreparedSets.cpp
index 3e37b2191a0..538108165fb 100644
--- a/src/Interpreters/PreparedSets.cpp
+++ b/src/Interpreters/PreparedSets.cpp
@@ -119,7 +119,7 @@ FutureSetFromSubquery::FutureSetFromSubquery(
     auto size_limits = getSizeLimitsForSet(settings);
     set_and_key->set
         = std::make_shared<Set>(size_limits, settings[Setting::use_index_for_in_with_subqueries_max_values], settings[Setting::transform_null_in]);
-    set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName());
+    set_and_key->set->setHeader(source->getCurrentHeader().getColumnsWithTypeAndName());
 }
 
 FutureSetFromSubquery::FutureSetFromSubquery(
@@ -149,7 +149,7 @@ SetPtr FutureSetFromSubquery::get() const
 void FutureSetFromSubquery::setQueryPlan(std::unique_ptr<QueryPlan> source_)
 {
     source = std::move(source_);
-    set_and_key->set->setHeader(source->getCurrentDataStream().header.getColumnsWithTypeAndName());
+    set_and_key->set->setHeader(source->getCurrentHeader().getColumnsWithTypeAndName());
 }
 
 DataTypes FutureSetFromSubquery::getTypes() const
@@ -170,7 +170,7 @@ std::unique_ptr<QueryPlan> FutureSetFromSubquery::build(const ContextPtr & conte
         return nullptr;
 
     auto creating_set = std::make_unique<CreatingSetStep>(
-        plan->getCurrentDataStream(),
+        plan->getCurrentHeader(),
         set_and_key,
         external_table,
         SizeLimits(settings[Setting::max_rows_to_transfer], settings[Setting::max_bytes_to_transfer], settings[Setting::transfer_overflow_mode]),
diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h
index 044de20e163..8a821d87dfb 100644
--- a/src/Interpreters/Set.h
+++ b/src/Interpreters/Set.h
@@ -238,6 +238,8 @@ public:
 
     const Columns & getOrderedSet() const { return ordered_set; }
 
+    const std::vector<KeyTuplePositionMapping> & getIndexesMapping() const { return indexes_mapping; }
+
 private:
     // If all arguments in tuple are key columns, we can optimize NOT IN when there is only one element.
     bool has_all_keys;
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 77f3f3a51f2..f9db928ad3d 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -5,6 +5,7 @@
 #include <Common/MemoryTrackerBlockerInThread.h>
 #include <Common/SensitiveDataMasker.h>
 #include <Common/FailPoint.h>
+#include <Common/FieldVisitorToString.h>
 
 #include <Interpreters/AsynchronousInsertQueue.h>
 #include <Interpreters/Cache/QueryCache.h>
@@ -155,7 +156,7 @@ namespace Setting
     extern const SettingsBool use_query_cache;
     extern const SettingsBool wait_for_async_insert;
     extern const SettingsSeconds wait_for_async_insert_timeout;
-    extern const SettingsBool enable_secure_identifiers;
+    extern const SettingsBool enforce_strict_identifier_format;
 }
 
 namespace ErrorCodes
@@ -565,6 +566,25 @@ void logQueryFinish(
         query_span->addAttributeIfNotZero("clickhouse.written_rows", elem.written_rows);
         query_span->addAttributeIfNotZero("clickhouse.written_bytes", elem.written_bytes);
         query_span->addAttributeIfNotZero("clickhouse.memory_usage", elem.memory_usage);
+
+        if (context)
+        {
+            std::string user_name = context->getUserName();
+            query_span->addAttribute("clickhouse.user", user_name);
+        }
+
+        if (settings[Setting::log_query_settings])
+        {
+            auto changed_settings_names = settings.getChangedNames();
+            for (const auto & name : changed_settings_names)
+            {
+                Field value = settings.get(name);
+                String value_str = convertFieldToString(value);
+
+                query_span->addAttribute(fmt::format("clickhouse.setting.{}", name), value_str);
+
+            }
+        }
         query_span->finish();
     }
 }
@@ -999,12 +1019,12 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
         InterpreterSetQuery::applySettingsFromQuery(ast, context);
         validateAnalyzerSettings(ast, settings[Setting::allow_experimental_analyzer]);
 
-        if (settings[Setting::enable_secure_identifiers])
+        if (settings[Setting::enforce_strict_identifier_format])
         {
             WriteBufferFromOwnString buf;
-            IAST::FormatSettings enable_secure_identifiers_settings(buf, true);
-            enable_secure_identifiers_settings.enable_secure_identifiers = true;
-            ast->format(enable_secure_identifiers_settings);
+            IAST::FormatSettings enforce_strict_identifier_format_settings(buf, true);
+            enforce_strict_identifier_format_settings.enforce_strict_identifier_format = true;
+            ast->format(enforce_strict_identifier_format_settings);
         }
 
         if (auto * insert_query = ast->as<ASTInsertQuery>())
diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp
index ce5f3b45781..007b31d9fdc 100644
--- a/src/Interpreters/tests/gtest_filecache.cpp
+++ b/src/Interpreters/tests/gtest_filecache.cpp
@@ -372,7 +372,7 @@ TEST_F(FileCacheTest, LRUPolicy)
         std::cerr << "Step 1\n";
         auto cache = DB::FileCache("1", settings);
         cache.initialize();
-        auto key = DB::FileCache::createKeyForPath("key1");
+        auto key = DB::FileCacheKey::fromPath("key1");
 
         auto get_or_set = [&](size_t offset, size_t size)
         {
@@ -736,7 +736,7 @@ TEST_F(FileCacheTest, LRUPolicy)
 
         auto cache2 = DB::FileCache("2", settings);
         cache2.initialize();
-        auto key = DB::FileCache::createKeyForPath("key1");
+        auto key = DB::FileCacheKey::fromPath("key1");
 
         /// Get [2, 29]
         assertEqual(
@@ -755,7 +755,7 @@ TEST_F(FileCacheTest, LRUPolicy)
         fs::create_directories(settings2.base_path);
         auto cache2 = DB::FileCache("3", settings2);
         cache2.initialize();
-        auto key = DB::FileCache::createKeyForPath("key1");
+        auto key = DB::FileCacheKey::fromPath("key1");
 
         /// Get [0, 24]
         assertEqual(
@@ -770,7 +770,7 @@ TEST_F(FileCacheTest, LRUPolicy)
 
         auto cache = FileCache("4", settings);
         cache.initialize();
-        const auto key = FileCache::createKeyForPath("key10");
+        const auto key = FileCacheKey::fromPath("key10");
         const auto key_path = cache.getKeyPath(key, user);
 
         cache.removeAllReleasable(user.user_id);
@@ -794,7 +794,7 @@ TEST_F(FileCacheTest, LRUPolicy)
 
         auto cache = DB::FileCache("5", settings);
         cache.initialize();
-        const auto key = FileCache::createKeyForPath("key10");
+        const auto key = FileCacheKey::fromPath("key10");
         const auto key_path = cache.getKeyPath(key, user);
 
         cache.removeAllReleasable(user.user_id);
@@ -833,7 +833,7 @@ TEST_F(FileCacheTest, writeBuffer)
         segment_settings.kind = FileSegmentKind::Ephemeral;
         segment_settings.unbounded = true;
 
-        auto cache_key = FileCache::createKeyForPath(key);
+        auto cache_key = FileCacheKey::fromPath(key);
         auto holder = cache.set(cache_key, 0, 3, segment_settings, user);
         /// The same is done in TemporaryDataOnDisk::createStreamToCacheFile.
         std::filesystem::create_directories(cache.getKeyPath(cache_key, user));
@@ -961,7 +961,7 @@ TEST_F(FileCacheTest, temporaryData)
     const auto user = FileCache::getCommonUser();
     auto tmp_data_scope = std::make_shared<TemporaryDataOnDiskScope>(nullptr, &file_cache, TemporaryDataOnDiskSettings{});
 
-    auto some_data_holder = file_cache.getOrSet(FileCache::createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user);
+    auto some_data_holder = file_cache.getOrSet(FileCacheKey::fromPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user);
 
     {
         ASSERT_EQ(some_data_holder->size(), 5);
@@ -1103,7 +1103,7 @@ TEST_F(FileCacheTest, CachedReadBuffer)
     auto cache = std::make_shared<DB::FileCache>("8", settings);
     cache->initialize();
 
-    auto key = cache->createKeyForPath(file_path);
+    auto key = DB::FileCacheKey::fromPath(file_path);
     const auto user = FileCache::getCommonUser();
 
     {
@@ -1219,7 +1219,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
     {
         auto cache = DB::FileCache(std::to_string(++file_cache_name), settings);
         cache.initialize();
-        auto key = FileCache::createKeyForPath("key1");
+        auto key = FileCacheKey::fromPath("key1");
 
         auto add_range = [&](size_t offset, size_t size)
         {
@@ -1342,7 +1342,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
 
         std::string data1(15, '*');
         auto file1 = write_file("test1", data1);
-        auto key1 = cache->createKeyForPath(file1);
+        auto key1 = DB::FileCacheKey::fromPath(file1);
 
         read_and_check(file1, key1, data1);
 
@@ -1358,7 +1358,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
 
         std::string data2(10, '*');
         auto file2 = write_file("test2", data2);
-        auto key2 = cache->createKeyForPath(file2);
+        auto key2 = DB::FileCacheKey::fromPath(file2);
 
         read_and_check(file2, key2, data2);
 
diff --git a/src/Parsers/ASTCheckQuery.h b/src/Parsers/ASTCheckQuery.h
index 9dc4155c39d..919555eb336 100644
--- a/src/Parsers/ASTCheckQuery.h
+++ b/src/Parsers/ASTCheckQuery.h
@@ -58,9 +58,16 @@ protected:
             settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " PARTITION " << (settings.hilite ? hilite_none : "");
             partition->formatImpl(settings, state, frame);
         }
+
+        if (!part_name.empty())
+        {
+            settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " PART " << (settings.hilite ? hilite_none : "")
+                << quoteString(part_name);
+        }
     }
 };
 
+
 struct ASTCheckAllTablesQuery : public ASTQueryWithOutput
 {
 
diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp
index 8aac0c4ce4f..2058c7c60cf 100644
--- a/src/Parsers/IAST.cpp
+++ b/src/Parsers/IAST.cpp
@@ -8,6 +8,7 @@
 #include <Poco/String.h>
 #include <Common/SensitiveDataMasker.h>
 #include <Common/SipHash.h>
+#include <Common/StringUtils.h>
 #include <algorithm>
 
 namespace DB
@@ -265,14 +266,15 @@ void IAST::FormatSettings::writeIdentifier(const String & name, bool ambiguous)
 
 void IAST::FormatSettings::checkIdentifier(const String & name) const
 {
-    if (enable_secure_identifiers)
+    if (enforce_strict_identifier_format)
     {
-        bool is_secure_identifier = std::all_of(name.begin(), name.end(), [](char ch) { return std::isalnum(ch) || ch == '_'; });
-        if (!is_secure_identifier)
+        /// Strict format: every character must be an ASCII word character (letter, digit or underscore), as decided by isWordCharASCII.
+        bool is_word_char_identifier = std::all_of(name.begin(), name.end(), isWordCharASCII);
+        if (!is_word_char_identifier)
         {
             throw Exception(
                 ErrorCodes::BAD_ARGUMENTS,
-                "Not a secure identifier: `{}`, a secure identifier must contain only underscore and alphanumeric characters",
+                "Identifier '{}' contains characters other than alphanumeric and underscore, which is not allowed when enforce_strict_identifier_format is enabled",
                 name);
         }
     }
diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h
index 8e307e0383b..eb6ddcc5d8b 100644
--- a/src/Parsers/IAST.h
+++ b/src/Parsers/IAST.h
@@ -202,7 +202,7 @@ public:
         char nl_or_ws; /// Newline or whitespace.
         LiteralEscapingStyle literal_escaping_style;
         bool print_pretty_type_names;
-        bool enable_secure_identifiers;
+        bool enforce_strict_identifier_format;
 
         explicit FormatSettings(
             WriteBuffer & ostr_,
@@ -213,7 +213,7 @@ public:
             bool show_secrets_ = true,
             LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular,
             bool print_pretty_type_names_ = false,
-            bool enable_secure_identifiers_ = false)
+            bool enforce_strict_identifier_format_ = false)
             : ostr(ostr_)
             , one_line(one_line_)
             , hilite(hilite_)
@@ -223,7 +223,7 @@ public:
             , nl_or_ws(one_line ? ' ' : '\n')
             , literal_escaping_style(literal_escaping_style_)
             , print_pretty_type_names(print_pretty_type_names_)
-            , enable_secure_identifiers(enable_secure_identifiers_)
+            , enforce_strict_identifier_format(enforce_strict_identifier_format_)
         {
         }
 
@@ -237,7 +237,7 @@ public:
             , nl_or_ws(other.nl_or_ws)
             , literal_escaping_style(other.literal_escaping_style)
             , print_pretty_type_names(other.print_pretty_type_names)
-            , enable_secure_identifiers(other.enable_secure_identifiers)
+            , enforce_strict_identifier_format(other.enforce_strict_identifier_format)
         {
         }
 
diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp
index 44dca952a10..02bd9279921 100644
--- a/src/Planner/Planner.cpp
+++ b/src/Planner/Planner.cpp
@@ -384,9 +384,9 @@ void addExpressionStep(QueryPlan & query_plan,
 {
     auto actions = std::move(expression_actions->dag);
     if (expression_actions->project_input)
-        actions.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
+        actions.appendInputsForUnusedColumns(query_plan.getCurrentHeader());
 
-    auto expression_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(actions));
+    auto expression_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(actions));
     appendSetsFromActionsDAG(expression_step->getExpression(), useful_sets);
     expression_step->setStepDescription(step_description);
     query_plan.addStep(std::move(expression_step));
@@ -399,9 +399,9 @@ void addFilterStep(QueryPlan & query_plan,
 {
     auto actions = std::move(filter_analysis_result.filter_actions->dag);
     if (filter_analysis_result.filter_actions->project_input)
-        actions.appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
+        actions.appendInputsForUnusedColumns(query_plan.getCurrentHeader());
 
-    auto where_step = std::make_unique<FilterStep>(query_plan.getCurrentDataStream(),
+    auto where_step = std::make_unique<FilterStep>(query_plan.getCurrentHeader(),
         std::move(actions),
         filter_analysis_result.filter_column_name,
         filter_analysis_result.remove_filter_column);
@@ -507,7 +507,7 @@ void addAggregationStep(QueryPlan & query_plan,
     }
 
     auto aggregating_step = std::make_unique<AggregatingStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         aggregator_params,
         aggregation_analysis_result.grouping_sets_parameters_list,
         query_analysis_result.aggregate_final,
@@ -570,7 +570,7 @@ void addMergingAggregatedStep(QueryPlan & query_plan,
     }
 
     auto merging_aggregated = std::make_unique<MergingAggregatedStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         params,
         aggregation_analysis_result.grouping_sets_parameters_list,
         query_analysis_result.aggregate_final,
@@ -605,11 +605,11 @@ void addTotalsHavingStep(QueryPlan & query_plan,
     {
         actions = std::move(having_analysis_result.filter_actions->dag);
         if (having_analysis_result.filter_actions->project_input)
-            actions->appendInputsForUnusedColumns(query_plan.getCurrentDataStream().header);
+            actions->appendInputsForUnusedColumns(query_plan.getCurrentHeader());
     }
 
     auto totals_having_step = std::make_unique<TotalsHavingStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         aggregation_analysis_result.aggregate_descriptions,
         query_analysis_result.aggregate_overflow_row,
         std::move(actions),
@@ -647,13 +647,13 @@ void addCubeOrRollupStepIfNeeded(QueryPlan & query_plan,
     if (query_node.isGroupByWithRollup())
     {
         auto rollup_step = std::make_unique<RollupStep>(
-            query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings[Setting::group_by_use_nulls]);
+            query_plan.getCurrentHeader(), std::move(aggregator_params), true /*final*/, settings[Setting::group_by_use_nulls]);
         query_plan.addStep(std::move(rollup_step));
     }
     else if (query_node.isGroupByWithCube())
     {
         auto cube_step = std::make_unique<CubeStep>(
-            query_plan.getCurrentDataStream(), std::move(aggregator_params), true /*final*/, settings[Setting::group_by_use_nulls]);
+            query_plan.getCurrentHeader(), std::move(aggregator_params), true /*final*/, settings[Setting::group_by_use_nulls]);
         query_plan.addStep(std::move(cube_step));
     }
 }
@@ -687,7 +687,7 @@ void addDistinctStep(QueryPlan & query_plan,
     SizeLimits limits(settings[Setting::max_rows_in_distinct], settings[Setting::max_bytes_in_distinct], settings[Setting::distinct_overflow_mode]);
 
     auto distinct_step = std::make_unique<DistinctStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         limits,
         limit_hint_for_distinct,
         column_names,
@@ -706,7 +706,7 @@ void addSortingStep(QueryPlan & query_plan,
     SortingStep::Settings sort_settings(*query_context);
 
     auto sorting_step = std::make_unique<SortingStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         sort_description,
         query_analysis_result.partial_sorting_limit,
         sort_settings);
@@ -725,7 +725,7 @@ void addMergeSortingStep(QueryPlan & query_plan,
     const auto & sort_description = query_analysis_result.sort_description;
 
     auto merging_sorted = std::make_unique<SortingStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         sort_description,
         settings[Setting::max_block_size],
         query_analysis_result.partial_sorting_limit,
@@ -761,7 +761,7 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan,
     if (query_node.hasInterpolate())
     {
         ActionsDAG interpolate_actions_dag;
-        auto query_plan_columns = query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
+        auto query_plan_columns = query_plan.getCurrentHeader().getColumnsWithTypeAndName();
         for (auto & query_plan_column : query_plan_columns)
         {
             /// INTERPOLATE actions dag input columns must be non constant
@@ -846,7 +846,7 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan,
     const auto & query_context = planner_context->getQueryContext();
     const Settings & settings = query_context->getSettingsRef();
     auto filling_step = std::make_unique<FillingStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         sort_description,
         std::move(fill_description),
         interpolate_description,
@@ -868,7 +868,7 @@ void addLimitByStep(QueryPlan & query_plan,
         limit_by_offset = query_node.getLimitByOffset()->as<ConstantNode &>().getValue().safeGet<UInt64>();
     }
 
-    auto limit_by_step = std::make_unique<LimitByStep>(query_plan.getCurrentDataStream(),
+    auto limit_by_step = std::make_unique<LimitByStep>(query_plan.getCurrentHeader(),
         limit_by_limit,
         limit_by_offset,
         limit_by_analysis_result.limit_by_column_names);
@@ -896,7 +896,7 @@ void addPreliminaryLimitStep(QueryPlan & query_plan,
     const Settings & settings = query_context->getSettingsRef();
 
     auto limit
-        = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), limit_length, limit_offset, settings[Setting::exact_rows_before_limit]);
+        = std::make_unique<LimitStep>(query_plan.getCurrentHeader(), limit_length, limit_offset, settings[Setting::exact_rows_before_limit]);
     limit->setStepDescription(do_not_skip_offset ? "preliminary LIMIT (with OFFSET)" : "preliminary LIMIT (without OFFSET)");
     query_plan.addStep(std::move(limit));
 }
@@ -1023,7 +1023,7 @@ void addWindowSteps(QueryPlan & query_plan,
             SortingStep::Settings sort_settings(*query_context);
 
             auto sorting_step = std::make_unique<SortingStep>(
-                query_plan.getCurrentDataStream(),
+                query_plan.getCurrentHeader(),
                 window_description.full_sort_description,
                 window_description.partition_by,
                 0 /*limit*/,
@@ -1038,7 +1038,7 @@ void addWindowSteps(QueryPlan & query_plan,
             = settings[Setting::query_plan_enable_multithreading_after_window_functions] && ((i + 1) == window_descriptions_size);
 
         auto window_step
-            = std::make_unique<WindowStep>(query_plan.getCurrentDataStream(), window_description, window_description.window_functions, streams_fan_out);
+            = std::make_unique<WindowStep>(query_plan.getCurrentHeader(), window_description, window_description.window_functions, streams_fan_out);
         window_step->setStepDescription("Window step for window '" + window_description.window_name + "'");
         query_plan.addStep(std::move(window_step));
     }
@@ -1084,7 +1084,7 @@ void addLimitStep(QueryPlan & query_plan,
     UInt64 limit_offset = query_analysis_result.limit_offset;
 
     auto limit = std::make_unique<LimitStep>(
-        query_plan.getCurrentDataStream(),
+        query_plan.getCurrentHeader(),
         limit_length,
         limit_offset,
         always_read_till_end,
@@ -1103,7 +1103,7 @@ void addExtremesStepIfNeeded(QueryPlan & query_plan, const PlannerContextPtr & p
     if (!query_context->getSettingsRef()[Setting::extremes])
         return;
 
-    auto extremes_step = std::make_unique<ExtremesStep>(query_plan.getCurrentDataStream());
+    auto extremes_step = std::make_unique<ExtremesStep>(query_plan.getCurrentHeader());
     query_plan.addStep(std::move(extremes_step));
 }
 
@@ -1112,7 +1112,7 @@ void addOffsetStep(QueryPlan & query_plan, const QueryAnalysisResult & query_ana
     /// If there is not a LIMIT but an offset
     if (!query_analysis_result.limit_length && query_analysis_result.limit_offset)
     {
-        auto offsets_step = std::make_unique<OffsetStep>(query_plan.getCurrentDataStream(), query_analysis_result.limit_offset);
+        auto offsets_step = std::make_unique<OffsetStep>(query_plan.getCurrentHeader(), query_analysis_result.limit_offset);
         query_plan.addStep(std::move(offsets_step));
     }
 }
@@ -1150,7 +1150,7 @@ void addBuildSubqueriesForSetsStepIfNeeded(
     if (!subqueries.empty())
     {
         auto step = std::make_unique<DelayedCreatingSetsStep>(
-            query_plan.getCurrentDataStream(),
+            query_plan.getCurrentHeader(),
             std::move(subqueries),
             planner_context->getQueryContext());
 
@@ -1190,7 +1190,7 @@ void addAdditionalFilterStepIfNeeded(QueryPlan & query_plan,
     if (!query_plan.isInitialized())
         return;
 
-    auto filter_step = std::make_unique<FilterStep>(query_plan.getCurrentDataStream(),
+    auto filter_step = std::make_unique<FilterStep>(query_plan.getCurrentHeader(),
         std::move(filter_info.actions),
         filter_info.column_name,
         filter_info.do_remove_column);
@@ -1329,31 +1329,27 @@ void Planner::buildPlanForUnionNode()
         const auto & mapping = query_planner.getQueryNodeToPlanStepMapping();
         query_node_to_plan_step_mapping.insert(mapping.begin(), mapping.end());
         auto query_node_plan = std::make_unique<QueryPlan>(std::move(query_planner).extractQueryPlan());
-        query_plans_headers.push_back(query_node_plan->getCurrentDataStream().header);
+        query_plans_headers.push_back(query_node_plan->getCurrentHeader());
         query_plans.push_back(std::move(query_node_plan));
     }
 
     Block union_common_header = buildCommonHeaderForUnion(query_plans_headers, union_mode);
-    DataStreams query_plans_streams;
-    query_plans_streams.reserve(query_plans.size());
 
-    for (auto & query_node_plan : query_plans)
+    for (size_t i = 0; i < queries_size; ++i)
     {
-        if (blocksHaveEqualStructure(query_node_plan->getCurrentDataStream().header, union_common_header))
-        {
-            query_plans_streams.push_back(query_node_plan->getCurrentDataStream());
+        auto & query_node_plan = query_plans[i];
+        if (blocksHaveEqualStructure(query_node_plan->getCurrentHeader(), union_common_header))
             continue;
-        }
 
         auto actions_dag = ActionsDAG::makeConvertingActions(
-            query_node_plan->getCurrentDataStream().header.getColumnsWithTypeAndName(),
+            query_node_plan->getCurrentHeader().getColumnsWithTypeAndName(),
             union_common_header.getColumnsWithTypeAndName(),
             ActionsDAG::MatchColumnsMode::Position);
-        auto converting_step = std::make_unique<ExpressionStep>(query_node_plan->getCurrentDataStream(), std::move(actions_dag));
+        auto converting_step = std::make_unique<ExpressionStep>(query_node_plan->getCurrentHeader(), std::move(actions_dag));
         converting_step->setStepDescription("Conversion before UNION");
         query_node_plan->addStep(std::move(converting_step));
 
-        query_plans_streams.push_back(query_node_plan->getCurrentDataStream());
+        query_plans_headers[i] = query_node_plan->getCurrentHeader();
     }
 
     const auto & query_context = planner_context->getQueryContext();
@@ -1365,7 +1361,7 @@ void Planner::buildPlanForUnionNode()
 
     if (union_mode == SelectUnionMode::UNION_ALL || union_mode == SelectUnionMode::UNION_DISTINCT)
     {
-        auto union_step = std::make_unique<UnionStep>(std::move(query_plans_streams), max_threads);
+        auto union_step = std::make_unique<UnionStep>(std::move(query_plans_headers), max_threads);
         query_plan.unitePlans(std::move(union_step), std::move(query_plans));
     }
     else if (union_mode == SelectUnionMode::INTERSECT_ALL || union_mode == SelectUnionMode::INTERSECT_DISTINCT
@@ -1383,7 +1379,7 @@ void Planner::buildPlanForUnionNode()
             intersect_or_except_operator = IntersectOrExceptStep::Operator::EXCEPT_DISTINCT;
 
         auto union_step
-            = std::make_unique<IntersectOrExceptStep>(std::move(query_plans_streams), intersect_or_except_operator, max_threads);
+            = std::make_unique<IntersectOrExceptStep>(std::move(query_plans_headers), intersect_or_except_operator, max_threads);
         query_plan.unitePlans(std::move(union_step), std::move(query_plans));
     }
 
@@ -1393,10 +1389,10 @@ void Planner::buildPlanForUnionNode()
         SizeLimits limits(settings[Setting::max_rows_in_distinct], settings[Setting::max_bytes_in_distinct], settings[Setting::distinct_overflow_mode]);
 
         auto distinct_step = std::make_unique<DistinctStep>(
-            query_plan.getCurrentDataStream(),
+            query_plan.getCurrentHeader(),
             limits,
             0 /*limit hint*/,
-            query_plan.getCurrentDataStream().header.getNames(),
+            query_plan.getCurrentHeader().getNames(),
             false /*pre distinct*/);
         query_plan.addStep(std::move(distinct_step));
     }
@@ -1558,7 +1554,7 @@ void Planner::buildPlanForQueryNode()
     PlannerQueryProcessingInfo query_processing_info(from_stage, select_query_options.to_stage);
     QueryAnalysisResult query_analysis_result(query_tree, query_processing_info, planner_context);
     auto expression_analysis_result = buildExpressionAnalysisResult(query_tree,
-        query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
+        query_plan.getCurrentHeader().getColumnsWithTypeAndName(),
         planner_context,
         query_processing_info);
 
diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp
index 805745fe5f9..39c1352c9cf 100644
--- a/src/Planner/PlannerJoinTree.cpp
+++ b/src/Planner/PlannerJoinTree.cpp
@@ -637,9 +637,9 @@ UInt64 mainQueryNodeBlockSizeByLimit(const SelectQueryInfo & select_query_info)
 }
 
 std::unique_ptr<ExpressionStep> createComputeAliasColumnsStep(
-    std::unordered_map<std::string, ActionsDAG> & alias_column_expressions, const DataStream & current_data_stream)
+    std::unordered_map<std::string, ActionsDAG> & alias_column_expressions, const Header & current_header)
 {
-    ActionsDAG merged_alias_columns_actions_dag(current_data_stream.header.getColumnsWithTypeAndName());
+    ActionsDAG merged_alias_columns_actions_dag(current_header.getColumnsWithTypeAndName());
     ActionsDAG::NodeRawConstPtrs action_dag_outputs = merged_alias_columns_actions_dag.getInputs();
 
     for (auto & [column_name, alias_column_actions_dag] : alias_column_expressions)
@@ -653,7 +653,7 @@ std::unique_ptr<ExpressionStep> createComputeAliasColumnsStep(
         merged_alias_columns_actions_dag.addOrReplaceInOutputs(*output_node);
     merged_alias_columns_actions_dag.removeUnusedActions(false);
 
-    auto alias_column_step = std::make_unique<ExpressionStep>(current_data_stream, std::move(merged_alias_columns_actions_dag));
+    auto alias_column_step = std::make_unique<ExpressionStep>(current_header, std::move(merged_alias_columns_actions_dag));
     alias_column_step->setStepDescription("Compute alias columns");
     return alias_column_step;
 }
@@ -1065,7 +1065,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
                 auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions();
                 if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns)
                 {
-                    auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentDataStream());
+                    auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentHeader());
                     query_plan.addStep(std::move(alias_column_step));
                 }
 
@@ -1074,7 +1074,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
                     if (query_plan.isInitialized() &&
                         from_stage == QueryProcessingStage::FetchColumns)
                     {
-                        auto filter_step = std::make_unique<FilterStep>(query_plan.getCurrentDataStream(),
+                        auto filter_step = std::make_unique<FilterStep>(query_plan.getCurrentHeader(),
                             std::move(filter_info.actions),
                             filter_info.column_name,
                             filter_info.do_remove_column);
@@ -1154,7 +1154,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
         auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions();
         if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns)
         {
-            auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentDataStream());
+            auto alias_column_step = createComputeAliasColumnsStep(alias_column_expressions, query_plan.getCurrentHeader());
             query_plan.addStep(std::move(alias_column_step));
         }
     }
@@ -1166,7 +1166,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
 
     if (from_stage == QueryProcessingStage::FetchColumns)
     {
-        ActionsDAG rename_actions_dag(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
+        ActionsDAG rename_actions_dag(query_plan.getCurrentHeader().getColumnsWithTypeAndName());
         ActionsDAG::NodeRawConstPtrs updated_actions_dag_outputs;
 
         for (auto & output_node : rename_actions_dag.getOutputs())
@@ -1180,7 +1180,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
 
         rename_actions_dag.getOutputs() = std::move(updated_actions_dag_outputs);
 
-        auto rename_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(rename_actions_dag));
+        auto rename_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(rename_actions_dag));
         rename_step->setStepDescription("Change column names to column identifiers");
         query_plan.addStep(std::move(rename_step));
     }
@@ -1192,18 +1192,18 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
             select_query_info.planner_context);
         planner.buildQueryPlanIfNeeded();
 
-        auto expected_header = planner.getQueryPlan().getCurrentDataStream().header;
+        auto expected_header = planner.getQueryPlan().getCurrentHeader();
 
-        if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, expected_header))
+        if (!blocksHaveEqualStructure(query_plan.getCurrentHeader(), expected_header))
         {
             materializeBlockInplace(expected_header);
 
             auto rename_actions_dag = ActionsDAG::makeConvertingActions(
-                query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
+                query_plan.getCurrentHeader().getColumnsWithTypeAndName(),
                 expected_header.getColumnsWithTypeAndName(),
                 ActionsDAG::MatchColumnsMode::Position,
                 true /*ignore_constant_values*/);
-            auto rename_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(rename_actions_dag));
+            auto rename_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(rename_actions_dag));
             std::string step_description = table_expression_data.isRemote() ? "Change remote column names to local column names" : "Change column names";
             rename_step->setStepDescription(std::move(step_description));
             query_plan.addStep(std::move(rename_step));
@@ -1220,7 +1220,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
 
 void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextPtr & planner_context, const FunctionOverloadResolverPtr & to_nullable_function)
 {
-    ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName());
+    ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentHeader().getColumnsWithTypeAndName());
 
     for (auto & output_node : cast_actions_dag.getOutputs())
     {
@@ -1235,8 +1235,8 @@ void joinCastPlanColumnsToNullable(QueryPlan & plan_to_add_cast, PlannerContextP
         }
     }
 
-    cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header);
-    auto cast_join_columns_step = std::make_unique<ExpressionStep>(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag));
+    cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentHeader());
+    auto cast_join_columns_step = std::make_unique<ExpressionStep>(plan_to_add_cast.getCurrentHeader(), std::move(cast_actions_dag));
     cast_join_columns_step->setStepDescription("Cast JOIN columns to Nullable");
     plan_to_add_cast.addStep(std::move(cast_join_columns_step));
 }
@@ -1255,7 +1255,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
             QueryProcessingStage::toString(left_join_tree_query_plan.from_stage));
 
     auto left_plan = std::move(left_join_tree_query_plan.query_plan);
-    auto left_plan_output_columns = left_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
+    auto left_plan_output_columns = left_plan.getCurrentHeader().getColumnsWithTypeAndName();
     if (right_join_tree_query_plan.from_stage != QueryProcessingStage::FetchColumns)
         throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
             "JOIN {} right table expression expected to process query to fetch columns stage. Actual {}",
@@ -1263,7 +1263,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
             QueryProcessingStage::toString(right_join_tree_query_plan.from_stage));
 
     auto right_plan = std::move(right_join_tree_query_plan.query_plan);
-    auto right_plan_output_columns = right_plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
+    auto right_plan_output_columns = right_plan.getCurrentHeader().getColumnsWithTypeAndName();
 
     JoinClausesAndActions join_clauses_and_actions;
     JoinKind join_kind = join_node.getKind();
@@ -1281,14 +1281,14 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
             join_table_expression,
             planner_context);
 
-        join_clauses_and_actions.left_join_expressions_actions.appendInputsForUnusedColumns(left_plan.getCurrentDataStream().header);
-        auto left_join_expressions_actions_step = std::make_unique<ExpressionStep>(left_plan.getCurrentDataStream(), std::move(join_clauses_and_actions.left_join_expressions_actions));
+        join_clauses_and_actions.left_join_expressions_actions.appendInputsForUnusedColumns(left_plan.getCurrentHeader());
+        auto left_join_expressions_actions_step = std::make_unique<ExpressionStep>(left_plan.getCurrentHeader(), std::move(join_clauses_and_actions.left_join_expressions_actions));
         left_join_expressions_actions_step->setStepDescription("JOIN actions");
         appendSetsFromActionsDAG(left_join_expressions_actions_step->getExpression(), left_join_tree_query_plan.useful_sets);
         left_plan.addStep(std::move(left_join_expressions_actions_step));
 
-        join_clauses_and_actions.right_join_expressions_actions.appendInputsForUnusedColumns(right_plan.getCurrentDataStream().header);
-        auto right_join_expressions_actions_step = std::make_unique<ExpressionStep>(right_plan.getCurrentDataStream(), std::move(join_clauses_and_actions.right_join_expressions_actions));
+        join_clauses_and_actions.right_join_expressions_actions.appendInputsForUnusedColumns(right_plan.getCurrentHeader());
+        auto right_join_expressions_actions_step = std::make_unique<ExpressionStep>(right_plan.getCurrentHeader(), std::move(join_clauses_and_actions.right_join_expressions_actions));
         right_join_expressions_actions_step->setStepDescription("JOIN actions");
         appendSetsFromActionsDAG(right_join_expressions_actions_step->getExpression(), right_join_tree_query_plan.useful_sets);
         right_plan.addStep(std::move(right_join_expressions_actions_step));
@@ -1328,7 +1328,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
 
     auto join_cast_plan_output_nodes = [&](QueryPlan & plan_to_add_cast, std::unordered_map<std::string, DataTypePtr> & plan_column_name_to_cast_type)
     {
-        ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentDataStream().header.getColumnsWithTypeAndName());
+        ActionsDAG cast_actions_dag(plan_to_add_cast.getCurrentHeader().getColumnsWithTypeAndName());
 
         for (auto & output_node : cast_actions_dag.getOutputs())
         {
@@ -1340,9 +1340,9 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
             output_node = &cast_actions_dag.addCast(*output_node, cast_type, output_node->result_name);
         }
 
-        cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentDataStream().header);
+        cast_actions_dag.appendInputsForUnusedColumns(plan_to_add_cast.getCurrentHeader());
         auto cast_join_columns_step
-            = std::make_unique<ExpressionStep>(plan_to_add_cast.getCurrentDataStream(), std::move(cast_actions_dag));
+            = std::make_unique<ExpressionStep>(plan_to_add_cast.getCurrentHeader(), std::move(cast_actions_dag));
         cast_join_columns_step->setStepDescription("Cast JOIN USING columns");
         plan_to_add_cast.addStep(std::move(cast_join_columns_step));
     };
@@ -1512,11 +1512,11 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
         }
     }
 
-    const Block & left_header = left_plan.getCurrentDataStream().header;
+    const Block & left_header = left_plan.getCurrentHeader();
     auto left_table_names = left_header.getNames();
     NameSet left_table_names_set(left_table_names.begin(), left_table_names.end());
 
-    auto columns_from_joined_table = right_plan.getCurrentDataStream().header.getNamesAndTypesList();
+    auto columns_from_joined_table = right_plan.getCurrentHeader().getNamesAndTypesList();
     table_join->setColumnsFromJoinedTable(columns_from_joined_table, left_table_names_set, "");
 
     for (auto & column_from_joined_table : columns_from_joined_table)
@@ -1527,7 +1527,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
             table_join->addJoinedColumn(column_from_joined_table);
     }
 
-    const Block & right_header = right_plan.getCurrentDataStream().header;
+    const Block & right_header = right_plan.getCurrentHeader();
     auto join_algorithm = chooseJoinAlgorithm(table_join, join_node.getRightTableExpression(), left_header, right_header, planner_context);
 
     auto result_plan = QueryPlan();
@@ -1536,7 +1536,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
     if (is_filled_join)
     {
         auto filled_join_step
-            = std::make_unique<FilledJoinStep>(left_plan.getCurrentDataStream(), join_algorithm, settings[Setting::max_block_size]);
+            = std::make_unique<FilledJoinStep>(left_plan.getCurrentHeader(), join_algorithm, settings[Setting::max_block_size]);
 
         filled_join_step->setStepDescription("Filled JOIN");
         left_plan.addStep(std::move(filled_join_step));
@@ -1555,7 +1555,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
             SortingStep::Settings sort_settings(*query_context);
 
             auto sorting_step = std::make_unique<SortingStep>(
-                plan.getCurrentDataStream(),
+                plan.getCurrentHeader(),
                 std::move(sort_description),
                 0 /*limit*/,
                 sort_settings);
@@ -1567,7 +1567,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
         auto add_create_set = [&settings, crosswise_connection](QueryPlan & plan, const Names & key_names, JoinTableSide join_table_side)
         {
             auto creating_set_step = std::make_unique<CreateSetAndFilterOnTheFlyStep>(
-                plan.getCurrentDataStream(), key_names, settings[Setting::max_rows_in_set_to_optimize_join], crosswise_connection, join_table_side);
+                plan.getCurrentHeader(), key_names, settings[Setting::max_rows_in_set_to_optimize_join], crosswise_connection, join_table_side);
             creating_set_step->setStepDescription(fmt::format("Create set and filter {} joined stream", join_table_side));
 
             auto * step_raw_ptr = creating_set_step.get();
@@ -1598,8 +1598,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
             /// Sorting on a stream with const keys can start returning rows immediately and pipeline may stuck.
             /// Note: it's also doesn't work with the read-in-order optimization.
             /// No checks here because read in order is not applied if we have `CreateSetAndFilterOnTheFlyStep` in the pipeline between the reading and sorting steps.
-            bool has_non_const_keys = has_non_const(left_plan.getCurrentDataStream().header, join_clause.key_names_left)
-                && has_non_const(right_plan.getCurrentDataStream().header, join_clause.key_names_right);
+            bool has_non_const_keys = has_non_const(left_plan.getCurrentHeader(), join_clause.key_names_left)
+                && has_non_const(right_plan.getCurrentHeader(), join_clause.key_names_right);
 
             if (settings[Setting::max_rows_in_set_to_optimize_join] > 0 && join_type_allows_filtering && has_non_const_keys)
             {
@@ -1619,8 +1619,8 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
 
         auto join_pipeline_type = join_algorithm->pipelineType();
         auto join_step = std::make_unique<JoinStep>(
-            left_plan.getCurrentDataStream(),
-            right_plan.getCurrentDataStream(),
+            left_plan.getCurrentHeader(),
+            right_plan.getCurrentHeader(),
             std::move(join_algorithm),
             settings[Setting::max_block_size],
             settings[Setting::max_threads],
@@ -1635,7 +1635,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
         result_plan.unitePlans(std::move(join_step), {std::move(plans)});
     }
 
-    ActionsDAG drop_unused_columns_after_join_actions_dag(result_plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
+    ActionsDAG drop_unused_columns_after_join_actions_dag(result_plan.getCurrentHeader().getColumnsWithTypeAndName());
     ActionsDAG::NodeRawConstPtrs drop_unused_columns_after_join_actions_dag_updated_outputs;
     std::unordered_set<std::string_view> drop_unused_columns_after_join_actions_dag_updated_outputs_names;
     std::optional<size_t> first_skipped_column_node_index;
@@ -1672,7 +1672,7 @@ JoinTreeQueryPlan buildQueryPlanForJoinNode(const QueryTreeNodePtr & join_table_
 
     drop_unused_columns_after_join_actions_dag_outputs = std::move(drop_unused_columns_after_join_actions_dag_updated_outputs);
 
-    auto drop_unused_columns_after_join_transform_step = std::make_unique<ExpressionStep>(result_plan.getCurrentDataStream(), std::move(drop_unused_columns_after_join_actions_dag));
+    auto drop_unused_columns_after_join_transform_step = std::make_unique<ExpressionStep>(result_plan.getCurrentHeader(), std::move(drop_unused_columns_after_join_actions_dag));
     drop_unused_columns_after_join_transform_step->setStepDescription("DROP unused columns after JOIN");
     result_plan.addStep(std::move(drop_unused_columns_after_join_transform_step));
 
@@ -1710,7 +1710,7 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_
             QueryProcessingStage::toString(join_tree_query_plan.from_stage));
 
     auto plan = std::move(join_tree_query_plan.query_plan);
-    auto plan_output_columns = plan.getCurrentDataStream().header.getColumnsWithTypeAndName();
+    auto plan_output_columns = plan.getCurrentHeader().getColumnsWithTypeAndName();
 
     ActionsDAG array_join_action_dag(plan_output_columns);
     PlannerActionsVisitor actions_visitor(planner_context);
@@ -1734,14 +1734,14 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_
         }
     }
 
-    array_join_action_dag.appendInputsForUnusedColumns(plan.getCurrentDataStream().header);
+    array_join_action_dag.appendInputsForUnusedColumns(plan.getCurrentHeader());
 
-    auto array_join_actions = std::make_unique<ExpressionStep>(plan.getCurrentDataStream(), std::move(array_join_action_dag));
+    auto array_join_actions = std::make_unique<ExpressionStep>(plan.getCurrentHeader(), std::move(array_join_action_dag));
     array_join_actions->setStepDescription("ARRAY JOIN actions");
     appendSetsFromActionsDAG(array_join_actions->getExpression(), join_tree_query_plan.useful_sets);
     plan.addStep(std::move(array_join_actions));
 
-    ActionsDAG drop_unused_columns_before_array_join_actions_dag(plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
+    ActionsDAG drop_unused_columns_before_array_join_actions_dag(plan.getCurrentHeader().getColumnsWithTypeAndName());
     ActionsDAG::NodeRawConstPtrs drop_unused_columns_before_array_join_actions_dag_updated_outputs;
     std::unordered_set<std::string_view> drop_unused_columns_before_array_join_actions_dag_updated_outputs_names;
 
@@ -1765,14 +1765,14 @@ JoinTreeQueryPlan buildQueryPlanForArrayJoinNode(const QueryTreeNodePtr & array_
 
     drop_unused_columns_before_array_join_actions_dag_outputs = std::move(drop_unused_columns_before_array_join_actions_dag_updated_outputs);
 
-    auto drop_unused_columns_before_array_join_transform_step = std::make_unique<ExpressionStep>(plan.getCurrentDataStream(),
+    auto drop_unused_columns_before_array_join_transform_step = std::make_unique<ExpressionStep>(plan.getCurrentHeader(),
         std::move(drop_unused_columns_before_array_join_actions_dag));
     drop_unused_columns_before_array_join_transform_step->setStepDescription("DROP unused columns before ARRAY JOIN");
     plan.addStep(std::move(drop_unused_columns_before_array_join_transform_step));
 
     const auto & settings = planner_context->getQueryContext()->getSettingsRef();
     auto array_join_step = std::make_unique<ArrayJoinStep>(
-        plan.getCurrentDataStream(),
+        plan.getCurrentHeader(),
         ArrayJoin{std::move(array_join_column_names), array_join_node.isLeft()},
         settings[Setting::enable_unaligned_array_join],
         settings[Setting::max_block_size]);
diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp
index f2f2c881af6..b97a9a36381 100644
--- a/src/Planner/findParallelReplicasQuery.cpp
+++ b/src/Planner/findParallelReplicasQuery.cpp
@@ -446,7 +446,7 @@ JoinTreeQueryPlan buildQueryPlanForParallelReplicas(
     /// header is a header which is returned by the follower.
     /// They are different because tables will have different aliases (e.g. _table1 or _table5).
     /// Here we just rename columns by position, with the hope the types would match.
-    auto step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(converting));
+    auto step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(converting));
     step->setStepDescription("Convert distributed names");
     query_plan.addStep(std::move(step));
 
diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp
index 57b9743877c..2da5118afe1 100644
--- a/src/Processors/Formats/IRowInputFormat.cpp
+++ b/src/Processors/Formats/IRowInputFormat.cpp
@@ -274,7 +274,8 @@ size_t IRowInputFormat::countRows(size_t)
 
 void IRowInputFormat::setSerializationHints(const SerializationInfoByName & hints)
 {
-    serializations = getPort().getHeader().getSerializations(hints);
+    if (supportsCustomSerializations())
+        serializations = getPort().getHeader().getSerializations(hints);
 }
 
 
diff --git a/src/Processors/Formats/IRowInputFormat.h b/src/Processors/Formats/IRowInputFormat.h
index fd302204a38..b210d651e70 100644
--- a/src/Processors/Formats/IRowInputFormat.h
+++ b/src/Processors/Formats/IRowInputFormat.h
@@ -59,6 +59,7 @@ protected:
     /// `max_block_size` can be ignored.
     virtual size_t countRows(size_t max_block_size);
     virtual bool supportsCountRows() const { return false; }
+    virtual bool supportsCustomSerializations() const { return false; }
 
     virtual void readPrefix() {}                /// delimiter before begin of result
     virtual void readSuffix() {}                /// delimiter after end of result
diff --git a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h
index 24ffdc10581..fca954b6cfb 100644
--- a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h
+++ b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h
@@ -15,6 +15,7 @@
 #include <arrow/type_fwd.h>
 #include <boost/algorithm/string/case_conv.hpp>
 #include <Common/Exception.h>
+#include <parquet/metadata.h>
 
 
 namespace arrow
@@ -65,11 +66,22 @@ public:
         return result;
     }
 
+    // For a Parquet schema {x: {i: int, j: int}}, this should be populated as follows:
+    // clickhouse_index = 0, parquet_indexes = {0, 1}
+    struct ClickHouseIndexToParquetIndex
+    {
+        std::size_t clickhouse_index;
+        std::vector<int> parquet_indexes;
+    };
+
     /// Only collect the required fields' indices. Eg. when just read a field of a struct,
     /// don't need to collect the whole indices in this struct.
-    std::vector<int> findRequiredIndices(const Block & header, const arrow::Schema & schema)
+    std::vector<ClickHouseIndexToParquetIndex> findRequiredIndices(
+        const Block & header,
+        const arrow::Schema & schema,
+        const parquet::FileMetaData & file)
     {
-        std::vector<int> required_indices;
+        std::vector<ClickHouseIndexToParquetIndex> required_indices;
         std::unordered_set<int> added_indices;
         /// Flat all named fields' index information into a map.
         auto fields_indices = calculateFieldIndices(schema);
@@ -79,7 +91,7 @@ public:
             std::string col_name = named_col.name;
             if (ignore_case)
                 boost::to_lower(col_name);
-            findRequiredIndices(col_name, named_col.type, fields_indices, added_indices, required_indices);
+            findRequiredIndices(col_name, i, named_col.type, fields_indices, added_indices, required_indices, file);
         }
         return required_indices;
     }
@@ -169,10 +181,12 @@ private:
 
     void findRequiredIndices(
         const String & name,
+        std::size_t header_index,
         DataTypePtr data_type,
         const std::unordered_map<std::string, std::pair<int, int>> & field_indices,
         std::unordered_set<int> & added_indices,
-        std::vector<int> & required_indices)
+        std::vector<ClickHouseIndexToParquetIndex> & required_indices,
+        const parquet::FileMetaData & file)
     {
         auto nested_type = removeNullable(data_type);
         if (const DB::DataTypeTuple * type_tuple = typeid_cast<const DB::DataTypeTuple *>(nested_type.get()))
@@ -187,20 +201,20 @@ private:
                     if (ignore_case)
                         boost::to_lower(field_name);
                     const auto & field_type = field_types[i];
-                    findRequiredIndices(Nested::concatenateName(name, field_name), field_type, field_indices, added_indices, required_indices);
+                    findRequiredIndices(Nested::concatenateName(name, field_name), header_index, field_type, field_indices, added_indices, required_indices, file);
                 }
                 return;
             }
         }
         else if (const auto * type_array = typeid_cast<const DB::DataTypeArray *>(nested_type.get()))
         {
-            findRequiredIndices(name, type_array->getNestedType(), field_indices, added_indices, required_indices);
+            findRequiredIndices(name, header_index, type_array->getNestedType(), field_indices, added_indices, required_indices, file);
             return;
         }
         else if (const auto * type_map = typeid_cast<const DB::DataTypeMap *>(nested_type.get()))
         {
-            findRequiredIndices(name, type_map->getKeyType(), field_indices, added_indices, required_indices);
-            findRequiredIndices(name, type_map->getValueType(), field_indices, added_indices, required_indices);
+            findRequiredIndices(name, header_index, type_map->getKeyType(), field_indices, added_indices, required_indices, file);
+            findRequiredIndices(name, header_index, type_map->getValueType(), field_indices, added_indices, required_indices, file);
             return;
         }
         auto it = field_indices.find(name);
@@ -211,14 +225,18 @@ private:
         }
         else
         {
+            ClickHouseIndexToParquetIndex index_mapping;
+            index_mapping.clickhouse_index = header_index;
             for (int j = 0; j < it->second.second; ++j)
             {
                 auto index = it->second.first + j;
                 if (added_indices.insert(index).second)
                 {
-                    required_indices.emplace_back(index);
+                    index_mapping.parquet_indexes.emplace_back(index);
                 }
             }
+
+            required_indices.emplace_back(index_mapping);
         }
     }
 };
diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
index d97aa2dad8d..b1163f7e883 100644
--- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h
@@ -43,6 +43,7 @@ private:
 
     size_t countRows(size_t max_block_size) override;
     bool supportsCountRows() const override { return true; }
+    bool supportsCustomSerializations() const override { return true; }
 
     const String & columnName(size_t i) const;
     size_t columnIndex(StringRef name, size_t key_index);
diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp
index 182c1faa68a..9bf3c3e6cbb 100644
--- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp
@@ -6,6 +6,7 @@
 #    include <Columns/ColumnMap.h>
 #    include <Columns/ColumnNullable.h>
 #    include <Columns/ColumnString.h>
+#    include <Columns/ColumnsCommon.h>
 #    include <Columns/ColumnsDateTime.h>
 #    include <Columns/ColumnsNumber.h>
 #    include <DataTypes/DataTypeArray.h>
@@ -16,13 +17,13 @@
 #    include <DataTypes/DataTypeIPv4andIPv6.h>
 #    include <DataTypes/DataTypeLowCardinality.h>
 #    include <DataTypes/DataTypeMap.h>
+#    include <DataTypes/DataTypeNested.h>
 #    include <DataTypes/DataTypeNullable.h>
 #    include <DataTypes/DataTypeString.h>
 #    include <DataTypes/DataTypeTuple.h>
 #    include <DataTypes/DataTypesDecimal.h>
 #    include <DataTypes/DataTypesNumber.h>
 #    include <DataTypes/NestedUtils.h>
-#    include <DataTypes/DataTypeNested.h>
 #    include <Formats/FormatFactory.h>
 #    include <Formats/SchemaInferenceUtils.h>
 #    include <Formats/insertNullAsDefaultIfNeeded.h>
@@ -35,6 +36,8 @@
 #    include <Common/FieldVisitorsAccurateComparison.h>
 #    include "ArrowBufferedStreams.h"
 
+#    include <orc/Vector.hh>
+
 
 namespace DB
 {
@@ -110,7 +113,21 @@ static const orc::Type * getORCTypeByName(const orc::Type & schema, const String
     return nullptr;
 }
 
-static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_with_unsupported_types, bool & skipped)
+static bool isDictionaryEncoded(const orc::StripeInformation * stripe_info, const orc::Type * orc_type)
+{
+    if (!stripe_info)
+        return false;
+
+    auto encoding = stripe_info->getColumnEncoding(orc_type->getColumnId());
+    return encoding == orc::ColumnEncodingKind_DICTIONARY || encoding == orc::ColumnEncodingKind_DICTIONARY_V2;
+}
+
+static DataTypePtr parseORCType(
+    const orc::Type * orc_type,
+    bool skip_columns_with_unsupported_types,
+    bool dictionary_as_low_cardinality,
+    const orc::StripeInformation * stripe_info,
+    bool & skipped)
 {
     assert(orc_type != nullptr);
 
@@ -137,12 +154,22 @@ static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_wi
             return std::make_shared<DataTypeDateTime64>(9);
         case orc::TypeKind::TIMESTAMP_INSTANT:
             return std::make_shared<DataTypeDateTime64>(9, "UTC");
+        case orc::TypeKind::CHAR:
         case orc::TypeKind::VARCHAR:
         case orc::TypeKind::BINARY:
-        case orc::TypeKind::STRING:
-            return std::make_shared<DataTypeString>();
-        case orc::TypeKind::CHAR:
-            return std::make_shared<DataTypeFixedString>(orc_type->getMaximumLength());
+        case orc::TypeKind::STRING: {
+            DataTypePtr type;
+            if (orc_type->getKind() == orc::TypeKind::CHAR)
+                type = std::make_shared<DataTypeFixedString>(orc_type->getMaximumLength());
+            else
+                type = std::make_shared<DataTypeString>();
+
+            /// Wrap type in LowCardinality if ORC column is dictionary encoded and dictionary_as_low_cardinality is true
+            if (dictionary_as_low_cardinality && isDictionaryEncoded(stripe_info, orc_type))
+                type = std::make_shared<DataTypeLowCardinality>(type);
+
+            return type;
+        }
         case orc::TypeKind::DECIMAL: {
             UInt64 precision = orc_type->getPrecision();
             UInt64 scale = orc_type->getScale();
@@ -157,7 +184,8 @@ static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_wi
             if (subtype_count != 1)
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Orc List type {}", orc_type->toString());
 
-            DataTypePtr nested_type = parseORCType(orc_type->getSubtype(0), skip_columns_with_unsupported_types, skipped);
+            DataTypePtr nested_type = parseORCType(
+                orc_type->getSubtype(0), skip_columns_with_unsupported_types, dictionary_as_low_cardinality, stripe_info, skipped);
             if (skipped)
                 return {};
 
@@ -167,11 +195,12 @@ static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_wi
             if (subtype_count != 2)
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid Orc Map type {}", orc_type->toString());
 
-            DataTypePtr key_type = parseORCType(orc_type->getSubtype(0), skip_columns_with_unsupported_types, skipped);
+            DataTypePtr key_type = parseORCType(
+                orc_type->getSubtype(0), skip_columns_with_unsupported_types, dictionary_as_low_cardinality, stripe_info, skipped);
             if (skipped)
                 return {};
 
-            DataTypePtr value_type = parseORCType(orc_type->getSubtype(1), skip_columns_with_unsupported_types, skipped);
+            DataTypePtr value_type = parseORCType(orc_type->getSubtype(1), skip_columns_with_unsupported_types, dictionary_as_low_cardinality, stripe_info, skipped);
             if (skipped)
                 return {};
 
@@ -185,7 +214,8 @@ static DataTypePtr parseORCType(const orc::Type * orc_type, bool skip_columns_wi
 
             for (size_t i = 0; i < orc_type->getSubtypeCount(); ++i)
             {
-                auto parsed_type = parseORCType(orc_type->getSubtype(i), skip_columns_with_unsupported_types, skipped);
+                auto parsed_type
+                    = parseORCType(orc_type->getSubtype(i), skip_columns_with_unsupported_types, dictionary_as_low_cardinality, stripe_info, skipped);
                 if (skipped)
                     return {};
 
@@ -487,7 +517,7 @@ static void buildORCSearchArgumentImpl(
             ///     For queries with where condition like "a > 10", if a column contains negative values such as "-1", pushing or not pushing
             ///     down filters would result in different outputs.
             bool skipped = false;
-            auto expect_type = makeNullableRecursively(parseORCType(orc_type, true, skipped));
+            auto expect_type = makeNullableRecursively(parseORCType(orc_type, true, false, nullptr, skipped));
             const ColumnWithTypeAndName * column = header.findByName(column_name, format_settings.orc.case_insensitive_column_matching);
             if (!expect_type || !column)
             {
@@ -741,7 +771,7 @@ static const orc::Type * traverseDownORCTypeByName(
     if (orc::LIST == orc_type->getKind())
     {
         /// For cases in which header contains subcolumns flattened from nested columns.
-        /// For example, "a Nested(x String, y Int64)" is flattened to "a.x Array(String), a.y Array(Int64)", and orc file schema is still "a array<struct<x string, y long>>".
+        /// For example, "a Nested(x String, y Int64)" is flattened to "a.x Array(String), a.y Array(Int64)", and ORC file schema is still "a array<struct<x string, y long>>".
         /// In this case, we should skip possible array type and traverse down to its nested struct type.
         const auto * array_type = typeid_cast<const DataTypeArray *>(removeNullable(type).get());
         const auto * orc_nested_type = orc_type->getSubtype(0);
@@ -793,7 +823,7 @@ static void updateIncludeTypeIds(
             return;
         }
         case orc::STRUCT: {
-            /// To make sure tuple field pruning work fine, we should include only the fields of orc struct type which are also contained in CH tuple types, instead of all fields of orc struct type.
+            /// To make sure tuple field pruning works fine, we should include only the fields of ORC struct type which are also contained in CH tuple types, instead of all fields of ORC struct type.
             /// For example, CH tupe type in header is "x Tuple(a String)", ORC struct type is "x struct<a:string, b:long>", then only type id of field "x.a" should be included.
             /// For tuple field pruning purpose, we should never include "x.b" for it is not required in format header.
             const auto * tuple_type = typeid_cast<const DataTypeTuple *>(non_nullable_type.get());
@@ -860,11 +890,17 @@ void NativeORCBlockInputFormat::prepareFileReader()
     total_stripes = static_cast<int>(file_reader->getNumberOfStripes());
     current_stripe = -1;
 
+
+    std::unique_ptr<orc::StripeInformation> stripe_info;
+    if (file_reader->getNumberOfStripes())
+        stripe_info = file_reader->getStripe(0);
+
     orc_column_to_ch_column = std::make_unique<ORCColumnToCHColumn>(
         getPort().getHeader(),
         format_settings.orc.allow_missing_columns,
         format_settings.null_as_default,
-        format_settings.orc.case_insensitive_column_matching);
+        format_settings.orc.case_insensitive_column_matching,
+        format_settings.orc.dictionary_as_low_cardinality);
 
     const bool ignore_case = format_settings.orc.case_insensitive_column_matching;
     const auto & header = getPort().getHeader();
@@ -902,6 +938,7 @@ bool NativeORCBlockInputFormat::prepareStripeReader()
         throw Exception(ErrorCodes::INCORRECT_DATA, "ORC stripe {} has no rows", current_stripe);
 
     orc::RowReaderOptions row_reader_options;
+    row_reader_options.setEnableLazyDecoding(format_settings.orc.dictionary_as_low_cardinality);
     row_reader_options.includeTypes(include_indices);
     row_reader_options.setTimezoneName(format_settings.orc.reader_time_zone_name);
     row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength());
@@ -992,15 +1029,25 @@ NamesAndTypesList NativeORCSchemaReader::readSchema()
     std::atomic<int> is_stopped = 0;
     getFileReader(in, file_reader, format_settings, is_stopped);
 
+
     const auto & schema = file_reader->getType();
     Block header;
+    std::unique_ptr<orc::StripeInformation> stripe_info;
+    if (file_reader->getNumberOfStripes())
+        stripe_info = file_reader->getStripe(0);
+
     for (size_t i = 0; i < schema.getSubtypeCount(); ++i)
     {
         const std::string & name = schema.getFieldName(i);
         const orc::Type * orc_type = schema.getSubtype(i);
 
         bool skipped = false;
-        DataTypePtr type = parseORCType(orc_type, format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference, skipped);
+        DataTypePtr type = parseORCType(
+            orc_type,
+            format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference,
+            format_settings.orc.dictionary_as_low_cardinality,
+            stripe_info.get(),
+            skipped);
         if (!skipped)
             header.insert(ColumnWithTypeAndName{type, name});
     }
@@ -1011,11 +1058,16 @@ NamesAndTypesList NativeORCSchemaReader::readSchema()
 }
 
 ORCColumnToCHColumn::ORCColumnToCHColumn(
-    const Block & header_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_)
+    const Block & header_,
+    bool allow_missing_columns_,
+    bool null_as_default_,
+    bool case_insensitive_matching_,
+    bool dictionary_as_low_cardinality_)
     : header(header_)
     , allow_missing_columns(allow_missing_columns_)
     , null_as_default(null_as_default_)
     , case_insensitive_matching(case_insensitive_matching_)
+    , dictionary_as_low_cardinality(dictionary_as_low_cardinality_)
 {
 }
 
@@ -1129,6 +1181,120 @@ readColumnWithNumericDataCast(const orc::ColumnVectorBatch * orc_column, const o
     return {std::move(internal_column), std::move(internal_type), column_name};
 }
 
+/// Converts a dictionary-encoded ORC string/char batch into a LowCardinality(String or FixedString) column built from the ORC dictionary.
+/// `nullable` selects LowCardinality(Nullable(...)); rows that are NULL in ORC get index 0 (NULL if nullable, otherwise the default value).
+template <bool fixed_string>
+static ColumnWithTypeAndName readColumnWithEncodedStringOrFixedStringData(
+    const orc::ColumnVectorBatch * orc_column, const orc::Type * orc_type, const String & column_name, bool nullable)
+{
+    /// Fill CH holder_column with ORC dictionary
+    /// Note that holder_column is always a ColumnString or ColumnFixedString whether nullable is true or false, because ORC dictionary doesn't contain null values.
+    DataTypePtr holder_type;
+    if constexpr (fixed_string)
+        holder_type = std::make_shared<DataTypeFixedString>(orc_type->getMaximumLength());
+    else
+        holder_type = std::make_shared<DataTypeString>();
+
+    DataTypePtr nested_type = nullable ? std::make_shared<DataTypeNullable>(holder_type) : holder_type;
+    auto internal_type = std::make_shared<DataTypeLowCardinality>(std::move(nested_type));
+
+    const auto & orc_str_column = dynamic_cast<const orc::EncodedStringVectorBatch &>(*orc_column);
+    size_t rows = orc_str_column.numElements;
+    const auto & orc_dict = *orc_str_column.dictionary;
+    if (orc_dict.dictionaryOffset.size() <= 1)
+    {
+        /// Empty dictionary: no row can reference an entry, so emit `rows` defaults to keep the column as long as the batch.
+        auto defaults_column = internal_type->createColumn();
+        defaults_column->insertManyDefaults(rows);
+        return {std::move(defaults_column), internal_type, column_name};
+    }
+
+    size_t dict_size = orc_dict.dictionaryOffset.size() - 1;
+    auto holder_column = holder_type->createColumn();
+    if constexpr (fixed_string)
+    {
+        const size_t n = orc_type->getMaximumLength();
+        auto & concrete_holder_column = assert_cast<ColumnFixedString &>(*holder_column);
+        PaddedPODArray<UInt8> & column_chars_t = concrete_holder_column.getChars();
+        /// Zero-fill so that entries shorter than n get deterministic padding instead of uninitialized bytes.
+        column_chars_t.resize_fill(dict_size * n);
+        size_t curr_offset = 0;
+        for (size_t i = 0; i < dict_size; ++i)
+        {
+            const auto * buf = orc_dict.dictionaryBlob.data() + orc_dict.dictionaryOffset[i];
+            size_t buf_size = orc_dict.dictionaryOffset[i + 1] - orc_dict.dictionaryOffset[i];
+            /// Never copy more than n bytes: a longer entry would overflow its fixed-size slot.
+            memcpy(&column_chars_t[curr_offset], buf, buf_size < n ? buf_size : n);
+            curr_offset += n;
+        }
+    }
+    else
+    {
+        auto & concrete_holder_column = assert_cast<ColumnString &>(*holder_column);
+        PaddedPODArray<UInt8> & column_chars_t = concrete_holder_column.getChars();
+        PaddedPODArray<UInt64> & column_offsets = concrete_holder_column.getOffsets();
+        size_t reserve_size = orc_dict.dictionaryBlob.size() + dict_size;
+        column_chars_t.resize_exact(reserve_size);
+        column_offsets.resize_exact(dict_size);
+        size_t curr_offset = 0;
+        for (size_t i = 0; i < dict_size; ++i)
+        {
+            const auto * buf = orc_dict.dictionaryBlob.data() + orc_dict.dictionaryOffset[i];
+            size_t buf_size = orc_dict.dictionaryOffset[i + 1] - orc_dict.dictionaryOffset[i];
+            memcpy(&column_chars_t[curr_offset], buf, buf_size);
+            curr_offset += buf_size;
+            column_chars_t[curr_offset] = 0;
+            ++curr_offset;
+            column_offsets[i] = curr_offset;
+        }
+    }
+
+    /// Insert CH dictionary_column from holder_column
+    auto tmp_internal_column = internal_type->createColumn();
+    auto dictionary_column = IColumn::mutate(assert_cast<ColumnLowCardinality *>(tmp_internal_column.get())->getDictionaryPtr());
+    auto index_column = dynamic_cast<IColumnUnique *>(dictionary_column.get())->uniqueInsertRangeFrom(*holder_column, 0, holder_column->size());
+
+    /// Fill index_column and wrap it with LowCardinality
+    auto call_by_type = [&](auto index_type) -> MutableColumnPtr
+    {
+        using IndexType = decltype(index_type);
+        const ColumnVector<IndexType> * concrete_index_column = checkAndGetColumn<ColumnVector<IndexType>>(index_column.get());
+        if (!concrete_index_column)
+            return nullptr;
+        const auto & index_data = concrete_index_column->getData();
+        auto new_index_column = ColumnVector<IndexType>::create(rows);
+        auto & new_index_data = dynamic_cast<ColumnVector<IndexType> &>(*new_index_column).getData();
+        if (!orc_str_column.hasNulls)
+        {
+            for (size_t i = 0; i < rows; ++i)
+            {
+                /// First map row index to ORC dictionary index, then map ORC dictionary index to CH dictionary index
+                new_index_data[i] = index_data[orc_str_column.index[i]];
+            }
+        }
+        else
+        {
+            for (size_t i = 0; i < rows; ++i)
+            {
+                /// Set index 0 if we meet null value. If dictionary_column is nullable, 0 represents null value.
+                /// Otherwise 0 represents default string value, it is reasonable because null values are converted to default values when casting nullable column to non-nullable.
+                new_index_data[i] = orc_str_column.notNull[i] ? index_data[orc_str_column.index[i]] : 0;
+            }
+        }
+
+        return ColumnLowCardinality::create(std::move(dictionary_column), std::move(new_index_column));
+    };
+
+    MutableColumnPtr internal_column = call_by_type(UInt8());
+    if (!internal_column)
+        internal_column = call_by_type(UInt16());
+    if (!internal_column)
+        internal_column = call_by_type(UInt32());
+    if (!internal_column)
+        internal_column = call_by_type(UInt64());
+    return {std::move(internal_column), std::move(internal_type), column_name};
+}
+
 static ColumnWithTypeAndName
 readColumnWithStringData(const orc::ColumnVectorBatch * orc_column, const orc::Type *, const String & column_name)
 {
@@ -1372,16 +1538,16 @@ readColumnWithTimestampData(const orc::ColumnVectorBatch * orc_column, const orc
     return {std::move(internal_column), std::move(internal_type), column_name};
 }
 
-static ColumnWithTypeAndName readColumnFromORCColumn(
+ColumnWithTypeAndName ORCColumnToCHColumn::readColumnFromORCColumn(
     const orc::ColumnVectorBatch * orc_column,
     const orc::Type * orc_type,
     const std::string & column_name,
     bool inside_nullable,
-    DataTypePtr type_hint = nullptr)
+    DataTypePtr type_hint) const
 {
     bool skipped = false;
 
-    if (!inside_nullable && (orc_column->hasNulls || (type_hint && type_hint->isNullable()))
+    if (!inside_nullable && (orc_column->hasNulls || (type_hint && type_hint->isNullable())) && !orc_column->isEncoded
         && (orc_type->getKind() != orc::LIST && orc_type->getKind() != orc::MAP && orc_type->getKind() != orc::STRUCT))
     {
         DataTypePtr nested_type_hint;
@@ -1423,7 +1589,14 @@ static ColumnWithTypeAndName readColumnFromORCColumn(
                     default:;
                 }
             }
-            return readColumnWithStringData(orc_column, orc_type, column_name);
+
+            if (orc_column->isEncoded && dictionary_as_low_cardinality)
+            {
+                bool nullable = type_hint ? isNullableOrLowCardinalityNullable(type_hint) : true;
+                return readColumnWithEncodedStringOrFixedStringData<false>(orc_column, orc_type, column_name, nullable);
+            }
+            else
+                return readColumnWithStringData(orc_column, orc_type, column_name);
         }
         case orc::CHAR: {
             if (type_hint)
@@ -1441,7 +1614,14 @@ static ColumnWithTypeAndName readColumnFromORCColumn(
                     default:;
                 }
             }
-            return readColumnWithFixedStringData(orc_column, orc_type, column_name);
+
+            if (orc_column->isEncoded && dictionary_as_low_cardinality)
+            {
+                bool nullable = type_hint ? isNullableOrLowCardinalityNullable(type_hint) : true;
+                return readColumnWithEncodedStringOrFixedStringData<true>(orc_column, orc_type, column_name, nullable);
+            }
+            else
+                return readColumnWithFixedStringData(orc_column, orc_type, column_name);
         }
         case orc::BOOLEAN:
             return readColumnWithBooleanData(orc_column, orc_type, column_name);
@@ -1468,7 +1648,7 @@ static ColumnWithTypeAndName readColumnFromORCColumn(
         case orc::TIMESTAMP_INSTANT:
             return readColumnWithTimestampData(orc_column, orc_type, column_name);
         case orc::DECIMAL: {
-            auto interal_type = parseORCType(orc_type, false, skipped);
+            auto interal_type = parseORCType(orc_type, false, false, nullptr, skipped);
 
             auto precision = orc_type->getPrecision();
             if (precision == 0)
diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h
index a1b93b7b995..7eaff7f3020 100644
--- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.h
@@ -111,7 +111,12 @@ public:
     using ORCColumnWithType = std::pair<ORCColumnPtr, ORCTypePtr>;
     using NameToColumnPtr = std::unordered_map<std::string, ORCColumnWithType>;
 
-    ORCColumnToCHColumn(const Block & header_, bool allow_missing_columns_, bool null_as_default_, bool case_insensitive_matching_ = false);
+    ORCColumnToCHColumn(
+        const Block & header_,
+        bool allow_missing_columns_,
+        bool null_as_default_,
+        bool case_insensitive_matching_ = false,
+        bool dictionary_as_low_cardinality_ = false);
 
     void orcTableToCHChunk(
         Chunk & res,
@@ -124,11 +129,19 @@ public:
         Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values = nullptr);
 
 private:
+    ColumnWithTypeAndName readColumnFromORCColumn(
+        const orc::ColumnVectorBatch * orc_column,
+        const orc::Type * orc_type,
+        const std::string & column_name,
+        bool inside_nullable,
+        DataTypePtr type_hint = nullptr) const;
+
     const Block & header;
     /// If false, throw exception if some columns in header not exists in arrow table.
     bool allow_missing_columns;
     bool null_as_default;
     bool case_insensitive_matching;
+    bool dictionary_as_low_cardinality;
 };
 }
 #endif
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.cpp b/src/Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.cpp
new file mode 100644
index 00000000000..75eeb15a519
--- /dev/null
+++ b/src/Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.cpp
@@ -0,0 +1,525 @@
+#include <Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.h>
+#include <iostream>
+
+#if USE_PARQUET
+
+#include <parquet/bloom_filter.h>
+#include <parquet/xxhasher.h>
+#include <Interpreters/convertFieldToType.h>
+#include <Columns/ColumnConst.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+namespace
+{
+
+bool isParquetStringTypeSupportedForBloomFilters(
+    const std::shared_ptr<const parquet::LogicalType> & logical_type,
+    parquet::ConvertedType::type converted_type)
+{
+    if (logical_type &&
+        !logical_type->is_none()
+        && !(logical_type->is_string() || logical_type->is_BSON() || logical_type->is_JSON()))
+    {
+        return false;
+    }
+
+    if (parquet::ConvertedType::type::NONE != converted_type &&
+        !(converted_type == parquet::ConvertedType::JSON || converted_type == parquet::ConvertedType::UTF8
+          || converted_type == parquet::ConvertedType::BSON))
+    {
+        return false;
+    }
+
+    return true;
+}
+
+bool isParquetIntegerTypeSupportedForBloomFilters(const std::shared_ptr<const parquet::LogicalType> & logical_type, parquet::ConvertedType::type converted_type)
+{
+    if (logical_type && !logical_type->is_none() && !logical_type->is_int())
+    {
+        return false;
+    }
+
+    if (parquet::ConvertedType::type::NONE != converted_type && !(converted_type == parquet::ConvertedType::INT_8 || converted_type == parquet::ConvertedType::INT_16
+        || converted_type == parquet::ConvertedType::INT_32 || converted_type == parquet::ConvertedType::INT_64
+        || converted_type == parquet::ConvertedType::UINT_8 || converted_type == parquet::ConvertedType::UINT_16
+        || converted_type == parquet::ConvertedType::UINT_32 || converted_type == parquet::ConvertedType::UINT_64))
+    {
+        return false;
+    }
+
+    return true;
+}
+
+template <typename T>
+uint64_t hashSpecialFLBATypes(const Field & field)
+{
+    const T & value = field.safeGet<T>();
+
+    parquet::FLBA flba(reinterpret_cast<const uint8_t*>(&value));
+
+    parquet::XxHasher hasher;
+
+    return hasher.Hash(&flba, sizeof(T));
+};
+
+std::optional<uint64_t> tryHashStringWithoutCompatibilityCheck(const Field & field)
+{
+    const auto field_type = field.getType();
+
+    if (field_type != Field::Types::Which::String)
+    {
+        return std::nullopt;
+    }
+
+    parquet::XxHasher hasher;
+    parquet::ByteArray ba { field.safeGet<std::string>() };
+
+    return hasher.Hash(&ba);
+}
+
+std::optional<uint64_t> tryHashString(
+    const Field & field,
+    const std::shared_ptr<const parquet::LogicalType> & logical_type,
+    parquet::ConvertedType::type converted_type)
+{
+    if (!isParquetStringTypeSupportedForBloomFilters(logical_type, converted_type))
+    {
+        return std::nullopt;
+    }
+
+    return tryHashStringWithoutCompatibilityCheck(field);
+}
+
+std::optional<uint64_t> tryHashFLBA(
+    const Field & field,
+    const std::shared_ptr<const parquet::LogicalType> & logical_type,
+    parquet::ConvertedType::type converted_type,
+    std::size_t parquet_column_length)
+{
+    if (!isParquetStringTypeSupportedForBloomFilters(logical_type, converted_type))
+    {
+        return std::nullopt;
+    }
+
+    const auto field_type = field.getType();
+
+    if (field_type == Field::Types::Which::IPv6 && parquet_column_length == sizeof(IPv6))
+    {
+        return hashSpecialFLBATypes<IPv6>(field);
+    }
+
+    return tryHashStringWithoutCompatibilityCheck(field);
+}
+
+template <typename ParquetPhysicalType>
+std::optional<uint64_t> tryHashInt(const Field & field, const std::shared_ptr<const parquet::LogicalType> & logical_type, parquet::ConvertedType::type converted_type)
+{
+    if (!isParquetIntegerTypeSupportedForBloomFilters(logical_type, converted_type))
+    {
+        return std::nullopt;
+    }
+
+    parquet::XxHasher hasher;
+
+    if (field.getType() == Field::Types::Which::Int64)
+    {
+        return hasher.Hash(static_cast<ParquetPhysicalType>(field.safeGet<int64_t>()));
+    }
+    else if (field.getType() == Field::Types::Which::UInt64)
+    {
+        return hasher.Hash(static_cast<ParquetPhysicalType>(field.safeGet<uint64_t>()));
+    }
+    else if (field.getType() == Field::Types::IPv4)
+    {
+        /*
+         * In theory, we could accept IPv4 over 64 bits variables. It would only be a problem in case it was hashed using the byte array api
+         * with a zero-ed buffer that had a 32 bits variable copied into it.
+         *
+         * To be on the safe side, accept only in case physical type is 32 bits.
+         * */
+        if constexpr (std::is_same_v<int32_t, ParquetPhysicalType>)
+        {
+            return hasher.Hash(static_cast<ParquetPhysicalType>(field.safeGet<IPv4>()));
+        }
+    }
+
+    return std::nullopt;
+}
+
+std::optional<uint64_t> tryHash(const Field & field, const parquet::ColumnDescriptor * parquet_column_descriptor)
+{
+    const auto physical_type = parquet_column_descriptor->physical_type();
+    const auto & logical_type = parquet_column_descriptor->logical_type();
+    const auto converted_type = parquet_column_descriptor->converted_type();
+
+    switch (physical_type)
+    {
+        case parquet::Type::type::INT32:
+            return tryHashInt<int32_t>(field, logical_type, converted_type);
+        case parquet::Type::type::INT64:
+            return tryHashInt<int64_t>(field, logical_type, converted_type);
+        case parquet::Type::type::BYTE_ARRAY:
+            return tryHashString(field, logical_type, converted_type);
+        case parquet::Type::type::FIXED_LEN_BYTE_ARRAY:
+            return tryHashFLBA(field, logical_type, converted_type, parquet_column_descriptor->type_length());
+        default:
+            return std::nullopt;
+    }
+}
+
+std::optional<std::vector<uint64_t>> hash(const IColumn * data_column, const parquet::ColumnDescriptor * parquet_column_descriptor)
+{
+    std::vector<uint64_t> hashes;
+
+    for (size_t i = 0u; i < data_column->size(); i++)
+    {
+        Field f;
+        data_column->get(i, f);
+
+        auto hashed_value = tryHash(f, parquet_column_descriptor);
+
+        if (!hashed_value)
+        {
+            return std::nullopt;
+        }
+
+        hashes.emplace_back(*hashed_value);
+    }
+
+    return hashes;
+}
+
+bool maybeTrueOnBloomFilter(const std::vector<uint64_t> & hashes, const std::unique_ptr<parquet::BloomFilter> & bloom_filter)
+{
+    for (const auto hash : hashes)
+    {
+        if (bloom_filter->FindHash(hash))
+        {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+const parquet::ColumnDescriptor * getColumnDescriptorIfBloomFilterIsPresent(
+    const std::unique_ptr<parquet::RowGroupMetaData> & parquet_rg_metadata,
+    const std::vector<ArrowFieldIndexUtil::ClickHouseIndexToParquetIndex> & clickhouse_column_index_to_parquet_index,
+    std::size_t clickhouse_column_index)
+{
+    if (clickhouse_column_index_to_parquet_index.size() <= clickhouse_column_index)
+    {
+        return nullptr;
+    }
+
+    const auto & parquet_indexes = clickhouse_column_index_to_parquet_index[clickhouse_column_index].parquet_indexes;
+
+    // complex types like structs, tuples and maps will have more than one index.
+    // we don't support those for now
+    if (parquet_indexes.size() > 1)
+    {
+        return nullptr;
+    }
+
+    if (parquet_indexes.empty())
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Something bad happened, raise an issue and try the query with `input_format_parquet_bloom_filter_push_down=false`");
+    }
+
+    auto parquet_column_index = parquet_indexes[0];
+
+    const auto * parquet_column_descriptor = parquet_rg_metadata->schema()->Column(parquet_column_index);
+
+    bool column_has_bloom_filter = parquet_rg_metadata->ColumnChunk(parquet_column_index)->bloom_filter_offset().has_value();
+    if (!column_has_bloom_filter)
+    {
+        return nullptr;
+    }
+
+    return parquet_column_descriptor;
+}
+
+}
+
+ParquetBloomFilterCondition::ParquetBloomFilterCondition(const std::vector<ConditionElement> & condition_, const Block & header_)
+    : condition(condition_), header(header_)
+{
+}
+
+/// Evaluates the simplified RPN in `condition` against the bloom filters of a single row group.
+/// Returns false only when the bloom filters rule the condition out; a column with no bloom filter never rules anything out.
+bool ParquetBloomFilterCondition::mayBeTrueOnRowGroup(const ColumnIndexToBF & column_index_to_column_bf) const
+{
+    using Function = ConditionElement::Function;
+    std::vector<BoolMask> rpn_stack;
+
+    for (const auto & element : condition)
+    {
+        if (element.function == Function::FUNCTION_IN || element.function == Function::FUNCTION_NOT_IN)
+        {
+            bool maybe_true = true;
+            for (auto column_index = 0u; column_index < element.hashes_per_column.size(); column_index++)
+            {
+                // in case bloom filter is not present for this row group
+                // https://github.com/ClickHouse/ClickHouse/pull/62966#discussion_r1722361237
+                // Only skip the column here: the element's single stack entry is pushed once, after the loop.
+                if (!column_index_to_column_bf.contains(element.key_columns[column_index]))
+                    continue;
+
+                bool column_maybe_contains = maybeTrueOnBloomFilter(
+                    element.hashes_per_column[column_index],
+                    column_index_to_column_bf.at(element.key_columns[column_index]));
+
+                if (!column_maybe_contains)
+                {
+                    maybe_true = false;
+                    break;
+                }
+            }
+
+            // A bloom filter can only exclude values, hence the constant second argument (presumably BoolMask's can_be_false).
+            rpn_stack.emplace_back(maybe_true, true);
+            if (element.function == Function::FUNCTION_NOT_IN)
+                rpn_stack.back() = !rpn_stack.back();
+        }
+        else if (element.function == Function::FUNCTION_NOT)
+        {
+            rpn_stack.back() = !rpn_stack.back();
+        }
+        else if (element.function == Function::FUNCTION_OR)
+        {
+            auto arg1 = rpn_stack.back();
+            rpn_stack.pop_back();
+            auto arg2 = rpn_stack.back();
+            rpn_stack.back() = arg1 | arg2;
+        }
+        else if (element.function == Function::FUNCTION_AND)
+        {
+            auto arg1 = rpn_stack.back();
+            rpn_stack.pop_back();
+            auto arg2 = rpn_stack.back();
+            rpn_stack.back() = arg1 & arg2;
+        }
+        else if (element.function == Function::ALWAYS_TRUE)
+        {
+            rpn_stack.emplace_back(true, false);
+        }
+        else if (element.function == Function::ALWAYS_FALSE)
+        {
+            rpn_stack.emplace_back(false, true);
+        }
+        else
+        {
+            rpn_stack.emplace_back(true, true);
+        }
+    }
+
+    if (rpn_stack.size() != 1)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in ParquetBloomFilterCondition::mayBeTrueOnRowGroup");
+
+    return rpn_stack[0].can_be_true;
+}
+
+std::unordered_set<std::size_t> ParquetBloomFilterCondition::getFilteringColumnKeys() const
+{
+    std::unordered_set<std::size_t> column_keys;
+
+    for (const auto & element : condition)
+    {
+        for (const auto index : element.key_columns)
+        {
+            column_keys.insert(index);
+        }
+    }
+
+    return column_keys;
+}
+
+/*
+ * `KeyCondition::rpn` is overly complex for bloom filters, and some of its operations are not even supported. Moreover, to avoid hashing each time
+ * we loop over an RPN element, we need to store hashes instead of the values from the WHERE predicate. To address this, we loop over `KeyCondition::rpn`
+ * and build a simplified RPN that holds hashes instead of values.
+ *
+ * `KeyCondition::RPNElement::FUNCTION_IN_RANGE` becomes:
+ *      `FUNCTION_IN`
+ *      `FUNCTION_UNKNOWN` when range limits are different
+ * `KeyCondition::RPNElement::FUNCTION_IN_SET` becomes
+ *      `FUNCTION_IN`
+ *
+ * Complex types and structs are not supported.
+ * There are two sources of data types being analyzed, and they need to be compatible: DB::Field type and parquet type.
+ * This is determined by the `isColumnSupported` method.
+ *
+ * Some interesting examples:
+ * 1. file(..., 'str_column UInt64') where str_column = 50; Field.type == UInt64. Parquet type string. Not supported.
+ * 2. file(...) where str_column = 50; Field.type == String (conversion already taken care by `KeyCondition`). Parquet type string.
+ * 3. file(...) where uint32_column = toIPv4(5). Field.type == IPv4. Incompatible column types, resolved by `KeyCondition` itself.
+ * 4. file(...) where toIPv4(uint32_column) = toIPv4(5). Field.type == IPv4. We know it is safe to hash it using an int32 API.
+ * */
+std::vector<ParquetBloomFilterCondition::ConditionElement> keyConditionRPNToParquetBloomFilterCondition(
+    const std::vector<KeyCondition::RPNElement> & rpn,
+    const std::vector<ArrowFieldIndexUtil::ClickHouseIndexToParquetIndex> & clickhouse_column_index_to_parquet_index,
+    const std::unique_ptr<parquet::RowGroupMetaData> & parquet_rg_metadata)
+{
+    std::vector<ParquetBloomFilterCondition::ConditionElement> condition_elements;
+
+    using RPNElement = KeyCondition::RPNElement;
+    using Function = ParquetBloomFilterCondition::ConditionElement::Function;
+
+    for (const auto & rpn_element : rpn)
+    {
+        // this would be a problem for `where negate(x) = -58`.
+        // It would perform a bf search on `-58`, and possibly miss row groups containing this data.
+        if (!rpn_element.monotonic_functions_chain.empty())
+        {
+            condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
+            continue;
+        }
+
+        ParquetBloomFilterCondition::ConditionElement::HashesForColumns hashes;
+
+        if (rpn_element.function == RPNElement::FUNCTION_IN_RANGE || rpn_element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
+        {
+            // Only FUNCTION_EQUALS is supported and for that extremes need to be the same
+            if (rpn_element.range.left != rpn_element.range.right)
+            {
+                condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
+                continue;
+            }
+
+            const auto * parquet_column_descriptor =
+                getColumnDescriptorIfBloomFilterIsPresent(parquet_rg_metadata, clickhouse_column_index_to_parquet_index, rpn_element.key_column);
+
+            if (!parquet_column_descriptor)
+            {
+                condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
+                continue;
+            }
+
+            auto hashed_value = tryHash(rpn_element.range.left, parquet_column_descriptor);
+
+            if (!hashed_value)
+            {
+                condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
+                continue;
+            }
+
+            std::vector<uint64_t> hashes_for_column;
+            hashes_for_column.emplace_back(*hashed_value);
+
+            hashes.emplace_back(std::move(hashes_for_column));
+
+            auto function = rpn_element.function == RPNElement::FUNCTION_IN_RANGE
+                ? ParquetBloomFilterCondition::ConditionElement::Function::FUNCTION_IN
+                : ParquetBloomFilterCondition::ConditionElement::Function::FUNCTION_NOT_IN;
+
+            std::vector<std::size_t> key_columns;
+            key_columns.emplace_back(rpn_element.key_column);
+
+            condition_elements.emplace_back(function, std::move(hashes), std::move(key_columns));
+        }
+        else if (rpn_element.function == RPNElement::FUNCTION_IN_SET || rpn_element.function == RPNElement::FUNCTION_NOT_IN_SET)
+        {
+            const auto & set_index = rpn_element.set_index;
+            const auto & ordered_set = set_index->getOrderedSet();
+            const auto & indexes_mapping = set_index->getIndexesMapping();
+            bool found_empty_column = false;
+
+            std::vector<std::size_t> key_columns;
+
+            for (auto i = 0u; i < ordered_set.size(); i++)
+            {
+                const auto & set_column = ordered_set[i];
+
+                const auto * parquet_column_descriptor = getColumnDescriptorIfBloomFilterIsPresent(
+                    parquet_rg_metadata,
+                    clickhouse_column_index_to_parquet_index,
+                    indexes_mapping[i].key_index);
+
+                if (!parquet_column_descriptor)
+                {
+                    continue;
+                }
+
+                auto column = set_column;
+
+                if (column->empty())
+                {
+                    found_empty_column = true;
+                    break;
+                }
+
+                if (const auto & nullable_column = checkAndGetColumn<ColumnNullable>(set_column.get()))
+                {
+                    column = nullable_column->getNestedColumnPtr();
+                }
+
+                auto hashes_for_column_opt = hash(column.get(), parquet_column_descriptor);
+
+                if (!hashes_for_column_opt)
+                {
+                    continue;
+                }
+
+                auto & hashes_for_column = *hashes_for_column_opt;
+
+                if (hashes_for_column.empty())
+                {
+                    continue;
+                }
+
+                hashes.emplace_back(std::move(hashes_for_column));
+
+                key_columns.push_back(indexes_mapping[i].key_index);
+            }
+
+            if (found_empty_column)
+            {
+                condition_elements.emplace_back(Function::ALWAYS_FALSE);
+                continue;
+            }
+
+            if (hashes.empty())
+            {
+                condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
+                continue;
+            }
+
+            auto function = RPNElement::FUNCTION_IN_SET == rpn_element.function ? Function::FUNCTION_IN : Function::FUNCTION_NOT_IN;
+
+            condition_elements.emplace_back(function, std::move(hashes), std::move(key_columns));
+        }
+        else if (rpn_element.function == RPNElement::FUNCTION_NOT)
+        {
+            condition_elements.emplace_back(Function::FUNCTION_NOT);
+        }
+        else if (rpn_element.function == RPNElement::FUNCTION_OR)
+        {
+            condition_elements.emplace_back(Function::FUNCTION_OR);
+        }
+        else if (rpn_element.function == RPNElement::FUNCTION_AND)
+        {
+            condition_elements.emplace_back(Function::FUNCTION_AND);
+        }
+        else
+        {
+            // Everything else is opaque to bloom filters. It must be FUNCTION_UNKNOWN rather than ALWAYS_TRUE:
+            // a following FUNCTION_NOT would turn ALWAYS_TRUE into "never true" and wrongly prune the row group.
+            condition_elements.emplace_back(Function::FUNCTION_UNKNOWN);
+        }
+    }
+
+    return condition_elements;
+}
+
+}
+
+#endif
diff --git a/src/Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.h b/src/Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.h
new file mode 100644
index 00000000000..6de6030b23c
--- /dev/null
+++ b/src/Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include <config.h>
+
+#if USE_PARQUET
+
+#include <Storages/MergeTree/KeyCondition.h>
+#include <parquet/metadata.h>
+#include <Processors/Formats/Impl/ArrowFieldIndexUtil.h>
+
+namespace parquet
+{
+class BloomFilter;
+}
+
+namespace DB
+{
+
+class ParquetBloomFilterCondition
+{
+public:
+    /// One element of the condition; built from KeyCondition RPN elements by keyConditionRPNToParquetBloomFilterCondition.
+    struct ConditionElement
+    {
+        enum Function
+        {
+            /// Atoms of a Boolean expression.
+            FUNCTION_IN,
+            FUNCTION_NOT_IN,
+            /// Can take any value.
+            FUNCTION_UNKNOWN,
+            /// Operators of the logical expression.
+            FUNCTION_NOT,
+            FUNCTION_AND,
+            FUNCTION_OR,
+            /// Constants.
+            ALWAYS_FALSE,
+            ALWAYS_TRUE,
+        };
+
+        using ColumnPtr = IColumn::Ptr;
+        using HashesForColumns = std::vector<std::vector<uint64_t>>;
+        using KeyColumns = std::vector<std::size_t>;
+
+        Function function;
+        /// One list of hashes per key column; entry i belongs to the column key_columns[i].
+        /// For example, with three columns of 2 values each:
+        /// hashes_per_column.size() == 3 and hashes_per_column[0].size() == 2.
+        HashesForColumns hashes_per_column;
+        KeyColumns key_columns; /// Key index of each column, in the same order as hashes_per_column.
+    };
+
+    using RPNElement = KeyCondition::RPNElement;
+    using ColumnIndexToBF = std::unordered_map<std::size_t, std::unique_ptr<parquet::BloomFilter>>;
+
+    explicit ParquetBloomFilterCondition(const std::vector<ConditionElement> & condition_, const Block & header_);
+
+    bool mayBeTrueOnRowGroup(const ColumnIndexToBF & column_index_to_column_bf) const; /// The reader skips a row group when this returns false.
+    std::unordered_set<std::size_t> getFilteringColumnKeys() const; /// Presumably the columns used by the condition; the reader loads bloom filters only for these.
+
+private:
+    std::vector<ParquetBloomFilterCondition::ConditionElement> condition;
+    Block header;
+};
+
+std::vector<ParquetBloomFilterCondition::ConditionElement> keyConditionRPNToParquetBloomFilterCondition(
+    const std::vector<KeyCondition::RPNElement> & rpn,
+    const std::vector<ArrowFieldIndexUtil::ClickHouseIndexToParquetIndex> & clickhouse_column_index_to_parquet_index,
+    const std::unique_ptr<parquet::RowGroupMetaData> & parquet_rg_metadata);
+
+}
+
+#endif
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index 7384ce6cc1b..bb88ec49dea 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -14,6 +14,8 @@
 #include <arrow/status.h>
 #include <parquet/arrow/reader.h>
 #include <parquet/arrow/schema.h>
+#include <parquet/bloom_filter.h>
+#include <parquet/bloom_filter_reader.h>
 #include <parquet/file_reader.h>
 #include <parquet/statistics.h>
 #include "ArrowBufferedStreams.h"
@@ -25,6 +27,7 @@
 #include <DataTypes/DataTypeNullable.h>
 #include <Common/FieldVisitorsAccurateComparison.h>
 #include <Processors/Formats/Impl/Parquet/ParquetRecordReader.h>
+#include <Processors/Formats/Impl/Parquet/ParquetBloomFilterCondition.h>
 #include <Interpreters/convertFieldToType.h>
 
 namespace CurrentMetrics
@@ -263,6 +266,50 @@ static Field decodePlainParquetValueSlow(const std::string & data, parquet::Type
     return field;
 }
 
+/// Collects the bloom filters of a single row group, keyed by ClickHouse header column index.
+/// Only columns present in `filtering_columns` are looked up, and only if they map to exactly one
+/// parquet column index (complex / nested types map to several and are not supported).
+/// Columns for which the reader returns no bloom filter are left out of the result.
+/// Returns an empty map if `bf_reader` provides no reader for `row_group`.
+static ParquetBloomFilterCondition::ColumnIndexToBF buildColumnIndexToBF(
+    parquet::BloomFilterReader & bf_reader,
+    int row_group,
+    const std::vector<ArrowFieldIndexUtil::ClickHouseIndexToParquetIndex> & clickhouse_column_index_to_parquet_index,
+    const std::unordered_set<std::size_t> & filtering_columns
+)
+{
+    auto rg_bf = bf_reader.RowGroup(row_group);
+
+    if (!rg_bf)
+        return {};
+
+    ParquetBloomFilterCondition::ColumnIndexToBF index_to_column_bf;
+
+    for (const auto & [clickhouse_index, parquet_indexes] : clickhouse_column_index_to_parquet_index)
+    {
+        // Columns the caller did not ask for need no bloom filter.
+        if (!filtering_columns.contains(clickhouse_index))
+            continue;
+
+        // Complex / nested types contain more than one index. We don't support those.
+        // An empty mapping is skipped too, so that `parquet_indexes[0]` below is always in bounds.
+        if (parquet_indexes.size() != 1)
+            continue;
+
+        auto parquet_index = parquet_indexes[0];
+
+        auto bf = rg_bf->GetColumnBloomFilter(parquet_index);
+
+        // The reader returned no bloom filter for this column.
+        if (!bf)
+            continue;
+
+        index_to_column_bf[clickhouse_index] = std::move(bf);
+    }
+
+    return index_to_column_bf;
+}
+
 /// Range of values for each column, based on statistics in the Parquet metadata.
 /// This is lower/upper bounds, not necessarily exact min and max, e.g. the min/max can be just
 /// missing in the metadata.
@@ -474,9 +521,27 @@ void ParquetBlockInputFormat::initializeIfNeeded()
     ArrowFieldIndexUtil field_util(
         format_settings.parquet.case_insensitive_column_matching,
         format_settings.parquet.allow_missing_columns);
-    column_indices = field_util.findRequiredIndices(getPort().getHeader(), *schema);
+
+    auto index_mapping = field_util.findRequiredIndices(getPort().getHeader(), *schema, *metadata);
+
+    for (const auto & [clickhouse_header_index, parquet_indexes] : index_mapping)
+    {
+        for (auto parquet_index : parquet_indexes)
+        {
+            column_indices.push_back(parquet_index);
+        }
+    }
 
     int num_row_groups = metadata->num_row_groups();
+
+    if (num_row_groups == 0)
+    {
+        return;
+    }
+
+    const auto bf_reader_properties = parquet::default_reader_properties();
+    std::unique_ptr<parquet::BloomFilterReader> bf_reader;
+
     row_group_batches.reserve(num_row_groups);
 
     auto adaptive_chunk_size = [&](int row_group_idx) -> size_t
@@ -497,11 +562,38 @@ void ParquetBlockInputFormat::initializeIfNeeded()
         return std::min(std::max(preferred_num_rows, MIN_ROW_NUM), static_cast<size_t>(format_settings.parquet.max_block_size));
     };
 
+    std::unique_ptr<ParquetBloomFilterCondition> parquet_bloom_filter_condition;
+
+    std::unordered_set<std::size_t> filtering_columns;
+
+    if (format_settings.parquet.bloom_filter_push_down && key_condition)
+    {
+        bf_reader = parquet::BloomFilterReader::Make(arrow_file, metadata, bf_reader_properties, nullptr);
+
+        const auto parquet_conditions = keyConditionRPNToParquetBloomFilterCondition(
+            key_condition->getRPN(),
+            index_mapping,
+            metadata->RowGroup(0));
+        parquet_bloom_filter_condition = std::make_unique<ParquetBloomFilterCondition>(parquet_conditions, getPort().getHeader());
+
+        filtering_columns = parquet_bloom_filter_condition->getFilteringColumnKeys();
+    }
+
     for (int row_group = 0; row_group < num_row_groups; ++row_group)
     {
         if (skip_row_groups.contains(row_group))
             continue;
 
+        if (parquet_bloom_filter_condition)
+        {
+            const auto column_index_to_bf = buildColumnIndexToBF(*bf_reader, row_group, index_mapping, filtering_columns);
+
+            if (!parquet_bloom_filter_condition->mayBeTrueOnRowGroup(column_index_to_bf))
+            {
+                continue;
+            }
+        }
+
         if (format_settings.parquet.filter_push_down && key_condition
             && !key_condition
                     ->checkInHyperrectangle(
diff --git a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp
index c643ae060d6..2f3c9b6c6db 100644
--- a/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ProtobufListInputFormat.cpp
@@ -23,7 +23,7 @@ ProtobufListInputFormat::ProtobufListInputFormat(
           header_.getNames(),
           header_.getDataTypes(),
           missing_column_indices,
-          *ProtobufSchemas::instance().getMessageTypeForFormatSchema(
+          ProtobufSchemas::instance().getMessageTypeForFormatSchema(
               schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes, google_protos_path),
           /* with_length_delimiter = */ true,
           /* with_envelope = */ true,
@@ -93,9 +93,9 @@ ProtobufListSchemaReader::ProtobufListSchemaReader(const FormatSettings & format
 
 NamesAndTypesList ProtobufListSchemaReader::readSchema()
 {
-    const auto * message_descriptor
-        = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::Yes, google_protos_path);
-    return protobufSchemaToCHSchema(message_descriptor, skip_unsupported_fields);
+    auto descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(
+        schema_info, ProtobufSchemas::WithEnvelope::Yes, google_protos_path);
+    return protobufSchemaToCHSchema(descriptor.message_descriptor, skip_unsupported_fields);
 }
 
 void registerInputFormatProtobufList(FormatFactory & factory)
diff --git a/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp
index 927301fb1b0..8a7d52b9c28 100644
--- a/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/ProtobufListOutputFormat.cpp
@@ -20,7 +20,7 @@ ProtobufListOutputFormat::ProtobufListOutputFormat(
     , serializer(ProtobufSerializer::create(
           header_.getNames(),
           header_.getDataTypes(),
-          *ProtobufSchemas::instance().getMessageTypeForFormatSchema(
+          ProtobufSchemas::instance().getMessageTypeForFormatSchema(
               schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::Yes, google_protos_path),
           /* with_length_delimiter = */ true,
           /* with_envelope = */ true,
diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
index 7340faf785c..57d4f65e74e 100644
--- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp
@@ -19,7 +19,7 @@ ProtobufRowInputFormat::ProtobufRowInputFormat(
     bool flatten_google_wrappers_,
     const String & google_protos_path)
     : IRowInputFormat(header_, in_, params_)
-    , message_descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(
+    , descriptor(ProtobufSchemas::instance().getMessageTypeForFormatSchema(
           schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No, google_protos_path))
     , with_length_delimiter(with_length_delimiter_)
     , flatten_google_wrappers(flatten_google_wrappers_)
@@ -33,7 +33,7 @@ void ProtobufRowInputFormat::createReaderAndSerializer()
         getPort().getHeader().getNames(),
         getPort().getHeader().getDataTypes(),
         missing_column_indices,
-        *message_descriptor,
+        descriptor,
         with_length_delimiter,
         /* with_envelope = */ false,
         flatten_google_wrappers,
@@ -132,9 +132,9 @@ ProtobufSchemaReader::ProtobufSchemaReader(const FormatSettings & format_setting
 
 NamesAndTypesList ProtobufSchemaReader::readSchema()
 {
-    const auto * message_descriptor
-        = ProtobufSchemas::instance().getMessageTypeForFormatSchema(schema_info, ProtobufSchemas::WithEnvelope::No, google_protos_path);
-    return protobufSchemaToCHSchema(message_descriptor, skip_unsupported_fields);
+    auto descriptor = ProtobufSchemas::instance().getMessageTypeForFormatSchema(
+        schema_info, ProtobufSchemas::WithEnvelope::No, google_protos_path);
+    return protobufSchemaToCHSchema(descriptor.message_descriptor, skip_unsupported_fields);
 }
 
 void registerProtobufSchemaReader(FormatFactory & factory)
diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
index 10ce37f9087..93dbaf35bdd 100644
--- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
+++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h
@@ -6,7 +6,7 @@
 #   include <Processors/Formats/IRowInputFormat.h>
 #   include <Processors/Formats/ISchemaReader.h>
 #   include <Formats/FormatSchemaInfo.h>
-#   include <google/protobuf/descriptor.h>
+#   include <Formats/ProtobufSchemas.h>
 
 namespace DB
 {
@@ -57,7 +57,7 @@ private:
     std::vector<size_t> missing_column_indices;
     std::unique_ptr<ProtobufSerializer> serializer;
 
-    const google::protobuf::Descriptor * message_descriptor;
+    const ProtobufSchemas::DescriptorHolder descriptor;
     bool with_length_delimiter;
     bool flatten_google_wrappers;
 };
diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp
index 1deec264a56..4a83aa2f921 100644
--- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp
@@ -27,7 +27,7 @@ ProtobufRowOutputFormat::ProtobufRowOutputFormat(
     , serializer(ProtobufSerializer::create(
           header_.getNames(),
           header_.getDataTypes(),
-          *ProtobufSchemas::instance().getMessageTypeForFormatSchema(
+          ProtobufSchemas::instance().getMessageTypeForFormatSchema(
               schema_info_.getSchemaInfo(), ProtobufSchemas::WithEnvelope::No, settings_.protobuf.google_protos_path),
           with_length_delimiter_,
           /* with_envelope = */ false,
diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.h b/src/Processors/Formats/Impl/TSKVRowInputFormat.h
index 6ed553fdc74..628ba885fa2 100644
--- a/src/Processors/Formats/Impl/TSKVRowInputFormat.h
+++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.h
@@ -38,6 +38,7 @@ private:
 
     bool supportsCountRows() const override { return true; }
     size_t countRows(size_t max_block_size) override;
+    bool supportsCustomSerializations() const override { return true; }
 
     const FormatSettings format_settings;
 
diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
index dbeb160922a..e6760f78ec9 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
@@ -48,6 +48,7 @@ protected:
     bool isGarbageAfterField(size_t index, ReadBuffer::Position pos) override;
     void setReadBuffer(ReadBuffer & in_) override;
     void readPrefix() override;
+    bool supportsCustomSerializations() const override { return true; }
 
     const FormatSettings format_settings;
     DataTypes data_types;
diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h
index fd75eb530aa..01016299b9c 100644
--- a/src/Processors/IProcessor.h
+++ b/src/Processors/IProcessor.h
@@ -365,11 +365,11 @@ public:
 
     /// Set limits for current storage.
     /// Different limits may be applied to different storages, we need to keep it per processor.
-    /// This method is need to be override only for sources.
+    /// This method needs to be overridden only for sources.
     virtual void setStorageLimits(const std::shared_ptr<const StorageLimitsList> & /*storage_limits*/) {}
 
     /// This method is called for every processor without input ports.
-    /// Processor can return a new progress for the last read operation.
+    /// Processor can return new progress for the last read operation.
     /// You should zero internal counters in the call, in order to make in idempotent.
     virtual std::optional<ReadProgress> getReadProgress() { return std::nullopt; }
 
diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp
index 359aa9d287c..defe7d0489a 100644
--- a/src/Processors/QueryPlan/AggregatingStep.cpp
+++ b/src/Processors/QueryPlan/AggregatingStep.cpp
@@ -95,7 +95,7 @@ Block AggregatingStep::appendGroupingColumn(Block block, const Names & keys, boo
 }
 
 AggregatingStep::AggregatingStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     Aggregator::Params params_,
     GroupingSetsParamsList grouping_sets_params_,
     bool final_,
@@ -111,8 +111,8 @@ AggregatingStep::AggregatingStep(
     bool memory_bound_merging_of_aggregation_results_enabled_,
     bool explicit_sorting_required_for_aggregation_in_order_)
     : ITransformingStep(
-        input_stream_,
-        appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, !grouping_sets_params_.empty(), group_by_use_nulls_),
+        input_header_,
+        appendGroupingColumn(params_.getHeader(input_header_, final_), params_.keys, !grouping_sets_params_.empty(), group_by_use_nulls_),
         getTraits(should_produce_results_in_order_of_bucket_number_),
         false)
     , params(std::move(params_))
@@ -538,41 +538,38 @@ bool AggregatingStep::canUseProjection() const
     return grouping_sets_params.empty() && sort_description_for_merging.empty();
 }
 
-void AggregatingStep::requestOnlyMergeForAggregateProjection(const DataStream & input_stream)
+void AggregatingStep::requestOnlyMergeForAggregateProjection(const Header & input_header)
 {
     if (!canUseProjection())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot aggregate from projection");
 
-    auto output_header = getOutputStream().header;
-    input_streams.front() = input_stream;
+    auto output_header = getOutputHeader();
+    input_headers.front() = input_header;
     params.only_merge = true;
-    updateOutputStream();
-    assertBlocksHaveEqualStructure(output_header, getOutputStream().header, "AggregatingStep");
+    updateOutputHeader();
+    assertBlocksHaveEqualStructure(output_header, getOutputHeader(), "AggregatingStep");
 }
 
-std::unique_ptr<AggregatingProjectionStep> AggregatingStep::convertToAggregatingProjection(const DataStream & input_stream) const
+std::unique_ptr<AggregatingProjectionStep> AggregatingStep::convertToAggregatingProjection(const Header & input_header) const
 {
     if (!canUseProjection())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot aggregate from projection");
 
     auto aggregating_projection = std::make_unique<AggregatingProjectionStep>(
-        DataStreams{input_streams.front(), input_stream},
+        Headers{input_headers.front(), input_header},
         params,
         final,
         merge_threads,
         temporary_data_merge_threads
     );
 
-    assertBlocksHaveEqualStructure(getOutputStream().header, aggregating_projection->getOutputStream().header, "AggregatingStep");
+    assertBlocksHaveEqualStructure(getOutputHeader(), aggregating_projection->getOutputHeader(), "AggregatingStep");
     return aggregating_projection;
 }
 
-void AggregatingStep::updateOutputStream()
+void AggregatingStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(),
-        appendGroupingColumn(params.getHeader(input_streams.front().header, final), params.keys, !grouping_sets_params.empty(), group_by_use_nulls),
-        getDataStreamTraits());
+    output_header = appendGroupingColumn(params.getHeader(input_headers.front(), final), params.keys, !grouping_sets_params.empty(), group_by_use_nulls);
 }
 
 bool AggregatingStep::memoryBoundMergingWillBeUsed() const
@@ -582,7 +579,7 @@ bool AggregatingStep::memoryBoundMergingWillBeUsed() const
 }
 
 AggregatingProjectionStep::AggregatingProjectionStep(
-    DataStreams input_streams_,
+    Headers input_headers_,
     Aggregator::Params params_,
     bool final_,
     size_t merge_threads_,
@@ -592,22 +589,21 @@ AggregatingProjectionStep::AggregatingProjectionStep(
     , merge_threads(merge_threads_)
     , temporary_data_merge_threads(temporary_data_merge_threads_)
 {
-    input_streams = std::move(input_streams_);
+    input_headers = std::move(input_headers_);
 
-    if (input_streams.size() != 2)
+    if (input_headers.size() != 2)
         throw Exception(
             ErrorCodes::LOGICAL_ERROR,
             "AggregatingProjectionStep is expected to have two input streams, got {}",
-            input_streams.size());
+            input_headers.size());
 
-    auto normal_parts_header = params.getHeader(input_streams.front().header, final);
+    auto normal_parts_header = params.getHeader(input_headers.front(), final);
     params.only_merge = true;
-    auto projection_parts_header = params.getHeader(input_streams.back().header, final);
+    auto projection_parts_header = params.getHeader(input_headers.back(), final);
     params.only_merge = false;
 
     assertBlocksHaveEqualStructure(normal_parts_header, projection_parts_header, "AggregatingProjectionStep");
-    output_stream.emplace();
-    output_stream->header = std::move(normal_parts_header);
+    output_header = std::move(normal_parts_header);
 }
 
 QueryPipelineBuilderPtr AggregatingProjectionStep::updatePipeline(
@@ -658,7 +654,7 @@ QueryPipelineBuilderPtr AggregatingProjectionStep::updatePipeline(
     auto pipeline = std::make_unique<QueryPipelineBuilder>();
 
     for (auto & cur_pipeline : pipelines)
-        assertBlocksHaveEqualStructure(cur_pipeline->getHeader(), getOutputStream().header, "AggregatingProjectionStep");
+        assertBlocksHaveEqualStructure(cur_pipeline->getHeader(), getOutputHeader(), "AggregatingProjectionStep");
 
     *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), 0, &processors);
     pipeline->resize(1);
diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h
index 1173d712295..b1f28f17ef9 100644
--- a/src/Processors/QueryPlan/AggregatingStep.h
+++ b/src/Processors/QueryPlan/AggregatingStep.h
@@ -17,7 +17,7 @@ class AggregatingStep : public ITransformingStep
 {
 public:
     AggregatingStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         Aggregator::Params params_,
         GroupingSetsParamsList grouping_sets_params_,
         bool final_,
@@ -62,10 +62,10 @@ public:
     /// When we apply aggregate projection (which is full), this step will only merge data.
     /// Argument input_stream replaces current single input.
     /// Probably we should replace this step to MergingAggregated later? (now, aggregation-in-order will not work)
-    void requestOnlyMergeForAggregateProjection(const DataStream & input_stream);
+    void requestOnlyMergeForAggregateProjection(const Header & input_header);
     /// When we apply aggregate projection (which is partial), this step should be replaced to AggregatingProjection.
     /// Argument input_stream would be the second input (from projection).
-    std::unique_ptr<AggregatingProjectionStep> convertToAggregatingProjection(const DataStream & input_stream) const;
+    std::unique_ptr<AggregatingProjectionStep> convertToAggregatingProjection(const Header & input_header) const;
 
     static ActionsDAG makeCreatingMissingKeysForGroupingSetDAG(
         const Block & in_header,
@@ -75,7 +75,7 @@ public:
         bool group_by_use_nulls);
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     Aggregator::Params params;
     GroupingSetsParamsList grouping_sets_params;
@@ -112,7 +112,7 @@ class AggregatingProjectionStep : public IQueryPlanStep
 {
 public:
     AggregatingProjectionStep(
-        DataStreams input_streams_,
+        Headers input_headers_,
         Aggregator::Params params_,
         bool final_,
         size_t merge_threads_,
diff --git a/src/Processors/QueryPlan/ArrayJoinStep.cpp b/src/Processors/QueryPlan/ArrayJoinStep.cpp
index 94cb6ae2ee5..4ba53480b67 100644
--- a/src/Processors/QueryPlan/ArrayJoinStep.cpp
+++ b/src/Processors/QueryPlan/ArrayJoinStep.cpp
@@ -24,10 +24,10 @@ static ITransformingStep::Traits getTraits()
     };
 }
 
-ArrayJoinStep::ArrayJoinStep(const DataStream & input_stream_, ArrayJoin array_join_, bool is_unaligned_, size_t max_block_size_)
+ArrayJoinStep::ArrayJoinStep(const Header & input_header_, ArrayJoin array_join_, bool is_unaligned_, size_t max_block_size_)
     : ITransformingStep(
-        input_stream_,
-        ArrayJoinTransform::transformHeader(input_stream_.header, array_join_.columns),
+        input_header_,
+        ArrayJoinTransform::transformHeader(input_header_, array_join_.columns),
         getTraits())
     , array_join(std::move(array_join_))
     , is_unaligned(is_unaligned_)
@@ -35,10 +35,9 @@ ArrayJoinStep::ArrayJoinStep(const DataStream & input_stream_, ArrayJoin array_j
 {
 }
 
-void ArrayJoinStep::updateOutputStream()
+void ArrayJoinStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(), ArrayJoinTransform::transformHeader(input_streams.front().header, array_join.columns), getDataStreamTraits());
+    output_header = ArrayJoinTransform::transformHeader(input_headers.front(), array_join.columns);
 }
 
 void ArrayJoinStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
diff --git a/src/Processors/QueryPlan/ArrayJoinStep.h b/src/Processors/QueryPlan/ArrayJoinStep.h
index 1a049d5805e..34eb34b5b25 100644
--- a/src/Processors/QueryPlan/ArrayJoinStep.h
+++ b/src/Processors/QueryPlan/ArrayJoinStep.h
@@ -11,7 +11,7 @@ using ArrayJoinActionPtr = std::shared_ptr<ArrayJoinAction>;
 class ArrayJoinStep : public ITransformingStep
 {
 public:
-    ArrayJoinStep(const DataStream & input_stream_, ArrayJoin array_join_, bool is_unaligned_, size_t max_block_size_);
+    ArrayJoinStep(const Header & input_header_, ArrayJoin array_join_, bool is_unaligned_, size_t max_block_size_);
     String getName() const override { return "ArrayJoin"; }
 
     void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
@@ -23,7 +23,7 @@ public:
     bool isLeft() const { return array_join.is_left; }
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     ArrayJoin array_join;
     bool is_unaligned = false;
diff --git a/src/Processors/QueryPlan/ConvertingActions.cpp b/src/Processors/QueryPlan/ConvertingActions.cpp
index b9703ef59cd..a3989cd2d26 100644
--- a/src/Processors/QueryPlan/ConvertingActions.cpp
+++ b/src/Processors/QueryPlan/ConvertingActions.cpp
@@ -7,7 +7,7 @@ namespace DB
 
 void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missing_objects)
 {
-    if (blocksHaveEqualStructure(plan.getCurrentDataStream().header, header))
+    if (blocksHaveEqualStructure(plan.getCurrentHeader(), header))
         return;
 
     auto mode = has_missing_objects ? ActionsDAG::MatchColumnsMode::Position : ActionsDAG::MatchColumnsMode::Name;
@@ -24,8 +24,8 @@ void addConvertingActions(QueryPlan & plan, const Block & header, bool has_missi
             true);
     };
 
-    auto convert_actions_dag = get_converting_dag(plan.getCurrentDataStream().header, header);
-    auto converting = std::make_unique<ExpressionStep>(plan.getCurrentDataStream(), std::move(convert_actions_dag));
+    auto convert_actions_dag = get_converting_dag(plan.getCurrentHeader(), header);
+    auto converting = std::make_unique<ExpressionStep>(plan.getCurrentHeader(), std::move(convert_actions_dag));
     plan.addStep(std::move(converting));
 }
 
diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp
index ca46f92eeb4..1fc53010e95 100644
--- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp
+++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp
@@ -97,12 +97,12 @@ CreateSetAndFilterOnTheFlyStep::CrosswiseConnectionPtr CreateSetAndFilterOnTheFl
 }
 
 CreateSetAndFilterOnTheFlyStep::CreateSetAndFilterOnTheFlyStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     const Names & column_names_,
     size_t max_rows_in_set_,
     CrosswiseConnectionPtr crosswise_connection_,
     JoinTableSide position_)
-    : ITransformingStep(input_stream_, input_stream_.header, getTraits())
+    : ITransformingStep(input_header_, input_header_, getTraits())
     , column_names(column_names_)
     , max_rows_in_set(max_rows_in_set_)
     , own_set(std::make_shared<SetWithState>(SizeLimits(max_rows_in_set, 0, OverflowMode::BREAK), 0, true))
@@ -113,10 +113,10 @@ CreateSetAndFilterOnTheFlyStep::CreateSetAndFilterOnTheFlyStep(
     if (crosswise_connection == nullptr)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Crosswise connection is not initialized");
 
-    if (input_streams.size() != 1)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Step requires exactly one input stream, got {}", input_streams.size());
+    if (input_headers.size() != 1)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Step requires exactly one input stream, got {}", input_headers.size());
 
-    own_set->setHeader(getColumnSubset(input_streams[0].header, column_names));
+    own_set->setHeader(getColumnSubset(input_headers[0], column_names));
 }
 
 void CreateSetAndFilterOnTheFlyStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
@@ -191,14 +191,14 @@ void CreateSetAndFilterOnTheFlyStep::describeActions(FormatSettings & settings)
     settings.out << '\n';
 }
 
-void CreateSetAndFilterOnTheFlyStep::updateOutputStream()
+void CreateSetAndFilterOnTheFlyStep::updateOutputHeader()
 {
-    if (input_streams.size() != 1)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "{} requires exactly one input stream, got {}", getName(), input_streams.size());
+    if (input_headers.size() != 1)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "{} requires exactly one input stream, got {}", getName(), input_headers.size());
 
-    own_set->setHeader(getColumnSubset(input_streams[0].header, column_names));
+    own_set->setHeader(getColumnSubset(input_headers[0], column_names));
 
-    output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
+    output_header = input_headers.front();
 }
 
 bool CreateSetAndFilterOnTheFlyStep::isColumnPartOfSetKey(const String & column_name) const
diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h
index 27171511703..b55636dcb2d 100644
--- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h
+++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.h
@@ -23,7 +23,7 @@ public:
     static CrosswiseConnectionPtr createCrossConnection();
 
     CreateSetAndFilterOnTheFlyStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         const Names & column_names_,
         size_t max_rows_in_set_,
         CrosswiseConnectionPtr crosswise_connection_,
@@ -43,7 +43,7 @@ public:
     void setFiltering(SetWithStatePtr filtering_set_) { filtering_set = filtering_set_; }
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     Names column_names;
 
diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp
index f13a717004f..80e2dbf843e 100644
--- a/src/Processors/QueryPlan/CreatingSetsStep.cpp
+++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp
@@ -35,12 +35,12 @@ static ITransformingStep::Traits getTraits()
 }
 
 CreatingSetStep::CreatingSetStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     SetAndKeyPtr set_and_key_,
     StoragePtr external_table_,
     SizeLimits network_transfer_limits_,
     ContextPtr context_)
-    : ITransformingStep(input_stream_, Block{}, getTraits())
+    : ITransformingStep(input_header_, Block{}, getTraits())
     , set_and_key(std::move(set_and_key_))
     , external_table(std::move(external_table_))
     , network_transfer_limits(std::move(network_transfer_limits_))
@@ -50,12 +50,12 @@ CreatingSetStep::CreatingSetStep(
 
 void CreatingSetStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    pipeline.addCreatingSetsTransform(getOutputStream().header, std::move(set_and_key), std::move(external_table), network_transfer_limits, context->getPreparedSetsCache());
+    pipeline.addCreatingSetsTransform(getOutputHeader(), std::move(set_and_key), std::move(external_table), network_transfer_limits, context->getPreparedSetsCache());
 }
 
-void CreatingSetStep::updateOutputStream()
+void CreatingSetStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(input_streams.front(), Block{}, getDataStreamTraits());
+    output_header = Block{};
 }
 
 void CreatingSetStep::describeActions(FormatSettings & settings) const
@@ -76,18 +76,18 @@ void CreatingSetStep::describeActions(JSONBuilder::JSONMap & map) const
 }
 
 
-CreatingSetsStep::CreatingSetsStep(DataStreams input_streams_)
+CreatingSetsStep::CreatingSetsStep(Headers input_headers_)
 {
-    if (input_streams_.empty())
+    if (input_headers_.empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "CreatingSetsStep cannot be created with no inputs");
 
-    input_streams = std::move(input_streams_);
-    output_stream = DataStream{input_streams.front().header};
+    input_headers = std::move(input_headers_);
+    output_header = input_headers.front();
 
-    for (size_t i = 1; i < input_streams.size(); ++i)
-        if (input_streams[i].header)
+    for (size_t i = 1; i < input_headers.size(); ++i)
+        if (input_headers[i])
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Creating set input must have empty header. Got: {}",
-                            input_streams[i].header.dumpStructure());
+                            input_headers[i].dumpStructure());
 }
 
 QueryPipelineBuilderPtr CreatingSetsStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &)
@@ -126,8 +126,8 @@ void CreatingSetsStep::describePipeline(FormatSettings & settings) const
 
 void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subqueries, ContextPtr context)
 {
-    DataStreams input_streams;
-    input_streams.emplace_back(query_plan.getCurrentDataStream());
+    Headers input_headers;
+    input_headers.emplace_back(query_plan.getCurrentHeader());
 
     std::vector<std::unique_ptr<QueryPlan>> plans;
     plans.emplace_back(std::make_unique<QueryPlan>(std::move(query_plan)));
@@ -142,7 +142,7 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subque
         if (!plan)
             continue;
 
-        input_streams.emplace_back(plan->getCurrentDataStream());
+        input_headers.emplace_back(plan->getCurrentHeader());
         plans.emplace_back(std::move(plan));
     }
 
@@ -152,15 +152,15 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSets::Subqueries subque
         return;
     }
 
-    auto creating_sets = std::make_unique<CreatingSetsStep>(std::move(input_streams));
+    auto creating_sets = std::make_unique<CreatingSetsStep>(std::move(input_headers));
     creating_sets->setStepDescription("Create sets before main query execution");
     query_plan.unitePlans(std::move(creating_sets), std::move(plans));
 }
 
 QueryPipelineBuilderPtr addCreatingSetsTransform(QueryPipelineBuilderPtr pipeline, PreparedSets::Subqueries subqueries, ContextPtr context)
 {
-    DataStreams input_streams;
-    input_streams.emplace_back(DataStream{pipeline->getHeader()});
+    Headers input_headers;
+    input_headers.emplace_back(pipeline->getHeader());
 
     QueryPipelineBuilders pipelines;
     pipelines.reserve(1 + subqueries.size());
@@ -178,11 +178,11 @@ QueryPipelineBuilderPtr addCreatingSetsTransform(QueryPipelineBuilderPtr pipelin
         if (!plan)
             continue;
 
-        input_streams.emplace_back(plan->getCurrentDataStream());
+        input_headers.emplace_back(plan->getCurrentHeader());
         pipelines.emplace_back(plan->buildQueryPipeline(plan_settings, pipeline_settings));
     }
 
-    return CreatingSetsStep(input_streams).updatePipeline(std::move(pipelines), pipeline_settings);
+    return CreatingSetsStep(input_headers).updatePipeline(std::move(pipelines), pipeline_settings);
 }
 
 std::vector<std::unique_ptr<QueryPlan>> DelayedCreatingSetsStep::makePlansForSets(DelayedCreatingSetsStep && step)
@@ -219,11 +219,11 @@ void addCreatingSetsStep(QueryPlan & query_plan, PreparedSetsPtr prepared_sets,
 }
 
 DelayedCreatingSetsStep::DelayedCreatingSetsStep(
-    DataStream input_stream, PreparedSets::Subqueries subqueries_, ContextPtr context_)
+    Header input_header, PreparedSets::Subqueries subqueries_, ContextPtr context_)
     : subqueries(std::move(subqueries_)), context(std::move(context_))
 {
-    input_streams = {input_stream};
-    output_stream = std::move(input_stream);
+    input_headers = {input_header};
+    output_header = std::move(input_header);
 }
 
 QueryPipelineBuilderPtr DelayedCreatingSetsStep::updatePipeline(QueryPipelineBuilders, const BuildQueryPipelineSettings &)
diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h
index 292ec19914c..54548a53131 100644
--- a/src/Processors/QueryPlan/CreatingSetsStep.h
+++ b/src/Processors/QueryPlan/CreatingSetsStep.h
@@ -13,7 +13,7 @@ class CreatingSetStep : public ITransformingStep
 {
 public:
     CreatingSetStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         SetAndKeyPtr set_and_key_,
         StoragePtr external_table_,
         SizeLimits network_transfer_limits_,
@@ -27,7 +27,7 @@ public:
     void describeActions(FormatSettings & settings) const override;
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     SetAndKeyPtr set_and_key;
     StoragePtr external_table;
@@ -38,7 +38,7 @@ private:
 class CreatingSetsStep : public IQueryPlanStep
 {
 public:
-    explicit CreatingSetsStep(DataStreams input_streams_);
+    explicit CreatingSetsStep(Headers input_headers_);
 
     String getName() const override { return "CreatingSets"; }
 
@@ -52,7 +52,7 @@ public:
 class DelayedCreatingSetsStep final : public IQueryPlanStep
 {
 public:
-    DelayedCreatingSetsStep(DataStream input_stream, PreparedSets::Subqueries subqueries_, ContextPtr context_);
+    DelayedCreatingSetsStep(Header input_header, PreparedSets::Subqueries subqueries_, ContextPtr context_);
 
     String getName() const override { return "DelayedCreatingSets"; }
 
diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp
index 3a98f8e4612..b50bc6b1a69 100644
--- a/src/Processors/QueryPlan/CubeStep.cpp
+++ b/src/Processors/QueryPlan/CubeStep.cpp
@@ -25,8 +25,8 @@ static ITransformingStep::Traits getTraits()
     };
 }
 
-CubeStep::CubeStep(const DataStream & input_stream_, Aggregator::Params params_, bool final_, bool use_nulls_)
-    : ITransformingStep(input_stream_, generateOutputHeader(params_.getHeader(input_stream_.header, final_), params_.keys, use_nulls_), getTraits())
+CubeStep::CubeStep(const Header & input_header_, Aggregator::Params params_, bool final_, bool use_nulls_)
+    : ITransformingStep(input_header_, generateOutputHeader(params_.getHeader(input_header_, final_), params_.keys, use_nulls_), getTraits())
     , keys_size(params_.keys_size)
     , params(std::move(params_))
     , final(final_)
@@ -82,9 +82,8 @@ const Aggregator::Params & CubeStep::getParams() const
     return params;
 }
 
-void CubeStep::updateOutputStream()
+void CubeStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(), generateOutputHeader(params.getHeader(input_streams.front().header, final), params.keys, use_nulls), getDataStreamTraits());
+    output_header = generateOutputHeader(params.getHeader(input_headers.front(), final), params.keys, use_nulls);
 }
 }
diff --git a/src/Processors/QueryPlan/CubeStep.h b/src/Processors/QueryPlan/CubeStep.h
index 8a03a33a088..b27b2c59182 100644
--- a/src/Processors/QueryPlan/CubeStep.h
+++ b/src/Processors/QueryPlan/CubeStep.h
@@ -13,7 +13,7 @@ using AggregatingTransformParamsPtr = std::shared_ptr<AggregatingTransformParams
 class CubeStep : public ITransformingStep
 {
 public:
-    CubeStep(const DataStream & input_stream_, Aggregator::Params params_, bool final_, bool use_nulls_);
+    CubeStep(const Header & input_header_, Aggregator::Params params_, bool final_, bool use_nulls_);
 
     String getName() const override { return "Cube"; }
 
@@ -21,7 +21,7 @@ public:
 
     const Aggregator::Params & getParams() const;
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     size_t keys_size;
     Aggregator::Params params;
diff --git a/src/Processors/QueryPlan/DistinctStep.cpp b/src/Processors/QueryPlan/DistinctStep.cpp
index f124e5a2b9f..42f7f8d6e66 100644
--- a/src/Processors/QueryPlan/DistinctStep.cpp
+++ b/src/Processors/QueryPlan/DistinctStep.cpp
@@ -32,14 +32,14 @@ static ITransformingStep::Traits getTraits(bool pre_distinct)
 }
 
 DistinctStep::DistinctStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     const SizeLimits & set_size_limits_,
     UInt64 limit_hint_,
     const Names & columns_,
     bool pre_distinct_)
     : ITransformingStep(
-            input_stream_,
-            input_stream_.header,
+            input_header_,
+            input_header_,
             getTraits(pre_distinct_))
     , set_size_limits(set_size_limits_)
     , limit_hint(limit_hint_)
@@ -153,12 +153,9 @@ void DistinctStep::describeActions(JSONBuilder::JSONMap & map) const
     map.add("Columns", std::move(columns_array));
 }
 
-void DistinctStep::updateOutputStream()
+void DistinctStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(),
-        input_streams.front().header,
-        getTraits(pre_distinct).data_stream_traits);
+    output_header = input_headers.front();
 }
 
 }
diff --git a/src/Processors/QueryPlan/DistinctStep.h b/src/Processors/QueryPlan/DistinctStep.h
index 34e60dc552f..d6caf92d072 100644
--- a/src/Processors/QueryPlan/DistinctStep.h
+++ b/src/Processors/QueryPlan/DistinctStep.h
@@ -10,7 +10,7 @@ class DistinctStep : public ITransformingStep
 {
 public:
     DistinctStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         const SizeLimits & set_size_limits_,
         UInt64 limit_hint_,
         const Names & columns_,
@@ -35,7 +35,7 @@ public:
     const SortDescription & getSortDescription() const override { return distinct_sort_desc; }
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     SizeLimits set_size_limits;
     UInt64 limit_hint;
diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp
index fcfea4d990b..a8a3a7f5b65 100644
--- a/src/Processors/QueryPlan/ExpressionStep.cpp
+++ b/src/Processors/QueryPlan/ExpressionStep.cpp
@@ -25,10 +25,10 @@ static ITransformingStep::Traits getTraits(const ActionsDAG & actions)
     };
 }
 
-ExpressionStep::ExpressionStep(const DataStream & input_stream_, ActionsDAG actions_dag_)
+ExpressionStep::ExpressionStep(const Header & input_header_, ActionsDAG actions_dag_)
     : ITransformingStep(
-        input_stream_,
-        ExpressionTransform::transformHeader(input_stream_.header, actions_dag_),
+        input_header_,
+        ExpressionTransform::transformHeader(input_header_, actions_dag_),
         getTraits(actions_dag_))
     , actions_dag(std::move(actions_dag_))
 {
@@ -43,11 +43,11 @@ void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu
         return std::make_shared<ExpressionTransform>(header, expression);
     });
 
-    if (!blocksHaveEqualStructure(pipeline.getHeader(), output_stream->header))
+    if (!blocksHaveEqualStructure(pipeline.getHeader(), *output_header))
     {
         auto convert_actions_dag = ActionsDAG::makeConvertingActions(
                 pipeline.getHeader().getColumnsWithTypeAndName(),
-                output_stream->header.getColumnsWithTypeAndName(),
+                output_header->getColumnsWithTypeAndName(),
                 ActionsDAG::MatchColumnsMode::Name);
         auto convert_actions = std::make_shared<ExpressionActions>(std::move(convert_actions_dag), settings.getActionsSettings());
 
@@ -71,13 +71,9 @@ void ExpressionStep::describeActions(JSONBuilder::JSONMap & map) const
     map.add("Expression", expression->toTree());
 }
 
-void ExpressionStep::updateOutputStream()
+void ExpressionStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(), ExpressionTransform::transformHeader(input_streams.front().header, actions_dag), getDataStreamTraits());
-
-    if (!getDataStreamTraits().preserves_sorting)
-        return;
+    output_header = ExpressionTransform::transformHeader(input_headers.front(), actions_dag);
 }
 
 }
diff --git a/src/Processors/QueryPlan/ExpressionStep.h b/src/Processors/QueryPlan/ExpressionStep.h
index f2926318cbc..234552e5445 100644
--- a/src/Processors/QueryPlan/ExpressionStep.h
+++ b/src/Processors/QueryPlan/ExpressionStep.h
@@ -13,7 +13,7 @@ class ExpressionStep : public ITransformingStep
 {
 public:
 
-    explicit ExpressionStep(const DataStream & input_stream_, ActionsDAG actions_dag_);
+    explicit ExpressionStep(const Header & input_header_, ActionsDAG actions_dag_);
     String getName() const override { return "Expression"; }
 
     void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) override;
@@ -26,7 +26,7 @@ public:
     void describeActions(JSONBuilder::JSONMap & map) const override;
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     ActionsDAG actions_dag;
 };
diff --git a/src/Processors/QueryPlan/ExtremesStep.cpp b/src/Processors/QueryPlan/ExtremesStep.cpp
index 010a82072cf..1eb593df2ab 100644
--- a/src/Processors/QueryPlan/ExtremesStep.cpp
+++ b/src/Processors/QueryPlan/ExtremesStep.cpp
@@ -19,8 +19,8 @@ static ITransformingStep::Traits getTraits()
     };
 }
 
-ExtremesStep::ExtremesStep(const DataStream & input_stream_)
-    : ITransformingStep(input_stream_, input_stream_.header, getTraits())
+ExtremesStep::ExtremesStep(const Header & input_header_)
+    : ITransformingStep(input_header_, input_header_, getTraits())
 {
 }
 
diff --git a/src/Processors/QueryPlan/ExtremesStep.h b/src/Processors/QueryPlan/ExtremesStep.h
index 57ccef077aa..363f0a2f6d4 100644
--- a/src/Processors/QueryPlan/ExtremesStep.h
+++ b/src/Processors/QueryPlan/ExtremesStep.h
@@ -7,16 +7,16 @@ namespace DB
 class ExtremesStep : public ITransformingStep
 {
 public:
-    explicit ExtremesStep(const DataStream & input_stream_);
+    explicit ExtremesStep(const Header & input_header_);
 
     String getName() const override { return "Extremes"; }
 
     void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
 
 private:
-    void updateOutputStream() override
+    void updateOutputHeader() override
     {
-        output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
+        output_header = input_headers.front();
     }
 };
 
diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp
index 8687886447a..9655e80eb5f 100644
--- a/src/Processors/QueryPlan/FillingStep.cpp
+++ b/src/Processors/QueryPlan/FillingStep.cpp
@@ -29,12 +29,12 @@ static ITransformingStep::Traits getTraits()
 }
 
 FillingStep::FillingStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     SortDescription sort_description_,
     SortDescription fill_description_,
     InterpolateDescriptionPtr interpolate_description_,
     bool use_with_fill_by_sorting_prefix_)
-    : ITransformingStep(input_stream_, FillingTransform::transformHeader(input_stream_.header, sort_description_), getTraits())
+    : ITransformingStep(input_header_, FillingTransform::transformHeader(input_header_, sort_description_), getTraits())
     , sort_description(std::move(sort_description_))
     , fill_description(std::move(fill_description_))
     , interpolate_description(interpolate_description_)
@@ -80,9 +80,8 @@ void FillingStep::describeActions(JSONBuilder::JSONMap & map) const
     }
 }
 
-void FillingStep::updateOutputStream()
+void FillingStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(), FillingTransform::transformHeader(input_streams.front().header, sort_description), getDataStreamTraits());
+    output_header = FillingTransform::transformHeader(input_headers.front(), sort_description);
 }
 }
diff --git a/src/Processors/QueryPlan/FillingStep.h b/src/Processors/QueryPlan/FillingStep.h
index 65307ee4121..147ced21d34 100644
--- a/src/Processors/QueryPlan/FillingStep.h
+++ b/src/Processors/QueryPlan/FillingStep.h
@@ -11,7 +11,7 @@ class FillingStep : public ITransformingStep
 {
 public:
     FillingStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         SortDescription sort_description_,
         SortDescription fill_description_,
         InterpolateDescriptionPtr interpolate_description_,
@@ -27,7 +27,7 @@ public:
     const SortDescription & getSortDescription() const override { return sort_description; }
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     SortDescription sort_description;
     SortDescription fill_description;
diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp
index 04c218cb096..862e03d74f2 100644
--- a/src/Processors/QueryPlan/FilterStep.cpp
+++ b/src/Processors/QueryPlan/FilterStep.cpp
@@ -25,14 +25,14 @@ static ITransformingStep::Traits getTraits()
 }
 
 FilterStep::FilterStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     ActionsDAG actions_dag_,
     String filter_column_name_,
     bool remove_filter_column_)
     : ITransformingStep(
-        input_stream_,
+        input_header_,
         FilterTransform::transformHeader(
-            input_stream_.header,
+            input_header_,
             &actions_dag_,
             filter_column_name_,
             remove_filter_column_),
@@ -58,11 +58,11 @@ void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ
         return std::make_shared<FilterTransform>(header, expression, filter_column_name, remove_filter_column, on_totals);
     });
 
-    if (!blocksHaveEqualStructure(pipeline.getHeader(), output_stream->header))
+    if (!blocksHaveEqualStructure(pipeline.getHeader(), *output_header))
     {
         auto convert_actions_dag = ActionsDAG::makeConvertingActions(
                 pipeline.getHeader().getColumnsWithTypeAndName(),
-                output_stream->header.getColumnsWithTypeAndName(),
+                output_header->getColumnsWithTypeAndName(),
                 ActionsDAG::MatchColumnsMode::Name);
         auto convert_actions = std::make_shared<ExpressionActions>(std::move(convert_actions_dag), settings.getActionsSettings());
 
@@ -95,12 +95,9 @@ void FilterStep::describeActions(JSONBuilder::JSONMap & map) const
     map.add("Expression", expression->toTree());
 }
 
-void FilterStep::updateOutputStream()
+void FilterStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(),
-        FilterTransform::transformHeader(input_streams.front().header, &actions_dag, filter_column_name, remove_filter_column),
-        getDataStreamTraits());
+    output_header = FilterTransform::transformHeader(input_headers.front(), &actions_dag, filter_column_name, remove_filter_column);
 
     if (!getDataStreamTraits().preserves_sorting)
         return;
diff --git a/src/Processors/QueryPlan/FilterStep.h b/src/Processors/QueryPlan/FilterStep.h
index b5a31bef5fc..cb90459f0ab 100644
--- a/src/Processors/QueryPlan/FilterStep.h
+++ b/src/Processors/QueryPlan/FilterStep.h
@@ -10,7 +10,7 @@ class FilterStep : public ITransformingStep
 {
 public:
     FilterStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         ActionsDAG actions_dag_,
         String filter_column_name_,
         bool remove_filter_column_);
@@ -27,7 +27,7 @@ public:
     bool removesFilterColumn() const { return remove_filter_column; }
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     ActionsDAG actions_dag;
     String filter_column_name;
diff --git a/src/Processors/QueryPlan/IQueryPlanStep.cpp b/src/Processors/QueryPlan/IQueryPlanStep.cpp
index 72a81b37ee2..bb1451287d9 100644
--- a/src/Processors/QueryPlan/IQueryPlanStep.cpp
+++ b/src/Processors/QueryPlan/IQueryPlanStep.cpp
@@ -10,12 +10,12 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
-const DataStream & IQueryPlanStep::getOutputStream() const
+const Header & IQueryPlanStep::getOutputHeader() const
 {
-    if (!hasOutputStream())
+    if (!hasOutputHeader())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPlanStep {} does not have output stream.", getName());
 
-    return *output_stream;
+    return *output_header;
 }
 
 const SortDescription & IQueryPlanStep::getSortDescription() const
diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h
index 0ff16df976a..c3eeb8ebf48 100644
--- a/src/Processors/QueryPlan/IQueryPlanStep.h
+++ b/src/Processors/QueryPlan/IQueryPlanStep.h
@@ -21,24 +21,12 @@ namespace ErrorCodes
     extern const int NOT_IMPLEMENTED;
 }
 
-/// Description of data stream.
-/// Single logical data stream may relate to many ports of pipeline.
-class DataStream
-{
-public:
-    Block header;
-
-    bool hasEqualHeaderWith(const DataStream & other) const
-    {
-        return blocksHaveEqualStructure(header, other.header);
-    }
-};
-
-using DataStreams = std::vector<DataStream>;
-
 class QueryPlan;
 using QueryPlanRawPtrs = std::list<QueryPlan *>;
 
+using Header = Block;
+using Headers = std::vector<Header>;
+
 /// Single step of query plan.
 class IQueryPlanStep
 {
@@ -49,16 +37,16 @@ public:
 
     /// Add processors from current step to QueryPipeline.
     /// Calling this method, we assume and don't check that:
-    ///   * pipelines.size() == getInputStreams.size()
-    ///   * header from each pipeline is the same as header from corresponding input_streams
-    /// Result pipeline must contain any number of streams with compatible output header is hasOutputStream(),
+    ///   * pipelines.size() == getInputHeaders().size()
+    ///   * header from each pipeline is the same as header from corresponding input
+    /// Result pipeline must contain any number of ports with compatible output header if hasOutputHeader(),
     ///   or pipeline should be completed otherwise.
     virtual QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & settings) = 0;
 
-    const DataStreams & getInputStreams() const { return input_streams; }
+    const Headers & getInputHeaders() const { return input_headers; }
 
-    bool hasOutputStream() const { return output_stream.has_value(); }
-    const DataStream & getOutputStream() const;
+    bool hasOutputHeader() const { return output_header.has_value(); }
+    const Header & getOutputHeader() const;
 
     /// Methods to describe what this step is needed for.
     const std::string & getStepDescription() const { return step_description; }
@@ -95,29 +83,29 @@ public:
     /// Updates the input streams of the given step. Used during query plan optimizations.
     /// It won't do any validation of new streams, so it is your responsibility to ensure that this update doesn't break anything
     /// (e.g. you update data stream traits or correctly remove / add columns).
-    void updateInputStreams(DataStreams input_streams_)
+    void updateInputHeaders(Headers input_headers_)
     {
-        chassert(canUpdateInputStream());
-        input_streams = std::move(input_streams_);
-        updateOutputStream();
+        chassert(canUpdateInputHeader());
+        input_headers = std::move(input_headers_);
+        updateOutputHeader();
     }
 
-    void updateInputStream(DataStream input_stream) { updateInputStreams(DataStreams{input_stream}); }
+    void updateInputHeader(Header input_header) { updateInputHeaders(Headers{input_header}); }
 
-    void updateInputStream(DataStream input_stream, size_t idx)
+    void updateInputHeader(Header input_header, size_t idx)
     {
-        chassert(canUpdateInputStream() && idx < input_streams.size());
-        input_streams[idx] = input_stream;
-        updateOutputStream();
+        chassert(canUpdateInputHeader() && idx < input_headers.size());
+        input_headers[idx] = std::move(input_header); /// By-value parameter, not used afterwards: move instead of copying the Block.
+        updateOutputHeader();
     }
 
-    virtual bool canUpdateInputStream() const { return false; }
+    virtual bool canUpdateInputHeader() const { return false; }
 
 protected:
-    virtual void updateOutputStream() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); }
+    virtual void updateOutputHeader() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); }
 
-    DataStreams input_streams;
-    std::optional<DataStream> output_stream;
+    Headers input_headers;
+    std::optional<Header> output_header;
 
     /// Text description about what current step does.
     std::string step_description;
diff --git a/src/Processors/QueryPlan/ISourceStep.cpp b/src/Processors/QueryPlan/ISourceStep.cpp
index 37f56bc7a43..40cc2277cc8 100644
--- a/src/Processors/QueryPlan/ISourceStep.cpp
+++ b/src/Processors/QueryPlan/ISourceStep.cpp
@@ -4,9 +4,9 @@
 namespace DB
 {
 
-ISourceStep::ISourceStep(DataStream output_stream_)
+ISourceStep::ISourceStep(Header output_header_)
 {
-    output_stream = std::move(output_stream_);
+    output_header = std::move(output_header_);
 }
 
 QueryPipelineBuilderPtr ISourceStep::updatePipeline(QueryPipelineBuilders, const BuildQueryPipelineSettings & settings)
diff --git a/src/Processors/QueryPlan/ISourceStep.h b/src/Processors/QueryPlan/ISourceStep.h
index 744b6f9b5c4..142d97fecab 100644
--- a/src/Processors/QueryPlan/ISourceStep.h
+++ b/src/Processors/QueryPlan/ISourceStep.h
@@ -8,7 +8,7 @@ namespace DB
 class ISourceStep : public IQueryPlanStep
 {
 public:
-    explicit ISourceStep(DataStream output_stream_);
+    explicit ISourceStep(Header output_header_);
 
     QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & settings) override;
 
diff --git a/src/Processors/QueryPlan/ITransformingStep.cpp b/src/Processors/QueryPlan/ITransformingStep.cpp
index 75e8500e417..400cfd07b84 100644
--- a/src/Processors/QueryPlan/ITransformingStep.cpp
+++ b/src/Processors/QueryPlan/ITransformingStep.cpp
@@ -4,27 +4,15 @@
 namespace DB
 {
 
-ITransformingStep::ITransformingStep(DataStream input_stream, Block output_header, Traits traits, bool collect_processors_)
+ITransformingStep::ITransformingStep(Header input_header, Header output_header_, Traits traits, bool collect_processors_)
     : transform_traits(std::move(traits.transform_traits))
     , collect_processors(collect_processors_)
     , data_stream_traits(std::move(traits.data_stream_traits))
 {
-    input_streams.emplace_back(std::move(input_stream));
-    output_stream = createOutputStream(input_streams.front(), std::move(output_header), data_stream_traits);
+    input_headers.emplace_back(std::move(input_header));
+    output_header = std::move(output_header_);
 }
 
-/// TODO: cleanup in the following PR.
-DataStream ITransformingStep::createOutputStream(
-    [[maybe_unused]] const DataStream & input_stream,
-    Block output_header,
-    [[maybe_unused]] const DataStreamTraits & stream_traits)
-{
-    DataStream output_stream{.header = std::move(output_header)};
-
-    return output_stream;
-}
-
-
 QueryPipelineBuilderPtr ITransformingStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & settings)
 {
     if (collect_processors)
diff --git a/src/Processors/QueryPlan/ITransformingStep.h b/src/Processors/QueryPlan/ITransformingStep.h
index a9fdbbe5695..f27fc189dcd 100644
--- a/src/Processors/QueryPlan/ITransformingStep.h
+++ b/src/Processors/QueryPlan/ITransformingStep.h
@@ -46,7 +46,7 @@ public:
         TransformTraits transform_traits;
     };
 
-    ITransformingStep(DataStream input_stream, Block output_header, Traits traits, bool collect_processors_ = true);
+    ITransformingStep(Header input_header, Header output_header, Traits traits, bool collect_processors_ = true);
 
     QueryPipelineBuilderPtr updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings & settings) override;
 
@@ -66,15 +66,9 @@ public:
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented");
     }
 
-    bool canUpdateInputStream() const override { return true; }
+    bool canUpdateInputHeader() const override { return true; }
 
 protected:
-    /// Create output stream from header and traits.
-    static DataStream createOutputStream(
-            const DataStream & input_stream,
-            Block output_header,
-            const DataStreamTraits & stream_traits);
-
     TransformTraits transform_traits;
 
 private:
diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp
index b132d27670d..48bf5dfa192 100644
--- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp
+++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp
@@ -17,26 +17,25 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
-static Block checkHeaders(const DataStreams & input_streams_)
+static Block checkHeaders(const Headers & input_headers)
 {
-    if (input_streams_.empty())
+    if (input_headers.empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot perform intersect/except on empty set of query plan steps");
 
-    Block res = input_streams_.front().header;
-    for (const auto & stream : input_streams_)
-        assertBlocksHaveEqualStructure(stream.header, res, "IntersectOrExceptStep");
+    Block res = input_headers.front();
+    for (const auto & header : input_headers)
+        assertBlocksHaveEqualStructure(header, res, "IntersectOrExceptStep");
 
     return res;
 }
 
 IntersectOrExceptStep::IntersectOrExceptStep(
-    DataStreams input_streams_, Operator operator_, size_t max_threads_)
-    : header(checkHeaders(input_streams_))
-    , current_operator(operator_)
+    Headers input_headers_, Operator operator_, size_t max_threads_)
+    : current_operator(operator_)
     , max_threads(max_threads_)
 {
-    input_streams = std::move(input_streams_);
-    output_stream = DataStream{.header = header};
+    input_headers = std::move(input_headers_);
+    output_header = checkHeaders(input_headers);
 }
 
 QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &)
@@ -46,7 +45,7 @@ QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuild
     if (pipelines.empty())
     {
         QueryPipelineProcessorsCollector collector(*pipeline, this);
-        pipeline->init(Pipe(std::make_shared<NullSource>(output_stream->header)));
+        pipeline->init(Pipe(std::make_shared<NullSource>(*output_header)));
         processors = collector.detachProcessors();
         return pipeline;
     }
@@ -54,12 +53,12 @@ QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuild
     for (auto & cur_pipeline : pipelines)
     {
         /// Just in case.
-        if (!isCompatibleHeader(cur_pipeline->getHeader(), getOutputStream().header))
+        if (!isCompatibleHeader(cur_pipeline->getHeader(), getOutputHeader()))
         {
             QueryPipelineProcessorsCollector collector(*cur_pipeline, this);
             auto converting_dag = ActionsDAG::makeConvertingActions(
                 cur_pipeline->getHeader().getColumnsWithTypeAndName(),
-                getOutputStream().header.getColumnsWithTypeAndName(),
+                getOutputHeader().getColumnsWithTypeAndName(),
                 ActionsDAG::MatchColumnsMode::Name);
 
             auto converting_actions = std::make_shared<ExpressionActions>(std::move(converting_dag));
@@ -73,11 +72,11 @@ QueryPipelineBuilderPtr IntersectOrExceptStep::updatePipeline(QueryPipelineBuild
         }
 
         /// For the case of union.
-        cur_pipeline->addTransform(std::make_shared<ResizeProcessor>(header, cur_pipeline->getNumStreams(), 1));
+        cur_pipeline->addTransform(std::make_shared<ResizeProcessor>(getOutputHeader(), cur_pipeline->getNumStreams(), 1));
     }
 
     *pipeline = QueryPipelineBuilder::unitePipelines(std::move(pipelines), max_threads, &processors);
-    auto transform = std::make_shared<IntersectOrExceptTransform>(header, current_operator);
+    auto transform = std::make_shared<IntersectOrExceptTransform>(getOutputHeader(), current_operator);
     processors.push_back(transform);
     pipeline->addTransform(std::move(transform));
 
diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.h b/src/Processors/QueryPlan/IntersectOrExceptStep.h
index d15d2802456..a1e85e847da 100644
--- a/src/Processors/QueryPlan/IntersectOrExceptStep.h
+++ b/src/Processors/QueryPlan/IntersectOrExceptStep.h
@@ -12,7 +12,7 @@ public:
     using Operator = ASTSelectIntersectExceptQuery::Operator;
 
     /// max_threads is used to limit the number of threads for result pipeline.
-    IntersectOrExceptStep(DataStreams input_streams_, Operator operator_, size_t max_threads_ = 0);
+    IntersectOrExceptStep(Headers input_headers_, Operator operator_, size_t max_threads_ = 0);
 
     String getName() const override { return "IntersectOrExcept"; }
 
@@ -21,7 +21,6 @@ public:
     void describePipeline(FormatSettings & settings) const override;
 
 private:
-    Block header;
     Operator current_operator;
     size_t max_threads;
 };
diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp
index 8fe2515e323..018b52a5c68 100644
--- a/src/Processors/QueryPlan/JoinStep.cpp
+++ b/src/Processors/QueryPlan/JoinStep.cpp
@@ -39,15 +39,15 @@ std::vector<std::pair<String, String>> describeJoinActions(const JoinPtr & join)
 }
 
 JoinStep::JoinStep(
-    const DataStream & left_stream_,
-    const DataStream & right_stream_,
+    const Header & left_header_,
+    const Header & right_header_,
     JoinPtr join_,
     size_t max_block_size_,
     size_t max_streams_,
     bool keep_left_read_in_order_)
     : join(std::move(join_)), max_block_size(max_block_size_), max_streams(max_streams_), keep_left_read_in_order(keep_left_read_in_order_)
 {
-    updateInputStreams(DataStreams{left_stream_, right_stream_});
+    updateInputHeaders({left_header_, right_header_});
 }
 
 QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &)
@@ -58,7 +58,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines
     if (join->pipelineType() == JoinPipelineType::YShaped)
     {
         auto joined_pipeline = QueryPipelineBuilder::joinPipelinesYShaped(
-            std::move(pipelines[0]), std::move(pipelines[1]), join, output_stream->header, max_block_size, &processors);
+            std::move(pipelines[0]), std::move(pipelines[1]), join, *output_header, max_block_size, &processors);
         joined_pipeline->resize(max_streams);
         return joined_pipeline;
     }
@@ -67,7 +67,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines
         std::move(pipelines[0]),
         std::move(pipelines[1]),
         join,
-        output_stream->header,
+        *output_header,
         max_block_size,
         max_streams,
         keep_left_read_in_order,
@@ -98,12 +98,9 @@ void JoinStep::describeActions(JSONBuilder::JSONMap & map) const
         map.add(name, value);
 }
 
-void JoinStep::updateOutputStream()
+void JoinStep::updateOutputHeader()
 {
-    output_stream = DataStream
-    {
-        .header = JoiningTransform::transformHeader(input_streams[0].header, join),
-    };
+    output_header = JoiningTransform::transformHeader(input_headers.front(), join);
 }
 
 static ITransformingStep::Traits getStorageJoinTraits()
@@ -121,10 +118,10 @@ static ITransformingStep::Traits getStorageJoinTraits()
     };
 }
 
-FilledJoinStep::FilledJoinStep(const DataStream & input_stream_, JoinPtr join_, size_t max_block_size_)
+FilledJoinStep::FilledJoinStep(const Header & input_header_, JoinPtr join_, size_t max_block_size_)
     : ITransformingStep(
-        input_stream_,
-        JoiningTransform::transformHeader(input_stream_.header, join_),
+        input_header_,
+        JoiningTransform::transformHeader(input_header_, join_),
         getStorageJoinTraits())
     , join(std::move(join_))
     , max_block_size(max_block_size_)
@@ -148,14 +145,13 @@ void FilledJoinStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu
     {
         bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals;
         auto counter = on_totals ? nullptr : finish_counter;
-        return std::make_shared<JoiningTransform>(header, output_stream->header, join, max_block_size, on_totals, default_totals, counter);
+        return std::make_shared<JoiningTransform>(header, *output_header, join, max_block_size, on_totals, default_totals, counter);
     });
 }
 
-void FilledJoinStep::updateOutputStream()
+void FilledJoinStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(), JoiningTransform::transformHeader(input_streams.front().header, join), getDataStreamTraits());
+    output_header = JoiningTransform::transformHeader(input_headers.front(), join);
 }
 
 void FilledJoinStep::describeActions(FormatSettings & settings) const
diff --git a/src/Processors/QueryPlan/JoinStep.h b/src/Processors/QueryPlan/JoinStep.h
index 51ea337b7c6..6ede6771b08 100644
--- a/src/Processors/QueryPlan/JoinStep.h
+++ b/src/Processors/QueryPlan/JoinStep.h
@@ -14,8 +14,8 @@ class JoinStep : public IQueryPlanStep
 {
 public:
     JoinStep(
-        const DataStream & left_stream_,
-        const DataStream & right_stream_,
+        const Header & left_header_,
+        const Header & right_header_,
         JoinPtr join_,
         size_t max_block_size_,
         size_t max_streams_,
@@ -34,10 +34,10 @@ public:
     void setJoin(JoinPtr join_) { join = std::move(join_); }
     bool allowPushDownToRight() const;
 
-    bool canUpdateInputStream() const override { return true; }
+    bool canUpdateInputHeader() const override { return true; }
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     JoinPtr join;
     size_t max_block_size;
@@ -50,7 +50,7 @@ private:
 class FilledJoinStep : public ITransformingStep
 {
 public:
-    FilledJoinStep(const DataStream & input_stream_, JoinPtr join_, size_t max_block_size_);
+    FilledJoinStep(const Header & input_header_, JoinPtr join_, size_t max_block_size_);
 
     String getName() const override { return "FilledJoin"; }
     void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
@@ -61,7 +61,7 @@ public:
     const JoinPtr & getJoin() const { return join; }
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     JoinPtr join;
     size_t max_block_size;
diff --git a/src/Processors/QueryPlan/LimitByStep.cpp b/src/Processors/QueryPlan/LimitByStep.cpp
index 8b4abecc12c..92900b8153f 100644
--- a/src/Processors/QueryPlan/LimitByStep.cpp
+++ b/src/Processors/QueryPlan/LimitByStep.cpp
@@ -23,9 +23,9 @@ static ITransformingStep::Traits getTraits()
 }
 
 LimitByStep::LimitByStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     size_t group_length_, size_t group_offset_, const Names & columns_)
-    : ITransformingStep(input_stream_, input_stream_.header, getTraits())
+    : ITransformingStep(input_header_, input_header_, getTraits())
     , group_length(group_length_)
     , group_offset(group_offset_)
     , columns(columns_)
diff --git a/src/Processors/QueryPlan/LimitByStep.h b/src/Processors/QueryPlan/LimitByStep.h
index 0edda3247d6..e34d1d5327d 100644
--- a/src/Processors/QueryPlan/LimitByStep.h
+++ b/src/Processors/QueryPlan/LimitByStep.h
@@ -9,7 +9,7 @@ class LimitByStep : public ITransformingStep
 {
 public:
     explicit LimitByStep(
-            const DataStream & input_stream_,
+            const Header & input_header_,
             size_t group_length_, size_t group_offset_, const Names & columns_);
 
     String getName() const override { return "LimitBy"; }
@@ -20,9 +20,9 @@ public:
     void describeActions(FormatSettings & settings) const override;
 
 private:
-    void updateOutputStream() override
+    void updateOutputHeader() override
     {
-        output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
+        output_header = input_headers.front();
     }
 
     size_t group_length;
diff --git a/src/Processors/QueryPlan/LimitStep.cpp b/src/Processors/QueryPlan/LimitStep.cpp
index 2e2c5ed7c1e..a186e1f7965 100644
--- a/src/Processors/QueryPlan/LimitStep.cpp
+++ b/src/Processors/QueryPlan/LimitStep.cpp
@@ -23,12 +23,12 @@ static ITransformingStep::Traits getTraits()
 }
 
 LimitStep::LimitStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     size_t limit_, size_t offset_,
     bool always_read_till_end_,
     bool with_ties_,
     SortDescription description_)
-    : ITransformingStep(input_stream_, input_stream_.header, getTraits())
+    : ITransformingStep(input_header_, input_header_, getTraits())
     , limit(limit_), offset(offset_)
     , always_read_till_end(always_read_till_end_)
     , with_ties(with_ties_), description(std::move(description_))
diff --git a/src/Processors/QueryPlan/LimitStep.h b/src/Processors/QueryPlan/LimitStep.h
index 1ae6b73cc3d..4a779259681 100644
--- a/src/Processors/QueryPlan/LimitStep.h
+++ b/src/Processors/QueryPlan/LimitStep.h
@@ -10,7 +10,7 @@ class LimitStep : public ITransformingStep
 {
 public:
     LimitStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         size_t limit_, size_t offset_,
         bool always_read_till_end_ = false, /// Read all data even if limit is reached. Needed for totals.
         bool with_ties_ = false, /// Limit with ties.
@@ -34,9 +34,9 @@ public:
     bool withTies() const { return with_ties; }
 
 private:
-    void updateOutputStream() override
+    void updateOutputHeader() override
     {
-        output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
+        output_header = input_headers.front();
     }
 
     size_t limit;
diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.cpp b/src/Processors/QueryPlan/MergingAggregatedStep.cpp
index ae8ce78844b..03a26406deb 100644
--- a/src/Processors/QueryPlan/MergingAggregatedStep.cpp
+++ b/src/Processors/QueryPlan/MergingAggregatedStep.cpp
@@ -32,7 +32,7 @@ static ITransformingStep::Traits getTraits(bool should_produce_results_in_order_
 }
 
 MergingAggregatedStep::MergingAggregatedStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     Aggregator::Params params_,
     GroupingSetsParamsList grouping_sets_params_,
     bool final_,
@@ -44,8 +44,8 @@ MergingAggregatedStep::MergingAggregatedStep(
     size_t memory_bound_merging_max_block_bytes_,
     bool memory_bound_merging_of_aggregation_results_enabled_)
     : ITransformingStep(
-        input_stream_,
-        MergingAggregatedTransform::appendGroupingIfNeeded(input_stream_.header, params_.getHeader(input_stream_.header, final_)),
+        input_header_,
+        MergingAggregatedTransform::appendGroupingIfNeeded(input_header_, params_.getHeader(input_header_, final_)),
         getTraits(should_produce_results_in_order_of_bucket_number_))
     , params(std::move(params_))
     , grouping_sets_params(std::move(grouping_sets_params_))
@@ -70,7 +70,7 @@ void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, c
 {
     if (memoryBoundMergingWillBeUsed())
     {
-        if (input_streams.front().header.has("__grouping_set") || !grouping_sets_params.empty())
+        if (input_headers.front().has("__grouping_set") || !grouping_sets_params.empty())
             throw Exception(ErrorCodes::LOGICAL_ERROR,
                  "Memory bound merging of aggregated results is not supported for grouping sets.");
 
@@ -114,7 +114,7 @@ void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, c
     }
     else
     {
-        if (input_streams.front().header.has("__grouping_set") || !grouping_sets_params.empty())
+        if (input_headers.front().has("__grouping_set") || !grouping_sets_params.empty())
             throw Exception(ErrorCodes::LOGICAL_ERROR,
                  "Memory efficient merging of aggregated results is not supported for grouping sets.");
         auto num_merge_threads = memory_efficient_merge_threads
@@ -145,11 +145,10 @@ void MergingAggregatedStep::describeActions(JSONBuilder::JSONMap & map) const
         map.add("Order", dumpSortDescription(group_by_sort_description));
 }
 
-void MergingAggregatedStep::updateOutputStream()
+void MergingAggregatedStep::updateOutputHeader()
 {
-    const auto & in_header = input_streams.front().header;
-    output_stream = createOutputStream(input_streams.front(),
-        MergingAggregatedTransform::appendGroupingIfNeeded(in_header, params.getHeader(in_header, final)), getDataStreamTraits());
+    const auto & in_header = input_headers.front();
+    output_header = MergingAggregatedTransform::appendGroupingIfNeeded(in_header, params.getHeader(in_header, final));
 }
 
 bool MergingAggregatedStep::memoryBoundMergingWillBeUsed() const
diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.h b/src/Processors/QueryPlan/MergingAggregatedStep.h
index 78b53e66325..960a2f350b9 100644
--- a/src/Processors/QueryPlan/MergingAggregatedStep.h
+++ b/src/Processors/QueryPlan/MergingAggregatedStep.h
@@ -14,7 +14,7 @@ class MergingAggregatedStep : public ITransformingStep
 {
 public:
     MergingAggregatedStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         Aggregator::Params params_,
         GroupingSetsParamsList grouping_sets_params_,
         bool final_,
@@ -41,7 +41,7 @@ public:
     bool memoryBoundMergingWillBeUsed() const;
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
 
     Aggregator::Params params;
diff --git a/src/Processors/QueryPlan/OffsetStep.cpp b/src/Processors/QueryPlan/OffsetStep.cpp
index 4bbe81f9169..1d25414a44c 100644
--- a/src/Processors/QueryPlan/OffsetStep.cpp
+++ b/src/Processors/QueryPlan/OffsetStep.cpp
@@ -22,8 +22,8 @@ static ITransformingStep::Traits getTraits()
     };
 }
 
-OffsetStep::OffsetStep(const DataStream & input_stream_, size_t offset_)
-    : ITransformingStep(input_stream_, input_stream_.header, getTraits())
+OffsetStep::OffsetStep(const Header & input_header_, size_t offset_)
+    : ITransformingStep(input_header_, input_header_, getTraits())
     , offset(offset_)
 {
 }
diff --git a/src/Processors/QueryPlan/OffsetStep.h b/src/Processors/QueryPlan/OffsetStep.h
index a32835b62a6..ae6bc1c66c0 100644
--- a/src/Processors/QueryPlan/OffsetStep.h
+++ b/src/Processors/QueryPlan/OffsetStep.h
@@ -9,7 +9,7 @@ namespace DB
 class OffsetStep : public ITransformingStep
 {
 public:
-    OffsetStep(const DataStream & input_stream_, size_t offset_);
+    OffsetStep(const Header & input_header_, size_t offset_);
 
     String getName() const override { return "Offset"; }
 
@@ -19,9 +19,9 @@ public:
     void describeActions(FormatSettings & settings) const override;
 
 private:
-    void updateOutputStream() override
+    void updateOutputHeader() override
     {
-        output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
+        output_header = input_headers.front();
     }
 
     size_t offset;
diff --git a/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp b/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp
index 47df05301c9..a876084e5a7 100644
--- a/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp
+++ b/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp
@@ -15,20 +15,20 @@ bool addPlansForSets(QueryPlan & root_plan, QueryPlan::Node & node, QueryPlan::N
     auto plans = DelayedCreatingSetsStep::makePlansForSets(std::move(*delayed));
     node.children.reserve(1 + plans.size());
 
-    DataStreams input_streams;
-    input_streams.reserve(1 + plans.size());
-    input_streams.push_back(node.children.front()->step->getOutputStream());
+    Headers input_headers;
+    input_headers.reserve(1 + plans.size());
+    input_headers.push_back(node.children.front()->step->getOutputHeader());
 
     for (const auto & plan : plans)
     {
-        input_streams.push_back(plan->getCurrentDataStream());
+        input_headers.push_back(plan->getCurrentHeader());
         node.children.push_back(plan->getRootNode());
         auto [add_nodes, add_resources] = QueryPlan::detachNodesAndResources(std::move(*plan));
         nodes.splice(nodes.end(), std::move(add_nodes));
         root_plan.addResources(std::move(add_resources));
     }
 
-    auto creating_sets = std::make_unique<CreatingSetsStep>(std::move(input_streams));
+    auto creating_sets = std::make_unique<CreatingSetsStep>(std::move(input_headers));
     creating_sets->setStepDescription("Create sets before main query execution");
     node.step = std::move(creating_sets);
     return true;
diff --git a/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp b/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp
index 0c708599398..d416212722f 100644
--- a/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp
+++ b/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp
@@ -41,8 +41,8 @@ size_t tryConvertOuterJoinToInnerJoin(QueryPlan::Node * parent_node, QueryPlan::
 
     const auto & filter_dag = filter->getExpression();
     const auto & filter_column_name = filter->getFilterColumnName();
-    const auto & left_stream_input_header = join->getInputStreams().front().header;
-    const auto & right_stream_input_header = join->getInputStreams().back().header;
+    const auto & left_stream_input_header = join->getInputHeaders().front();
+    const auto & right_stream_input_header = join->getInputHeaders().back();
 
     bool left_stream_safe = true;
     bool right_stream_safe = true;
diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
index 63ea8880cca..524baae2859 100644
--- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
+++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp
@@ -49,9 +49,9 @@ static bool filterColumnIsNotAmongAggregatesArguments(const AggregateDescription
 static void checkChildrenSize(QueryPlan::Node * node, size_t child_num)
 {
     auto & child = node->step;
-    if (child_num > child->getInputStreams().size() || child_num > node->children.size())
+    if (child_num > child->getInputHeaders().size() || child_num > node->children.size())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of children: expected at least {}, got {} children and {} streams",
-                        child_num, child->getInputStreams().size(), node->children.size());
+                        child_num, node->children.size(), child->getInputHeaders().size()); /// children first, then streams
 }
 
 static bool identifiersIsAmongAllGroupingSets(const GroupingSetsParamsList & grouping_sets_params, const NameSet & identifiers_in_predicate)
@@ -114,7 +114,7 @@ static std::optional<ActionsDAG> splitFilter(QueryPlan::Node * parent_node, cons
     const auto & filter_column_name = filter->getFilterColumnName();
     bool removes_filter = filter->removesFilterColumn();
 
-    const auto & all_inputs = child->getInputStreams()[child_idx].header.getColumnsWithTypeAndName();
+    const auto & all_inputs = child->getInputHeaders()[child_idx].getColumnsWithTypeAndName();
     return expression.splitActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs);
 }
 
@@ -150,17 +150,17 @@ addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes,
     String split_filter_column_name = split_filter.getOutputs().front()->result_name;
 
     node.step = std::make_unique<FilterStep>(
-        node.children.at(0)->step->getOutputStream(), std::move(split_filter), std::move(split_filter_column_name), can_remove_filter);
+        node.children.at(0)->step->getOutputHeader(), std::move(split_filter), std::move(split_filter_column_name), can_remove_filter);
 
     if (auto * transforming_step = dynamic_cast<ITransformingStep *>(child.get()))
     {
-        transforming_step->updateInputStream(node.step->getOutputStream());
+        transforming_step->updateInputHeader(node.step->getOutputHeader());
     }
     else
     {
         if (auto * join = typeid_cast<JoinStep *>(child.get()))
         {
-            join->updateInputStream(node.step->getOutputStream(), child_idx);
+            join->updateInputHeader(node.step->getOutputHeader(), child_idx);
         }
         else
             throw Exception(
@@ -176,11 +176,11 @@ addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes,
         {
             /// This means that all predicates of filter were pushed down.
             /// Replace current actions to expression, as we don't need to filter anything.
-            parent = std::make_unique<ExpressionStep>(child->getOutputStream(), std::move(expression));
+            parent = std::make_unique<ExpressionStep>(child->getOutputHeader(), std::move(expression));
         }
         else
         {
-            filter->updateInputStream(child->getOutputStream());
+            filter->updateInputHeader(child->getOutputHeader());
         }
     }
 
@@ -203,7 +203,7 @@ static size_t simplePushDownOverStep(QueryPlan::Node * parent_node, QueryPlan::N
 {
     if (typeid_cast<Step *>(child.get()))
     {
-        Names allowed_inputs = child->getOutputStream().header.getNames();
+        Names allowed_inputs = child->getOutputHeader().getNames();
         if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs))
             return updated_steps;
     }
@@ -241,10 +241,10 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::
       * to RIGHT/LEFT JOIN side.
       */
 
-    const auto & join_header = child->getOutputStream().header;
+    const auto & join_header = child->getOutputHeader();
     const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin();
-    const auto & left_stream_input_header = child->getInputStreams().front().header;
-    const auto & right_stream_input_header = child->getInputStreams().back().header;
+    const auto & left_stream_input_header = child->getInputHeaders().front();
+    const auto & right_stream_input_header = child->getInputHeaders().back();
 
     if (table_join.kind() == JoinKind::Full)
         return 0;
@@ -393,11 +393,11 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::
         {
             /// This means that all predicates of filter were pushed down.
             /// Replace current actions to expression, as we don't need to filter anything.
-            parent = std::make_unique<ExpressionStep>(child->getOutputStream(), std::move(filter_expression));
+            parent = std::make_unique<ExpressionStep>(child->getOutputHeader(), std::move(filter_expression));
         }
         else
         {
-            filter->updateInputStream(child->getOutputStream());
+            filter->updateInputHeader(child->getOutputHeader());
         }
     }
 
@@ -468,8 +468,8 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
         ///                       - Something
         /// Filter - CreatingSets - CreatingSet
         ///                       - CreatingSet
-        auto input_streams = child->getInputStreams();
-        input_streams.front() = filter->getOutputStream();
+        auto input_streams = child->getInputHeaders();
+        input_streams.front() = filter->getOutputHeader();
         child = std::make_unique<CreatingSetsStep>(input_streams);
         std::swap(parent, child);
         std::swap(parent_node->children, child_node->children);
@@ -485,7 +485,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
         /// CreatingSets does not change header.
         /// We can push down filter and update header.
         /// Filter - DelayedCreatingSets - Something
-        child = std::make_unique<DelayedCreatingSetsStep>(filter->getOutputStream(), delayed->detachSets(), delayed->getContext());
+        child = std::make_unique<DelayedCreatingSetsStep>(filter->getOutputHeader(), delayed->detachSets(), delayed->getContext());
         std::swap(parent, child);
         std::swap(parent_node->children, child_node->children);
         std::swap(parent_node->children.front(), child_node->children.front());
@@ -503,7 +503,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
             return 0;
 
         Names keys;
-        const auto & header = totals_having->getInputStreams().front().header;
+        const auto & header = totals_having->getInputHeaders().front();
         for (const auto & column : header)
             if (typeid_cast<const DataTypeAggregateFunction *>(column.type.get()) == nullptr)
                 keys.push_back(column.name);
@@ -523,7 +523,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
         const auto & keys = array_join->getColumns();
         std::unordered_set<std::string_view> keys_set(keys.begin(), keys.end());
 
-        const auto & array_join_header = array_join->getInputStreams().front().header;
+        const auto & array_join_header = array_join->getInputHeaders().front();
 
         Names allowed_inputs;
         for (const auto & column : array_join_header)
@@ -564,7 +564,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
         });
         bool can_remove_filter = sort_description_it == sort_description.end();
 
-        Names allowed_inputs = child->getOutputStream().header.getNames();
+        Names allowed_inputs = child->getOutputHeader().getNames();
         if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs, can_remove_filter))
             return updated_steps;
     }
@@ -574,7 +574,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
         const auto & filter_column_name = assert_cast<const FilterStep *>(parent_node->step.get())->getFilterColumnName();
         bool can_remove_filter = !join_filter_set_step->isColumnPartOfSetKey(filter_column_name);
 
-        Names allowed_inputs = child->getOutputStream().header.getNames();
+        Names allowed_inputs = child->getOutputHeader().getNames();
         if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs, can_remove_filter))
             return updated_steps;
     }
@@ -583,15 +583,15 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
     {
         /// Union does not change header.
         /// We can push down filter and update header.
-        auto union_input_streams = child->getInputStreams();
-        for (auto & input_stream : union_input_streams)
-            input_stream.header = filter->getOutputStream().header;
+        auto union_input_headers = child->getInputHeaders();
+        for (auto & input_header : union_input_headers)
+            input_header = filter->getOutputHeader();
 
         ///                - Something
         /// Filter - Union - Something
         ///                - Something
 
-        child = std::make_unique<UnionStep>(union_input_streams, union_step->getMaxThreads());
+        child = std::make_unique<UnionStep>(union_input_headers, union_step->getMaxThreads());
 
         std::swap(parent, child);
         std::swap(parent_node->children, child_node->children);
@@ -608,7 +608,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
             parent_node->children[i] = &filter_node;
 
             filter_node.step = std::make_unique<FilterStep>(
-                filter_node.children.front()->step->getOutputStream(),
+                filter_node.children.front()->step->getOutputHeader(),
                 filter->getExpression().clone(),
                 filter->getFilterColumnName(),
                 filter->removesFilterColumn());
diff --git a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
index 8866bb99cbe..de6ed9a6996 100644
--- a/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpArrayJoin.cpp
@@ -43,15 +43,15 @@ size_t tryLiftUpArrayJoin(QueryPlan::Node * parent_node, QueryPlan::Nodes & node
     child_node->children.emplace_back(&node);
     /// Expression/Filter -> ArrayJoin -> node -> Something
 
-    node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputStream(),
+    node.step = std::make_unique<ExpressionStep>(node.children.at(0)->step->getOutputHeader(),
                                                  std::move(split_actions.first));
     node.step->setStepDescription(description);
-    array_join_step->updateInputStream(node.step->getOutputStream());
+    array_join_step->updateInputHeader(node.step->getOutputHeader());
 
     if (expression_step)
-        parent = std::make_unique<ExpressionStep>(array_join_step->getOutputStream(), std::move(split_actions.second));
+        parent = std::make_unique<ExpressionStep>(array_join_step->getOutputHeader(), std::move(split_actions.second));
     else
-        parent = std::make_unique<FilterStep>(array_join_step->getOutputStream(), std::move(split_actions.second),
+        parent = std::make_unique<FilterStep>(array_join_step->getOutputHeader(), std::move(split_actions.second),
                                               filter_step->getFilterColumnName(), filter_step->removesFilterColumn());
 
     parent->setStepDescription(description + " [split]");
diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
index 7794ddae8fa..8b6e21be811 100644
--- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
@@ -17,11 +17,11 @@ namespace ErrorCodes
 namespace
 {
 
-const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node)
+const DB::Header & getChildOutputHeader(DB::QueryPlan::Node & node)
 {
     if (node.children.size() != 1)
         throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Node \"{}\" is expected to have only one child.", node.step->getName());
-    return node.children.front()->step->getOutputStream();
+    return node.children.front()->step->getOutputHeader();
 }
 
 }
@@ -80,17 +80,17 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan:
     std::swap(node_with_needed.children, child_node->children);
     child_node->children = {&node_with_needed};
 
-    node_with_needed.step = std::make_unique<ExpressionStep>(getChildOutputStream(node_with_needed), std::move(needed_for_sorting));
+    node_with_needed.step = std::make_unique<ExpressionStep>(getChildOutputHeader(node_with_needed), std::move(needed_for_sorting));
     node_with_needed.step->setStepDescription(child_step->getStepDescription());
     // Sorting (parent_node) -> so far the origin Expression (child_node) -> NeededCalculations (node_with_needed)
 
     std::swap(parent_step, child_step);
     // so far the origin Expression (parent_node) -> Sorting (child_node) -> NeededCalculations (node_with_needed)
 
-    sorting_step->updateInputStream(getChildOutputStream(*child_node));
+    sorting_step->updateInputHeader(getChildOutputHeader(*child_node));
 
     auto description = parent_step->getStepDescription();
-    parent_step = std::make_unique<DB::ExpressionStep>(child_step->getOutputStream(), std::move(unneeded_for_sorting));
+    parent_step = std::make_unique<DB::ExpressionStep>(child_step->getOutputHeader(), std::move(unneeded_for_sorting));
     parent_step->setStepDescription(description + " [lifted up part]");
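-    // UneededCalculations (parent_node) -> Sorting (child_node) -> NeededCalculations (node_with_needed)
+    // UnneededCalculations (parent_node) -> Sorting (child_node) -> NeededCalculations (node_with_needed)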
 
diff --git a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp
index 77fe46ca796..a69c65f30cd 100644
--- a/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpUnion.cpp
@@ -24,15 +24,15 @@ size_t tryLiftUpUnion(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
     {
         /// Union does not change header.
         /// We can push down expression and update header.
-        auto union_input_streams = child->getInputStreams();
-        for (auto & input_stream : union_input_streams)
-            input_stream.header = expression->getOutputStream().header;
+        auto union_input_headers = child->getInputHeaders();
+        for (auto & input_header : union_input_headers)
+            input_header = expression->getOutputHeader();
 
         ///                    - Something
         /// Expression - Union - Something
         ///                    - Something
 
-        child = std::make_unique<UnionStep>(union_input_streams, union_step->getMaxThreads());
+        child = std::make_unique<UnionStep>(union_input_headers, union_step->getMaxThreads());
 
         std::swap(parent, child);
         std::swap(parent_node->children, child_node->children);
@@ -49,7 +49,7 @@ size_t tryLiftUpUnion(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
             parent_node->children[i] = &expr_node;
 
             expr_node.step = std::make_unique<ExpressionStep>(
-                expr_node.children.front()->step->getOutputStream(),
+                expr_node.children.front()->step->getOutputHeader(),
                 expression->getExpression().clone());
             expr_node.step->setStepDescription(expression->getStepDescription());
         }
@@ -84,7 +84,7 @@ size_t tryLiftUpUnion(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
             parent_node->children[i] = &distinct_node;
 
             distinct_node.step = std::make_unique<DistinctStep>(
-                distinct_node.children.front()->step->getOutputStream(),
+                distinct_node.children.front()->step->getOutputHeader(),
                 distinct->getSetSizeLimits(),
                 distinct->getLimitHint(),
                 distinct->getColumnNames(),
diff --git a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
index f121aea73b3..10cc78da33a 100644
--- a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
+++ b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp
@@ -85,7 +85,7 @@ size_t tryPushDownLimit(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
         return 0;
 
-    /// Input stream for Limit have changed.
+    /// Input header for Limit has changed.
-    limit->updateInputStream(transforming->getInputStreams().front());
+    limit->updateInputHeader(transforming->getInputHeaders().front());
 
     parent.swap(child);
     return 2;
diff --git a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
index 8ecd9989fa0..4dd7c953bf1 100644
--- a/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
+++ b/src/Processors/QueryPlan/Optimizations/mergeExpressions.cpp
@@ -48,7 +48,7 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
 
         auto merged = ActionsDAG::merge(std::move(child_actions), std::move(parent_actions));
 
-        auto expr = std::make_unique<ExpressionStep>(child_expr->getInputStreams().front(), std::move(merged));
+        auto expr = std::make_unique<ExpressionStep>(child_expr->getInputHeaders().front(), std::move(merged));
         expr->setStepDescription("(" + parent_expr->getStepDescription() + " + " + child_expr->getStepDescription() + ")");
 
         parent_node->step = std::move(expr);
@@ -66,7 +66,7 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
         auto merged = ActionsDAG::merge(std::move(child_actions), std::move(parent_actions));
 
         auto filter = std::make_unique<FilterStep>(
-            child_expr->getInputStreams().front(),
+            child_expr->getInputHeaders().front(),
             std::move(merged),
             parent_filter->getFilterColumnName(),
             parent_filter->removesFilterColumn());
@@ -117,7 +117,7 @@ size_t tryMergeFilters(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
 
         child_actions.removeUnusedActions(false);
 
-        auto filter = std::make_unique<FilterStep>(child_filter->getInputStreams().front(),
+        auto filter = std::make_unique<FilterStep>(child_filter->getInputHeaders().front(),
                                                    std::move(child_actions),
                                                    condition.result_name,
                                                    true);
diff --git a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp
index 39131955a4f..4933026377f 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizePrewhere.cpp
@@ -187,7 +187,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &)
     if (!optimize_result.fully_moved_to_prewhere)
     {
         filter_node->step = std::make_unique<FilterStep>(
-            source_step_with_filter->getOutputStream(),
+            source_step_with_filter->getOutputHeader(),
             std::move(split_result.second),
             filter_step->getFilterColumnName(),
             filter_step->removesFilterColumn());
@@ -196,7 +196,7 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &)
     {
         /// Have to keep this expression to change column names to column identifiers
         filter_node->step = std::make_unique<ExpressionStep>(
-            source_step_with_filter->getOutputStream(),
+            source_step_with_filter->getOutputHeader(),
             std::move(split_result.second));
     }
 }
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp
index a053b774e07..e64a88de62e 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeReadInOrder.cpp
@@ -1064,7 +1064,7 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes)
                 /// We cannot sort up to partial read-in-order description with limit cause result set can be wrong.
                 const auto & descr = limit ? sorting->getSortDescription() : *max_sort_descr;
                 additional_sorting = std::make_unique<SortingStep>(
-                    child->step->getOutputStream(),
+                    child->step->getOutputHeader(),
                     descr,
                     limit, /// TODO: support limit with ties
                     sorting->getSettings());
@@ -1072,7 +1072,7 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes)
             else if (info->sort_description_for_merging.size() < max_sort_descr->size())
             {
                 additional_sorting = std::make_unique<SortingStep>(
-                    child->step->getOutputStream(),
+                    child->step->getOutputHeader(),
                     info->sort_description_for_merging,
                     *max_sort_descr,
                     sorting->getSettings().max_block_size,
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
index ca128a121fb..511ae274101 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
@@ -798,7 +798,7 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu
     }
 
     // LOG_TRACE(getLogger("optimizeUseProjections"), "Projection reading header {}",
-    //           projection_reading->getOutputStream().header.dumpStructure());
+    //           projection_reading->getOutputHeader().dumpStructure());
 
     projection_reading->setStepDescription(selected_projection_name);
     auto & projection_reading_node = nodes.emplace_back(QueryPlan::Node{.step = std::move(projection_reading)});
@@ -814,14 +814,14 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu
         {
             const auto & result_name = best_candidate->dag.getOutputs().front()->result_name;
             aggregate_projection_node->step = std::make_unique<FilterStep>(
-                projection_reading_node.step->getOutputStream(),
+                projection_reading_node.step->getOutputHeader(),
                 std::move(best_candidate->dag),
                 result_name,
                 true);
         }
         else
             aggregate_projection_node->step
-                = std::make_unique<ExpressionStep>(projection_reading_node.step->getOutputStream(), std::move(best_candidate->dag));
+                = std::make_unique<ExpressionStep>(projection_reading_node.step->getOutputHeader(), std::move(best_candidate->dag));
 
         aggregate_projection_node->children.push_back(&projection_reading_node);
     }
@@ -833,12 +833,12 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu
     if (!has_ordinary_parts)
     {
         /// All parts are taken from projection
-        aggregating->requestOnlyMergeForAggregateProjection(aggregate_projection_node->step->getOutputStream());
+        aggregating->requestOnlyMergeForAggregateProjection(aggregate_projection_node->step->getOutputHeader());
         node.children.front() = aggregate_projection_node;
     }
     else
     {
-        node.step = aggregating->convertToAggregatingProjection(aggregate_projection_node->step->getOutputStream());
+        node.step = aggregating->convertToAggregatingProjection(aggregate_projection_node->step->getOutputHeader());
         node.children.push_back(aggregate_projection_node);
     }
 
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
index aa266273e5b..a0c4884d181 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp
@@ -33,7 +33,7 @@ struct NormalProjectionCandidate : public ProjectionCandidate
 {
 };
 
-static std::optional<ActionsDAG> makeMaterializingDAG(const Block & proj_header, const Block main_header)
+static std::optional<ActionsDAG> makeMaterializingDAG(const Block & proj_header, const Block & main_header)
 {
     /// Materialize constants in case we don't have it in output header.
     /// This may happen e.g. if we have PREWHERE.
@@ -249,14 +249,14 @@ std::optional<String> optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod
         if (query.filter_node)
         {
             expr_or_filter_node.step = std::make_unique<FilterStep>(
-                projection_reading_node.step->getOutputStream(),
+                projection_reading_node.step->getOutputHeader(),
                 std::move(*query.dag),
                 query.filter_node->result_name,
                 true);
         }
         else
             expr_or_filter_node.step = std::make_unique<ExpressionStep>(
-                projection_reading_node.step->getOutputStream(),
+                projection_reading_node.step->getOutputHeader(),
                 std::move(*query.dag));
 
         expr_or_filter_node.children.push_back(&projection_reading_node);
@@ -270,13 +270,13 @@ std::optional<String> optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod
     }
     else
     {
-        const auto & main_stream = iter->node->children[iter->next_child - 1]->step->getOutputStream();
-        const auto * proj_stream = &next_node->step->getOutputStream();
+        const auto & main_stream = iter->node->children[iter->next_child - 1]->step->getOutputHeader();
+        const auto * proj_stream = &next_node->step->getOutputHeader();
 
-        if (auto materializing = makeMaterializingDAG(proj_stream->header, main_stream.header))
+        if (auto materializing = makeMaterializingDAG(*proj_stream, main_stream))
         {
             auto converting = std::make_unique<ExpressionStep>(*proj_stream, std::move(*materializing));
-            proj_stream = &converting->getOutputStream();
+            proj_stream = &converting->getOutputHeader();
             auto & expr_node = nodes.emplace_back();
             expr_node.step = std::move(converting);
             expr_node.children.push_back(next_node);
@@ -284,8 +284,8 @@ std::optional<String> optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod
         }
 
         auto & union_node = nodes.emplace_back();
-        DataStreams input_streams = {main_stream, *proj_stream};
-        union_node.step = std::make_unique<UnionStep>(std::move(input_streams));
+        Headers input_headers = {main_stream, *proj_stream};
+        union_node.step = std::make_unique<UnionStep>(std::move(input_headers));
         union_node.children = {iter->node->children[iter->next_child - 1], next_node};
         iter->node->children[iter->next_child - 1] = &union_node;
     }
diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp
index ef13a5f85a6..941913989a5 100644
--- a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp
+++ b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp
@@ -53,7 +53,7 @@ namespace
     DistinctColumns getDistinctColumns(const DistinctStep * distinct)
     {
         /// find non-const columns in DISTINCT
-        const ColumnsWithTypeAndName & distinct_columns = distinct->getOutputStream().header.getColumnsWithTypeAndName();
+        const ColumnsWithTypeAndName & distinct_columns = distinct->getOutputHeader().getColumnsWithTypeAndName();
         std::set<std::string_view> non_const_columns;
         std::unordered_set<std::string_view> column_names(cbegin(distinct->getColumnNames()), cend(distinct->getColumnNames()));
         for (const auto & column : distinct_columns)
diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp
index 3c67e68f704..0b847004b03 100644
--- a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp
+++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp
@@ -100,7 +100,7 @@ private:
         }
 
         /// sorting removed, so need to update sorting traits for upstream steps
-        const DataStream * input_stream = &parent_node->children.front()->step->getOutputStream();
+        const Header * input_header = &parent_node->children.front()->step->getOutputHeader();
         chassert(parent_node == (stack.rbegin() + 1)->node); /// skip element on top of stack since it's sorting which was just removed
         for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it)
         {
@@ -119,8 +119,8 @@ private:
                 break;
             }
 
-            trans->updateInputStream(*input_stream);
-            input_stream = &trans->getOutputStream();
+            trans->updateInputHeader(*input_header);
+            input_header = &trans->getOutputHeader();
 
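-            /// update sorting properties though stack until reach node which affects order (inclusive)
+            /// update sorting properties through the stack until reaching the node which affects order (inclusive)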
             if (node == nodes_affect_order.back())
diff --git a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
index 6aed57634b0..0de9ebc2b4d 100644
--- a/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
+++ b/src/Processors/QueryPlan/Optimizations/splitFilter.cpp
@@ -64,12 +64,12 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
     }
 
     filter_node.step = std::make_unique<FilterStep>(
-            filter_node.children.at(0)->step->getOutputStream(),
+            filter_node.children.at(0)->step->getOutputHeader(),
             std::move(split.first),
             std::move(split_filter_name),
             remove_filter);
 
-    node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
+    node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputHeader(), std::move(split.second));
 
     filter_node.step->setStepDescription("(" + description + ")[split]");
     node->step->setStepDescription(description);
diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp
index f61e862b9b6..5c294093151 100644
--- a/src/Processors/QueryPlan/QueryPlan.cpp
+++ b/src/Processors/QueryPlan/QueryPlan.cpp
@@ -47,14 +47,14 @@ void QueryPlan::checkNotCompleted() const
 
 bool QueryPlan::isCompleted() const
 {
-    return isInitialized() && !root->step->hasOutputStream();
+    return isInitialized() && !root->step->hasOutputHeader();
 }
 
-const DataStream & QueryPlan::getCurrentDataStream() const
+const Header & QueryPlan::getCurrentHeader() const
 {
     checkInitialized();
     checkNotCompleted();
-    return root->step->getOutputStream();
+    return root->step->getOutputHeader();
 }
 
 void QueryPlan::unitePlans(QueryPlanStepPtr step, std::vector<std::unique_ptr<QueryPlan>> plans)
@@ -62,8 +62,8 @@ void QueryPlan::unitePlans(QueryPlanStepPtr step, std::vector<std::unique_ptr<Qu
     if (isInitialized())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unite plans because current QueryPlan is already initialized");
 
-    const auto & inputs = step->getInputStreams();
-    size_t num_inputs = step->getInputStreams().size();
+    const auto & inputs = step->getInputHeaders();
+    size_t num_inputs = step->getInputHeaders().size();
     if (num_inputs != plans.size())
         throw Exception(
             ErrorCodes::LOGICAL_ERROR,
@@ -74,8 +74,8 @@ void QueryPlan::unitePlans(QueryPlanStepPtr step, std::vector<std::unique_ptr<Qu
 
     for (size_t i = 0; i < num_inputs; ++i)
     {
-        const auto & step_header = inputs[i].header;
-        const auto & plan_header = plans[i]->getCurrentDataStream().header;
+        const auto & step_header = inputs[i];
+        const auto & plan_header = plans[i]->getCurrentHeader();
         if (!blocksHaveEqualStructure(step_header, plan_header))
             throw Exception(
                 ErrorCodes::LOGICAL_ERROR,
@@ -106,7 +106,7 @@ void QueryPlan::addStep(QueryPlanStepPtr step)
 {
     checkNotCompleted();
 
-    size_t num_input_streams = step->getInputStreams().size();
+    size_t num_input_streams = step->getInputHeaders().size();
 
     if (num_input_streams == 0)
     {
@@ -129,8 +129,8 @@ void QueryPlan::addStep(QueryPlanStepPtr step)
                 "Cannot add step {} to QueryPlan because step has input, but QueryPlan is not initialized",
                 step->getName());
 
-        const auto & root_header = root->step->getOutputStream().header;
-        const auto & step_header = step->getInputStreams().front().header;
+        const auto & root_header = root->step->getOutputHeader();
+        const auto & step_header = step->getInputHeaders().front();
         if (!blocksHaveEqualStructure(root_header, step_header))
             throw Exception(
                 ErrorCodes::LOGICAL_ERROR,
@@ -214,11 +214,11 @@ static void explainStep(const IQueryPlanStep & step, JSONBuilder::JSONMap & map,
             map.add("Description", description);
     }
 
-    if (options.header && step.hasOutputStream())
+    if (options.header && step.hasOutputHeader())
     {
         auto header_array = std::make_unique<JSONBuilder::JSONArray>();
 
-        for (const auto & output_column : step.getOutputStream().header)
+        for (const auto & output_column : step.getOutputHeader())
         {
             auto column_map = std::make_unique<JSONBuilder::JSONMap>();
             column_map->add("Name", output_column.name);
@@ -316,16 +316,16 @@ static void explainStep(
     {
         settings.out << prefix;
 
-        if (!step.hasOutputStream())
+        if (!step.hasOutputHeader())
             settings.out << "No header";
-        else if (!step.getOutputStream().header)
+        else if (!step.getOutputHeader())
             settings.out << "Empty header";
         else
         {
             settings.out << "Header: ";
             bool first = true;
 
-            for (const auto & elem : step.getOutputStream().header)
+            for (const auto & elem : step.getOutputHeader())
             {
                 if (!first)
                     settings.out << "\n" << prefix << "        ";
@@ -469,21 +469,21 @@ static void updateDataStreams(QueryPlan::Node & root)
         static void visitBottomUpImpl(QueryPlan::Node * current_node, QueryPlan::Node * /*parent_node*/)
         {
             auto & current_step = *current_node->step;
-            if (!current_step.canUpdateInputStream() || current_node->children.empty())
+            if (!current_step.canUpdateInputHeader() || current_node->children.empty())
                 return;
 
             for (const auto * child : current_node->children)
             {
-                if (!child->step->hasOutputStream())
+                if (!child->step->hasOutputHeader())
                     return;
             }
 
-            DataStreams streams;
-            streams.reserve(current_node->children.size());
+            Headers headers;
+            headers.reserve(current_node->children.size());
             for (const auto * child : current_node->children)
-                streams.emplace_back(child->step->getOutputStream());
+                headers.emplace_back(child->step->getOutputHeader());
 
-            current_step.updateInputStreams(std::move(streams));
+            current_step.updateInputHeaders(std::move(headers));
         }
     };
 
diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h
index d810cf21e71..e12a6ee9e65 100644
--- a/src/Processors/QueryPlan/QueryPlan.h
+++ b/src/Processors/QueryPlan/QueryPlan.h
@@ -12,7 +12,8 @@
 namespace DB
 {
 
-class DataStream;
+class Block;
+using Header = Block;
 
 class IQueryPlanStep;
 using QueryPlanStepPtr = std::unique_ptr<IQueryPlanStep>;
@@ -52,7 +53,7 @@ public:
 
     bool isInitialized() const { return root != nullptr; } /// Tree is not empty
-    bool isCompleted() const; /// Tree is not empty and root hasOutputStream()
+    bool isCompleted() const; /// Tree is not empty and root hasOutputHeader()
-    const DataStream & getCurrentDataStream() const; /// Checks that (isInitialized() && !isCompleted())
+    const Header & getCurrentHeader() const; /// Checks that (isInitialized() && !isCompleted())
 
     void optimize(const QueryPlanOptimizationSettings & optimization_settings);
 
diff --git a/src/Processors/QueryPlan/ReadFromLoopStep.cpp b/src/Processors/QueryPlan/ReadFromLoopStep.cpp
index 4c3ad241853..59badd8b603 100644
--- a/src/Processors/QueryPlan/ReadFromLoopStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromLoopStep.cpp
@@ -132,7 +132,7 @@ namespace DB
             size_t max_block_size_,
             size_t num_streams_)
             : SourceStepWithFilter(
-            DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)},
+            storage_snapshot_->getSampleBlockForColumns(column_names_),
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -157,8 +157,8 @@ namespace DB
 
         if (pipe.empty())
         {
-            assert(output_stream != std::nullopt);
-            pipe = Pipe(std::make_shared<NullSource>(output_stream->header));
+            assert(output_header != std::nullopt);
+            pipe = Pipe(std::make_shared<NullSource>(*output_header));
         }
 
         pipeline.init(std::move(pipe));
diff --git a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp
index e1bdae760dd..cb4342b0fa2 100644
--- a/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromMemoryStorageStep.cpp
@@ -110,7 +110,7 @@ ReadFromMemoryStorageStep::ReadFromMemoryStorageStep(
     const size_t num_streams_,
     const bool delay_read_for_global_sub_queries_)
     : SourceStepWithFilter(
-        DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)},
+        storage_snapshot_->getSampleBlockForColumns(columns_to_read_),
         columns_to_read_,
         query_info_,
         storage_snapshot_,
@@ -128,8 +128,8 @@ void ReadFromMemoryStorageStep::initializePipeline(QueryPipelineBuilder & pipeli
 
     if (pipe.empty())
     {
-        assert(output_stream != std::nullopt);
-        pipe = Pipe(std::make_shared<NullSource>(output_stream->header));
+        assert(output_header != std::nullopt);
+        pipe = Pipe(std::make_shared<NullSource>(*output_header));
     }
 
     pipeline.init(std::move(pipe));
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index 9f2700d7def..8fe2d874ca5 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -219,14 +219,14 @@ static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts)
 }
 
 /// build sort description for output stream
-static SortDescription getSortDescriptionForOutputStream(
-    const DataStream & output_stream, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info, PrewhereInfoPtr prewhere_info, bool enable_vertical_final)
+static SortDescription getSortDescriptionForOutputHeader(
+    const Header & output_header, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info, PrewhereInfoPtr prewhere_info, bool enable_vertical_final)
 {
     /// Updating sort description can be done after PREWHERE actions are applied to the header.
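-    /// Aftert PREWHERE actions are applied, column names in header can differ from storage column names due to aliases
+    /// After PREWHERE actions are applied, column names in header can differ from storage column names due to aliases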
     /// To mitigate it, we're trying to build original header and use it to deduce sorting description
     /// TODO: this approach is fragile, it'd be more robust to update sorting description for the whole plan during plan optimization
-    Block original_header = output_stream.header.cloneEmpty();
+    Block original_header = output_header.cloneEmpty();
     if (prewhere_info)
     {
         {
@@ -252,7 +252,7 @@ static SortDescription getSortDescriptionForOutputStream(
     }
 
     SortDescription sort_description;
-    const Block & header = output_stream.header;
+    const Block & header = output_header;
     for (const auto & sorting_key : sorting_key_columns)
     {
         const auto it = std::find_if(
@@ -325,9 +325,9 @@ ReadFromMergeTree::ReadFromMergeTree(
     std::optional<MergeTreeAllRangesCallback> all_ranges_callback_,
     std::optional<MergeTreeReadTaskCallback> read_task_callback_,
     std::optional<size_t> number_of_current_replica_)
-    : SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader(
+    : SourceStepWithFilter(MergeTreeSelectProcessor::transformHeader(
         storage_snapshot_->getSampleBlockForColumns(all_column_names_),
-        query_info_.prewhere_info)}, all_column_names_, query_info_, storage_snapshot_, context_)
+        query_info_.prewhere_info), all_column_names_, query_info_, storage_snapshot_, context_)
     , reader_settings(getMergeTreeReaderSettings(context_, query_info_))
     , prepared_parts(std::move(parts_))
     , mutations_snapshot(std::move(mutations_))
@@ -1796,8 +1796,8 @@ int ReadFromMergeTree::getSortDirection() const
 
 void ReadFromMergeTree::updateSortDescription()
 {
-    result_sort_description = getSortDescriptionForOutputStream(
-        *output_stream,
+    result_sort_description = getSortDescriptionForOutputHeader(
+        *output_header,
         storage_snapshot->metadata->getSortingKeyColumns(),
         getSortDirection(),
         query_info.input_order_info,
@@ -1843,9 +1843,9 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info
     query_info.prewhere_info = prewhere_info_value;
     prewhere_info = prewhere_info_value;
 
-    output_stream = DataStream{.header = MergeTreeSelectProcessor::transformHeader(
+    output_header = MergeTreeSelectProcessor::transformHeader(
         storage_snapshot->getSampleBlockForColumns(all_column_names),
-        prewhere_info_value)};
+        prewhere_info_value);
 
     updateSortDescription();
 }
@@ -2072,7 +2072,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
 
     if (result.parts_with_ranges.empty())
     {
-        pipeline.init(Pipe(std::make_shared<NullSource>(getOutputStream().header)));
+        pipeline.init(Pipe(std::make_shared<NullSource>(getOutputHeader())));
         return;
     }
 
@@ -2093,7 +2093,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
 
     if (pipe.empty())
     {
-        pipeline.init(Pipe(std::make_shared<NullSource>(getOutputStream().header)));
+        pipeline.init(Pipe(std::make_shared<NullSource>(getOutputHeader())));
         return;
     }
 
@@ -2125,11 +2125,11 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
 
     /// Extra columns may be returned (for example, if sampling is used).
     /// Convert pipe to step header structure.
-    if (!isCompatibleHeader(cur_header, getOutputStream().header))
+    if (!isCompatibleHeader(cur_header, getOutputHeader()))
     {
         auto converting = ActionsDAG::makeConvertingActions(
             cur_header.getColumnsWithTypeAndName(),
-            getOutputStream().header.getColumnsWithTypeAndName(),
+            getOutputHeader().getColumnsWithTypeAndName(),
             ActionsDAG::MatchColumnsMode::Name);
 
         append_actions(std::move(converting));
@@ -2146,11 +2146,11 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
 
     /// Some extra columns could be added by sample/final/in-order/etc
     /// Remove them from header if not needed.
-    if (!blocksHaveEqualStructure(pipe.getHeader(), getOutputStream().header))
+    if (!blocksHaveEqualStructure(pipe.getHeader(), getOutputHeader()))
     {
         auto convert_actions_dag = ActionsDAG::makeConvertingActions(
             pipe.getHeader().getColumnsWithTypeAndName(),
-            getOutputStream().header.getColumnsWithTypeAndName(),
+            getOutputHeader().getColumnsWithTypeAndName(),
             ActionsDAG::MatchColumnsMode::Name,
             true);
 
diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
index 92c936cdc20..7f254b9bc51 100644
--- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
+++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp
@@ -6,7 +6,7 @@ namespace DB
 {
 
 ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_)
-    : ISourceStep(DataStream{.header = pipe_.getHeader()})
+    : ISourceStep(pipe_.getHeader())
     , pipe(std::move(pipe_))
 {
 }
diff --git a/src/Processors/QueryPlan/ReadFromRecursiveCTEStep.cpp b/src/Processors/QueryPlan/ReadFromRecursiveCTEStep.cpp
index b191646a3e9..993cd9ab65c 100644
--- a/src/Processors/QueryPlan/ReadFromRecursiveCTEStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromRecursiveCTEStep.cpp
@@ -6,15 +6,15 @@
 namespace DB
 {
 
-ReadFromRecursiveCTEStep::ReadFromRecursiveCTEStep(Block output_header, QueryTreeNodePtr recursive_cte_union_node_)
-    : ISourceStep(DataStream{.header = std::move(output_header)})
+ReadFromRecursiveCTEStep::ReadFromRecursiveCTEStep(Block output_header_, QueryTreeNodePtr recursive_cte_union_node_)
+    : ISourceStep(std::move(output_header_))
     , recursive_cte_union_node(std::move(recursive_cte_union_node_))
 {
 }
 
 void ReadFromRecursiveCTEStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    pipeline.init(Pipe(std::make_shared<RecursiveCTESource>(getOutputStream().header, recursive_cte_union_node)));
+    pipeline.init(Pipe(std::make_shared<RecursiveCTESource>(getOutputHeader(), recursive_cte_union_node)));
 }
 
 }
diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp
index 1fe1141e161..943c1e2bdfa 100644
--- a/src/Processors/QueryPlan/ReadFromRemote.cpp
+++ b/src/Processors/QueryPlan/ReadFromRemote.cpp
@@ -121,7 +121,7 @@ ReadFromRemote::ReadFromRemote(
     UInt32 shard_count_,
     std::shared_ptr<const StorageLimitsList> storage_limits_,
     const String & cluster_name_)
-    : ISourceStep(DataStream{.header = std::move(header_)})
+    : ISourceStep(std::move(header_))
     , shards(std::move(shards_))
     , stage(stage_)
     , main_table(std::move(main_table_))
@@ -229,7 +229,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream
     };
 
     pipes.emplace_back(createDelayedPipe(shard.header, lazily_create_stream, add_totals, add_extremes));
-    addConvertingActions(pipes.back(), output_stream->header, shard.has_missing_objects);
+    addConvertingActions(pipes.back(), *output_header, shard.has_missing_objects);
 }
 
 void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard)
@@ -310,7 +310,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
 
             pipes.emplace_back(
                 createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending));
-            addConvertingActions(pipes.back(), output_stream->header, shard.has_missing_objects);
+            addConvertingActions(pipes.back(), *output_header, shard.has_missing_objects);
         }
     }
     else
@@ -339,7 +339,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
 
         pipes.emplace_back(
             createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending));
-        addConvertingActions(pipes.back(), output_stream->header, shard.has_missing_objects);
+        addConvertingActions(pipes.back(), *output_header, shard.has_missing_objects);
     }
 }
 
@@ -379,7 +379,7 @@ ReadFromParallelRemoteReplicasStep::ReadFromParallelRemoteReplicasStep(
     std::shared_ptr<const StorageLimitsList> storage_limits_,
     std::vector<ConnectionPoolPtr> pools_to_use_,
     std::optional<size_t> exclude_pool_index_)
-    : ISourceStep(DataStream{.header = std::move(header_)})
+    : ISourceStep(std::move(header_))
     , cluster(cluster_)
     , query_ast(query_ast_)
     , storage_id(storage_id_)
@@ -476,12 +476,12 @@ void ReadFromParallelRemoteReplicasStep::addPipeForSingeReplica(
     String query_string = formattedAST(query_ast);
 
     assert(stage != QueryProcessingStage::Complete);
-    assert(output_stream);
+    assert(output_header);
 
     auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
         pool,
         query_string,
-        output_stream->header,
+        *output_header,
         context,
         throttler,
         scalars,
@@ -493,7 +493,7 @@ void ReadFromParallelRemoteReplicasStep::addPipeForSingeReplica(
     remote_query_executor->setMainTable(storage_id);
 
     pipes.emplace_back(createRemoteSourcePipe(std::move(remote_query_executor), add_agg_info, add_totals, add_extremes, async_read, async_query_sending));
-    addConvertingActions(pipes.back(), output_stream->header);
+    addConvertingActions(pipes.back(), *output_header);
 }
 
 }
diff --git a/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp b/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp
index 66db1e77bdd..e3f11da6a0b 100644
--- a/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp
+++ b/src/Processors/QueryPlan/ReadFromStreamLikeEngine.cpp
@@ -21,7 +21,7 @@ ReadFromStreamLikeEngine::ReadFromStreamLikeEngine(
     const StorageSnapshotPtr & storage_snapshot_,
     std::shared_ptr<const StorageLimitsList> storage_limits_,
     ContextPtr context_)
-    : ISourceStep{DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)}}
+    : ISourceStep{storage_snapshot_->getSampleBlockForColumns(column_names_)}
     , WithContext{context_}
     , storage_limits{std::move(storage_limits_)}
 {
diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
index fa6811dd2db..d2f3423400f 100644
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@@ -404,7 +404,7 @@ ReadFromSystemNumbersStep::ReadFromSystemNumbersStep(
     size_t max_block_size_,
     size_t num_streams_)
     : SourceStepWithFilter(
-        DataStream{.header = storage_snapshot_->getSampleBlockForColumns(column_names_)},
+        storage_snapshot_->getSampleBlockForColumns(column_names_),
         column_names_,
         query_info_,
         storage_snapshot_,
@@ -431,8 +431,8 @@ void ReadFromSystemNumbersStep::initializePipeline(QueryPipelineBuilder & pipeli
 
     if (pipe.empty())
     {
-        assert(output_stream != std::nullopt);
-        pipe = Pipe(std::make_shared<NullSource>(output_stream->header));
+        assert(output_header != std::nullopt);
+        pipe = Pipe(std::make_shared<NullSource>(*output_header));
     }
 
     /// Add storage limits.
diff --git a/src/Processors/QueryPlan/ReadNothingStep.cpp b/src/Processors/QueryPlan/ReadNothingStep.cpp
index 3037172bbd4..511f7663e35 100644
--- a/src/Processors/QueryPlan/ReadNothingStep.cpp
+++ b/src/Processors/QueryPlan/ReadNothingStep.cpp
@@ -5,14 +5,14 @@
 namespace DB
 {
 
-ReadNothingStep::ReadNothingStep(Block output_header)
-    : ISourceStep(DataStream{.header = std::move(output_header)})
+ReadNothingStep::ReadNothingStep(Block output_header_)
+    : ISourceStep(std::move(output_header_))
 {
 }
 
 void ReadNothingStep::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    pipeline.init(Pipe(std::make_shared<NullSource>(getOutputStream().header)));
+    pipeline.init(Pipe(std::make_shared<NullSource>(getOutputHeader())));
 }
 
 }
diff --git a/src/Processors/QueryPlan/RollupStep.cpp b/src/Processors/QueryPlan/RollupStep.cpp
index aa6efa930e6..f48b13d71df 100644
--- a/src/Processors/QueryPlan/RollupStep.cpp
+++ b/src/Processors/QueryPlan/RollupStep.cpp
@@ -21,8 +21,8 @@ static ITransformingStep::Traits getTraits()
     };
 }
 
-RollupStep::RollupStep(const DataStream & input_stream_, Aggregator::Params params_, bool final_, bool use_nulls_)
-    : ITransformingStep(input_stream_, generateOutputHeader(params_.getHeader(input_stream_.header, final_), params_.keys, use_nulls_), getTraits())
+RollupStep::RollupStep(const Header & input_header_, Aggregator::Params params_, bool final_, bool use_nulls_)
+    : ITransformingStep(input_header_, generateOutputHeader(params_.getHeader(input_header_, final_), params_.keys, use_nulls_), getTraits())
     , params(std::move(params_))
     , keys_size(params.keys_size)
     , final(final_)
@@ -46,13 +46,9 @@ void RollupStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ
     });
 }
 
-void RollupStep::updateOutputStream()
+void RollupStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(),
-        generateOutputHeader(params.getHeader(input_streams.front().header, final), params.keys, use_nulls),
-        getDataStreamTraits());
+    output_header = generateOutputHeader(params.getHeader(input_headers.front(), final), params.keys, use_nulls);
 }
 
-
 }
diff --git a/src/Processors/QueryPlan/RollupStep.h b/src/Processors/QueryPlan/RollupStep.h
index 0f9c1592969..8b9cbf76088 100644
--- a/src/Processors/QueryPlan/RollupStep.h
+++ b/src/Processors/QueryPlan/RollupStep.h
@@ -13,7 +13,7 @@ using AggregatingTransformParamsPtr = std::shared_ptr<AggregatingTransformParams
 class RollupStep : public ITransformingStep
 {
 public:
-    RollupStep(const DataStream & input_stream_, Aggregator::Params params_, bool final_, bool use_nulls_);
+    RollupStep(const Header & input_header_, Aggregator::Params params_, bool final_, bool use_nulls_);
 
     String getName() const override { return "Rollup"; }
 
@@ -22,7 +22,7 @@ public:
     const Aggregator::Params & getParams() const { return params; }
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     Aggregator::Params params;
     size_t keys_size;
diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp
index e9d91937c7b..5ad2f1f62d5 100644
--- a/src/Processors/QueryPlan/SortingStep.cpp
+++ b/src/Processors/QueryPlan/SortingStep.cpp
@@ -77,11 +77,11 @@ static ITransformingStep::Traits getTraits(size_t limit)
 }
 
 SortingStep::SortingStep(
-    const DataStream & input_stream,
+    const Header & input_header,
     SortDescription description_,
     UInt64 limit_,
     const Settings & settings_)
-    : ITransformingStep(input_stream, input_stream.header, getTraits(limit_))
+    : ITransformingStep(input_header, input_header, getTraits(limit_))
     , type(Type::Full)
     , result_description(std::move(description_))
     , limit(limit_)
@@ -92,23 +92,23 @@ SortingStep::SortingStep(
 }
 
 SortingStep::SortingStep(
-        const DataStream & input_stream,
+        const Header & input_header,
         const SortDescription & description_,
         const SortDescription & partition_by_description_,
         UInt64 limit_,
         const Settings & settings_)
-    : SortingStep(input_stream, description_, limit_, settings_)
+    : SortingStep(input_header, description_, limit_, settings_)
 {
     partition_by_description = partition_by_description_;
 }
 
 SortingStep::SortingStep(
-    const DataStream & input_stream_,
+    const Header & input_header,
     SortDescription prefix_description_,
     SortDescription result_description_,
     size_t max_block_size_,
     UInt64 limit_)
-    : ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_))
+    : ITransformingStep(input_header, input_header, getTraits(limit_))
     , type(Type::FinishSorting)
     , prefix_description(std::move(prefix_description_))
     , result_description(std::move(result_description_))
@@ -118,12 +118,12 @@ SortingStep::SortingStep(
 }
 
 SortingStep::SortingStep(
-    const DataStream & input_stream,
+    const Header & input_header,
     SortDescription sort_description_,
     size_t max_block_size_,
     UInt64 limit_,
     bool always_read_till_end_)
-    : ITransformingStep(input_stream, input_stream.header, getTraits(limit_))
+    : ITransformingStep(input_header, input_header, getTraits(limit_))
     , type(Type::MergingSorted)
     , result_description(std::move(sort_description_))
     , limit(limit_)
@@ -133,9 +133,9 @@ SortingStep::SortingStep(
     sort_settings.max_block_size = max_block_size_;
 }
 
-void SortingStep::updateOutputStream()
+void SortingStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
+    output_header = input_headers.front();
 }
 
 void SortingStep::updateLimit(size_t limit_)
diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h
index 8f798c10006..6cdf626d4c8 100644
--- a/src/Processors/QueryPlan/SortingStep.h
+++ b/src/Processors/QueryPlan/SortingStep.h
@@ -36,14 +36,14 @@ public:
 
     /// Full
     SortingStep(
-        const DataStream & input_stream,
+        const Header & input_header,
         SortDescription description_,
         UInt64 limit_,
         const Settings & settings_);
 
     /// Full with partitioning
     SortingStep(
-        const DataStream & input_stream,
+        const Header & input_header,
         const SortDescription & description_,
         const SortDescription & partition_by_description_,
         UInt64 limit_,
@@ -51,7 +51,7 @@ public:
 
     /// FinishSorting
     SortingStep(
-        const DataStream & input_stream_,
+        const Header & input_header,
         SortDescription prefix_description_,
         SortDescription result_description_,
         size_t max_block_size_,
@@ -59,7 +59,7 @@ public:
 
     /// MergingSorted
     SortingStep(
-        const DataStream & input_stream,
+        const Header & input_header,
         SortDescription sort_description_,
         size_t max_block_size_,
         UInt64 limit_ = 0,
@@ -95,7 +95,7 @@ public:
 
 private:
     void scatterByPartitionIfNeeded(QueryPipelineBuilder& pipeline);
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     static void mergeSorting(
         QueryPipelineBuilder & pipeline,
diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.cpp b/src/Processors/QueryPlan/SourceStepWithFilter.cpp
index 3de9ae37db0..f55d5ccddce 100644
--- a/src/Processors/QueryPlan/SourceStepWithFilter.cpp
+++ b/src/Processors/QueryPlan/SourceStepWithFilter.cpp
@@ -86,7 +86,7 @@ void SourceStepWithFilter::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_i
 {
     query_info.prewhere_info = prewhere_info_value;
     prewhere_info = prewhere_info_value;
-    output_stream = DataStream{.header = applyPrewhereActions(output_stream->header, prewhere_info)};
+    output_header = applyPrewhereActions(*output_header, prewhere_info);
 }
 
 void SourceStepWithFilter::describeActions(FormatSettings & format_settings) const
diff --git a/src/Processors/QueryPlan/SourceStepWithFilter.h b/src/Processors/QueryPlan/SourceStepWithFilter.h
index 6cea5fd7245..be6585b8755 100644
--- a/src/Processors/QueryPlan/SourceStepWithFilter.h
+++ b/src/Processors/QueryPlan/SourceStepWithFilter.h
@@ -19,12 +19,12 @@ public:
     using Base::Base;
 
     SourceStepWithFilter(
-        DataStream output_stream_,
+        Header output_header_,
         const Names & column_names_,
         const SelectQueryInfo & query_info_,
         const StorageSnapshotPtr & storage_snapshot_,
         const ContextPtr & context_)
-        : ISourceStep(std::move(output_stream_))
+        : ISourceStep(std::move(output_header_))
         , required_source_columns(column_names_)
         , query_info(query_info_)
         , prewhere_info(query_info.prewhere_info)
diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp
index 2554053064f..5cd483862ff 100644
--- a/src/Processors/QueryPlan/TotalsHavingStep.cpp
+++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp
@@ -26,7 +26,7 @@ static ITransformingStep::Traits getTraits(bool has_filter)
 }
 
 TotalsHavingStep::TotalsHavingStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     const AggregateDescriptions & aggregates_,
     bool overflow_row_,
     std::optional<ActionsDAG> actions_dag_,
@@ -36,14 +36,14 @@ TotalsHavingStep::TotalsHavingStep(
     double auto_include_threshold_,
     bool final_)
     : ITransformingStep(
-        input_stream_,
+        input_header_,
         TotalsHavingTransform::transformHeader(
-            input_stream_.header,
+            input_header_,
             actions_dag_ ? &*actions_dag_ : nullptr,
             filter_column_,
             remove_filter_,
             final_,
-            getAggregatesMask(input_stream_.header, aggregates_)),
+            getAggregatesMask(input_header_, aggregates_)),
         getTraits(!filter_column_.empty()))
     , aggregates(aggregates_)
     , overflow_row(overflow_row_)
@@ -129,18 +129,16 @@ void TotalsHavingStep::describeActions(JSONBuilder::JSONMap & map) const
     }
 }
 
-void TotalsHavingStep::updateOutputStream()
+void TotalsHavingStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(),
+    output_header =
         TotalsHavingTransform::transformHeader(
-            input_streams.front().header,
+            input_headers.front(),
             getActions(),
             filter_column_name,
             remove_filter,
             final,
-            getAggregatesMask(input_streams.front().header, aggregates)),
-        getDataStreamTraits());
+            getAggregatesMask(input_headers.front(), aggregates));
 }
 
 
diff --git a/src/Processors/QueryPlan/TotalsHavingStep.h b/src/Processors/QueryPlan/TotalsHavingStep.h
index 4b414d41c57..3bcd6360c80 100644
--- a/src/Processors/QueryPlan/TotalsHavingStep.h
+++ b/src/Processors/QueryPlan/TotalsHavingStep.h
@@ -13,7 +13,7 @@ class TotalsHavingStep : public ITransformingStep
 {
 public:
     TotalsHavingStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         const AggregateDescriptions & aggregates_,
         bool overflow_row_,
         std::optional<ActionsDAG> actions_dag_,
@@ -33,7 +33,7 @@ public:
     const ActionsDAG * getActions() const { return actions_dag ? &*actions_dag : nullptr; }
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     const AggregateDescriptions aggregates;
 
diff --git a/src/Processors/QueryPlan/UnionStep.cpp b/src/Processors/QueryPlan/UnionStep.cpp
index 6c5250a8ad2..b7a87b27be5 100644
--- a/src/Processors/QueryPlan/UnionStep.cpp
+++ b/src/Processors/QueryPlan/UnionStep.cpp
@@ -14,31 +14,28 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 }
 
-static Block checkHeaders(const DataStreams & input_streams)
+static Block checkHeaders(const Headers & input_headers)
 {
-    if (input_streams.empty())
+    if (input_headers.empty())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unite an empty set of query plan steps");
 
-    Block res = input_streams.front().header;
-    for (const auto & stream : input_streams)
-        assertBlocksHaveEqualStructure(stream.header, res, "UnionStep");
+    Block res = input_headers.front();
+    for (const auto & header : input_headers)
+        assertBlocksHaveEqualStructure(header, res, "UnionStep");
 
     return res;
 }
 
-UnionStep::UnionStep(DataStreams input_streams_, size_t max_threads_)
-    : header(checkHeaders(input_streams_))
-    , max_threads(max_threads_)
+UnionStep::UnionStep(Headers input_headers_, size_t max_threads_)
+    : max_threads(max_threads_)
 {
-    updateInputStreams(std::move(input_streams_));
+    updateInputHeaders(std::move(input_headers_));
 }
 
-void UnionStep::updateOutputStream()
+void UnionStep::updateOutputHeader()
 {
-    if (input_streams.size() == 1)
-        output_stream = input_streams.front();
-    else
-        output_stream = DataStream{.header = header};
+    if (input_headers.size() == 1 || !output_header)
+        output_header = checkHeaders(input_headers);
 }
 
 QueryPipelineBuilderPtr UnionStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &)
@@ -48,7 +45,7 @@ QueryPipelineBuilderPtr UnionStep::updatePipeline(QueryPipelineBuilders pipeline
     if (pipelines.empty())
     {
         QueryPipelineProcessorsCollector collector(*pipeline, this);
-        pipeline->init(Pipe(std::make_shared<NullSource>(output_stream->header)));
+        pipeline->init(Pipe(std::make_shared<NullSource>(*output_header)));
         processors = collector.detachProcessors();
         return pipeline;
     }
@@ -56,16 +53,16 @@ QueryPipelineBuilderPtr UnionStep::updatePipeline(QueryPipelineBuilders pipeline
     for (auto & cur_pipeline : pipelines)
     {
 #if !defined(NDEBUG)
-        assertCompatibleHeader(cur_pipeline->getHeader(), getOutputStream().header, "UnionStep");
+        assertCompatibleHeader(cur_pipeline->getHeader(), getOutputHeader(), "UnionStep");
 #endif
         /// Headers for union must be equal.
         /// But, just in case, convert it to the same header if not.
-        if (!isCompatibleHeader(cur_pipeline->getHeader(), getOutputStream().header))
+        if (!isCompatibleHeader(cur_pipeline->getHeader(), getOutputHeader()))
         {
             QueryPipelineProcessorsCollector collector(*cur_pipeline, this);
             auto converting_dag = ActionsDAG::makeConvertingActions(
                 cur_pipeline->getHeader().getColumnsWithTypeAndName(),
-                getOutputStream().header.getColumnsWithTypeAndName(),
+                getOutputHeader().getColumnsWithTypeAndName(),
                 ActionsDAG::MatchColumnsMode::Name);
 
             auto converting_actions = std::make_shared<ExpressionActions>(std::move(converting_dag));
diff --git a/src/Processors/QueryPlan/UnionStep.h b/src/Processors/QueryPlan/UnionStep.h
index 4ab08785b01..a98d2ef06f3 100644
--- a/src/Processors/QueryPlan/UnionStep.h
+++ b/src/Processors/QueryPlan/UnionStep.h
@@ -9,7 +9,7 @@ class UnionStep : public IQueryPlanStep
 {
 public:
     /// max_threads is used to limit the number of threads for result pipeline.
-    explicit UnionStep(DataStreams input_streams_, size_t max_threads_ = 0);
+    explicit UnionStep(Headers input_headers_, size_t max_threads_ = 0);
 
     String getName() const override { return "Union"; }
 
@@ -19,12 +19,11 @@ public:
 
     size_t getMaxThreads() const { return max_threads; }
 
-    bool canUpdateInputStream() const override { return true; }
+    bool canUpdateInputHeader() const override { return true; }
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
-    Block header;
     size_t max_threads;
 };
 
diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp
index 3d1faf7c079..ea7a31a503e 100644
--- a/src/Processors/QueryPlan/WindowStep.cpp
+++ b/src/Processors/QueryPlan/WindowStep.cpp
@@ -44,11 +44,11 @@ static Block addWindowFunctionResultColumns(const Block & block,
 }
 
 WindowStep::WindowStep(
-    const DataStream & input_stream_,
+    const Header & input_header_,
     const WindowDescription & window_description_,
     const std::vector<WindowFunctionDescription> & window_functions_,
     bool streams_fan_out_)
-    : ITransformingStep(input_stream_, addWindowFunctionResultColumns(input_stream_.header, window_functions_), getTraits(!streams_fan_out_))
+    : ITransformingStep(input_header_, addWindowFunctionResultColumns(input_header_, window_functions_), getTraits(!streams_fan_out_))
     , window_description(window_description_)
     , window_functions(window_functions_)
     , streams_fan_out(streams_fan_out_)
@@ -74,7 +74,7 @@ void WindowStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ
         [&](const Block & /*header*/)
         {
             return std::make_shared<WindowTransform>(
-                input_streams.front().header, output_stream->header, window_description, window_functions);
+                input_headers.front(), *output_header, window_description, window_functions);
         });
 
     if (streams_fan_out)
@@ -82,7 +82,7 @@ void WindowStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ
         pipeline.resize(num_threads);
     }
 
-    assertBlocksHaveEqualStructure(pipeline.getHeader(), output_stream->header,
+    assertBlocksHaveEqualStructure(pipeline.getHeader(), *output_header,
         "WindowStep transform for '" + window_description.window_name + "'");
 }
 
@@ -144,10 +144,9 @@ void WindowStep::describeActions(JSONBuilder::JSONMap & map) const
     map.add("Functions", std::move(functions_array));
 }
 
-void WindowStep::updateOutputStream()
+void WindowStep::updateOutputHeader()
 {
-    output_stream = createOutputStream(
-        input_streams.front(), addWindowFunctionResultColumns(input_streams.front().header, window_functions), getDataStreamTraits());
+    output_header = addWindowFunctionResultColumns(input_headers.front(), window_functions);
 
     window_description.checkValid();
 }
diff --git a/src/Processors/QueryPlan/WindowStep.h b/src/Processors/QueryPlan/WindowStep.h
index d79cd7fd45e..8b336a8b219 100644
--- a/src/Processors/QueryPlan/WindowStep.h
+++ b/src/Processors/QueryPlan/WindowStep.h
@@ -11,7 +11,7 @@ class WindowTransform;
 class WindowStep : public ITransformingStep
 {
 public:
-    explicit WindowStep(const DataStream & input_stream_,
+    explicit WindowStep(const Header & input_header_,
             const WindowDescription & window_description_,
             const std::vector<WindowFunctionDescription> & window_functions_,
             bool streams_fan_out_);
@@ -26,7 +26,7 @@ public:
     const WindowDescription & getWindowDescription() const;
 
 private:
-    void updateOutputStream() override;
+    void updateOutputHeader() override;
 
     WindowDescription window_description;
     std::vector<WindowFunctionDescription> window_functions;
diff --git a/src/Processors/Transforms/CubeTransform.cpp b/src/Processors/Transforms/CubeTransform.cpp
index afbb996f56e..5e88c102165 100644
--- a/src/Processors/Transforms/CubeTransform.cpp
+++ b/src/Processors/Transforms/CubeTransform.cpp
@@ -7,7 +7,7 @@ namespace DB
 {
 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
+    extern const int TOO_MANY_COLUMNS;
 }
 
 CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_, bool use_nulls_)
@@ -15,7 +15,7 @@ CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_
     , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates))
 {
     if (keys.size() >= 8 * sizeof(mask))
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Too many keys are used for CubeTransform.");
+        throw Exception(ErrorCodes::TOO_MANY_COLUMNS, "Too many keys ({}) are used for CubeTransform, the maximum is {}.", keys.size(), 8 * sizeof(mask) - 1);
 }
 
 Chunk CubeTransform::generate()
diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp
index 6f43a8e4b24..37948cd55e7 100644
--- a/src/Processors/Transforms/WindowTransform.cpp
+++ b/src/Processors/Transforms/WindowTransform.cpp
@@ -1495,11 +1495,10 @@ void WindowTransform::work()
     }
 }
 
-struct WindowFunctionRank final : public WindowFunction
+struct WindowFunctionRank final : public StatelessWindowFunction
 {
-    WindowFunctionRank(const std::string & name_,
-            const DataTypes & argument_types_, const Array & parameters_)
-        : WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
+    WindowFunctionRank(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
+        : StatelessWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
     {}
 
     bool allocatesMemoryInArena() const override { return false; }
@@ -1514,11 +1513,10 @@ struct WindowFunctionRank final : public WindowFunction
     }
 };
 
-struct WindowFunctionDenseRank final : public WindowFunction
+struct WindowFunctionDenseRank final : public StatelessWindowFunction
 {
-    WindowFunctionDenseRank(const std::string & name_,
-            const DataTypes & argument_types_, const Array & parameters_)
-        : WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
+    WindowFunctionDenseRank(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
+        : StatelessWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
     {}
 
     bool allocatesMemoryInArena() const override { return false; }
@@ -1716,7 +1714,7 @@ struct WindowFunctionExponentialTimeDecayedSum final : public StatefulWindowFunc
         const Float64 decay_length;
 };
 
-struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction
+struct WindowFunctionExponentialTimeDecayedMax final : public StatelessWindowFunction
 {
     static constexpr size_t ARGUMENT_VALUE = 0;
     static constexpr size_t ARGUMENT_TIME = 1;
@@ -1731,9 +1729,8 @@ struct WindowFunctionExponentialTimeDecayedMax final : public WindowFunction
         return applyVisitor(FieldVisitorConvertToNumber<Float64>(), parameters_[0]);
     }
 
-    WindowFunctionExponentialTimeDecayedMax(const std::string & name_,
-            const DataTypes & argument_types_, const Array & parameters_)
-        : WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
+    WindowFunctionExponentialTimeDecayedMax(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
+        : StatelessWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeFloat64>())
         , decay_length(getDecayLength(parameters_, name_))
     {
         if (argument_types.size() != 2)
@@ -1991,11 +1988,10 @@ struct WindowFunctionExponentialTimeDecayedAvg final : public StatefulWindowFunc
         const Float64 decay_length;
 };
 
-struct WindowFunctionRowNumber final : public WindowFunction
+struct WindowFunctionRowNumber final : public StatelessWindowFunction
 {
-    WindowFunctionRowNumber(const std::string & name_,
-            const DataTypes & argument_types_, const Array & parameters_)
-        : WindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
+    WindowFunctionRowNumber(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
+        : StatelessWindowFunction(name_, argument_types_, parameters_, std::make_shared<DataTypeUInt64>())
     {}
 
     bool allocatesMemoryInArena() const override { return false; }
@@ -2273,13 +2269,12 @@ public:
 
 // ClickHouse-specific variant of lag/lead that respects the window frame.
 template <bool is_lead>
-struct WindowFunctionLagLeadInFrame final : public WindowFunction
+struct WindowFunctionLagLeadInFrame final : public StatelessWindowFunction
 {
     FunctionBasePtr func_cast = nullptr;
 
-    WindowFunctionLagLeadInFrame(const std::string & name_,
-            const DataTypes & argument_types_, const Array & parameters_)
-        : WindowFunction(name_, argument_types_, parameters_, createResultType(argument_types_, name_))
+    WindowFunctionLagLeadInFrame(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
+        : StatelessWindowFunction(name_, argument_types_, parameters_, createResultType(argument_types_, name_))
     {
         if (!parameters.empty())
         {
@@ -2427,11 +2422,10 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction
     }
 };
 
-struct WindowFunctionNthValue final : public WindowFunction
+struct WindowFunctionNthValue final : public StatelessWindowFunction
 {
-    WindowFunctionNthValue(const std::string & name_,
-            const DataTypes & argument_types_, const Array & parameters_)
-        : WindowFunction(name_, argument_types_, parameters_, createResultType(name_, argument_types_))
+    WindowFunctionNthValue(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_)
+        : StatelessWindowFunction(name_, argument_types_, parameters_, createResultType(name_, argument_types_))
     {
         if (!parameters.empty())
         {
diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp
index ae1fb6d629e..cd18eac50a7 100644
--- a/src/Server/PrometheusRequestHandler.cpp
+++ b/src/Server/PrometheusRequestHandler.cpp
@@ -52,7 +52,7 @@ protected:
     const PrometheusRequestHandlerConfig & config() { return parent().config; }
     PrometheusMetricsWriter & metrics_writer() { return *parent().metrics_writer; }
     LoggerPtr log() { return parent().log; }
-    WriteBuffer & getOutputStream(HTTPServerResponse & response) { return parent().getOutputStream(response); }
+    WriteBuffer & getOutputHeader(HTTPServerResponse & response) { return parent().getOutputHeader(response); }
 
 private:
     PrometheusRequestHandler & parent_ref;
@@ -74,7 +74,7 @@ public:
     void handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response) override
     {
         response.setContentType("text/plain; version=0.0.4; charset=UTF-8");
-        auto & out = getOutputStream(response);
+        auto & out = getOutputHeader(response);
 
         if (config().expose_events)
             metrics_writer().writeEvents(out);
@@ -288,7 +288,7 @@ public:
         response.setContentType("application/x-protobuf");
         response.set("Content-Encoding", "snappy");
 
-        ProtobufZeroCopyOutputStreamFromWriteBuffer zero_copy_output_stream{std::make_unique<SnappyWriteBuffer>(getOutputStream(response))};
+        ProtobufZeroCopyOutputStreamFromWriteBuffer zero_copy_output_stream{std::make_unique<SnappyWriteBuffer>(getOutputHeader(response))};
         read_response.SerializeToZeroCopyStream(&zero_copy_output_stream);
         zero_copy_output_stream.finalize();
 
@@ -372,7 +372,7 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe
     }
 }
 
-WriteBufferFromHTTPServerResponse & PrometheusRequestHandler::getOutputStream(HTTPServerResponse & response)
+WriteBufferFromHTTPServerResponse & PrometheusRequestHandler::getOutputHeader(HTTPServerResponse & response)
 {
     if (response_finalized)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "PrometheusRequestHandler: Response already sent");
@@ -411,7 +411,7 @@ void PrometheusRequestHandler::trySendExceptionToClient(const String & exception
 
     try
     {
-        sendExceptionToHTTPClient(exception_message, exception_code, request, response, &getOutputStream(response), log);
+        sendExceptionToHTTPClient(exception_message, exception_code, request, response, &getOutputHeader(response), log);
     }
     catch (...)
     {
diff --git a/src/Server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h
index 281ecf5260e..b5a844f25f7 100644
--- a/src/Server/PrometheusRequestHandler.h
+++ b/src/Server/PrometheusRequestHandler.h
@@ -29,7 +29,7 @@ private:
     void createImpl();
 
     /// Returns the write buffer used for the current HTTP response.
-    WriteBufferFromHTTPServerResponse & getOutputStream(HTTPServerResponse & response);
+    WriteBufferFromHTTPServerResponse & getOutputHeader(HTTPServerResponse & response);
 
     /// Finalizes the output stream and sends the response to the client.
     void finalizeResponse(HTTPServerResponse & response);
diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp
index 5642e8ea486..df3e3a710a2 100644
--- a/src/Storages/FileLog/StorageFileLog.cpp
+++ b/src/Storages/FileLog/StorageFileLog.cpp
@@ -174,7 +174,7 @@ StorageFileLog::StorageFileLog(
     {
         if (mode < LoadingStrictnessLevel::ATTACH)
         {
-            if (disk->exists(metadata_base_path))
+            if (disk->existsDirectory(metadata_base_path))
             {
                 throw Exception(
                     ErrorCodes::TABLE_METADATA_ALREADY_EXISTS,
@@ -232,7 +232,7 @@ void StorageFileLog::loadMetaFiles(bool attach)
     if (attach)
     {
         /// Meta file may lost, log and create directory
-        if (!disk->exists(metadata_base_path))
+        if (!disk->existsDirectory(metadata_base_path))
         {
             /// Create metadata_base_path directory when store meta data
             LOG_ERROR(log, "Metadata files of table {} are lost.", getStorageID().getTableName());
@@ -329,7 +329,7 @@ void StorageFileLog::serialize() const
 void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const
 {
     auto full_path = getFullMetaPath(file_meta.file_name);
-    if (disk->exists(full_path))
+    if (disk->existsFile(full_path))
     {
         checkOffsetIsValid(file_meta.file_name, file_meta.last_writen_position);
     }
@@ -355,7 +355,7 @@ void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const
 
 void StorageFileLog::deserialize()
 {
-    if (!disk->exists(metadata_base_path))
+    if (!disk->existsDirectory(metadata_base_path))
         return;
 
     std::vector<std::string> files_to_remove;
@@ -547,7 +547,7 @@ void StorageFileLog::checkOffsetIsValid(const String & filename, UInt64 offset)
 StorageFileLog::ReadMetadataResult StorageFileLog::readMetadata(const String & filename) const
 {
     auto full_path = getFullMetaPath(filename);
-    if (!disk->isFile(full_path))
+    if (!disk->existsFile(full_path))
     {
         throw Exception(
             ErrorCodes::BAD_FILE_TYPE,
diff --git a/src/Storages/Freeze.cpp b/src/Storages/Freeze.cpp
index a5a5a07c9a1..8e983c9a5dd 100644
--- a/src/Storages/Freeze.cpp
+++ b/src/Storages/Freeze.cpp
@@ -69,7 +69,7 @@ bool FreezeMetaData::load(DiskPtr data_disk, const String & path)
     auto metadata_storage = data_disk->getMetadataStorage();
     auto file_path = getFileName(path);
 
-    if (!metadata_storage->exists(file_path))
+    if (!metadata_storage->existsFile(file_path))
         return false;
     auto metadata_str = metadata_storage->readFileToString(file_path);
     ReadBufferFromString buffer(metadata_str);
@@ -105,7 +105,7 @@ void FreezeMetaData::clean(DiskPtr data_disk, const String & path)
 {
     auto metadata_storage = data_disk->getMetadataStorage();
     auto fname = getFileName(path);
-    if (metadata_storage->exists(fname))
+    if (metadata_storage->existsFile(fname))
     {
         auto tx = metadata_storage->createTransaction();
         tx->unlinkFile(fname);
@@ -150,10 +150,11 @@ BlockIO Unfreezer::systemUnfreeze(const String & backup_name)
 
     for (const auto & disk: disks)
     {
-        for (const auto& store_path: store_paths)
+        for (const auto & store_path : store_paths)
         {
-            if (!disk->exists(store_path))
+            if (!disk->existsDirectory(store_path))
                 continue;
+
             for (auto prefix_it = disk->iterateDirectory(store_path); prefix_it->isValid(); prefix_it->next())
             {
                 auto prefix_directory = store_path / prefix_it->name();
@@ -173,7 +174,8 @@ BlockIO Unfreezer::systemUnfreeze(const String & backup_name)
                 }
             }
         }
-        if (disk->exists(backup_path))
+
+        if (disk->existsDirectory(backup_path))
         {
             /// After unfreezing we need to clear revision.txt file and empty directories
             disk->removeRecursive(backup_path);
@@ -211,7 +213,7 @@ PartitionCommandsResultInfo Unfreezer::unfreezePartitionsFromTableDirectory(Merg
 
     for (const auto & disk : disks)
     {
-        if (!disk->exists(table_directory))
+        if (!disk->existsDirectory(table_directory))
             continue;
 
         for (auto it = disk->iterateDirectory(table_directory); it->isValid(); it->next())
diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp
index 5d312de8e5d..902f7e05566 100644
--- a/src/Storages/Hive/StorageHive.cpp
+++ b/src/Storages/Hive/StorageHive.cpp
@@ -802,7 +802,7 @@ public:
         LoggerPtr log_,
         size_t max_block_size_,
         size_t num_streams_)
-        : SourceStepWithFilter(DataStream{.header = std::move(header)}, column_names_, query_info_, storage_snapshot_, context_)
+        : SourceStepWithFilter(std::move(header), column_names_, query_info_, storage_snapshot_, context_)
         , storage(std::move(storage_))
         , sources_info(std::move(sources_info_))
         , builder(std::move(builder_))
@@ -925,7 +925,7 @@ void ReadFromHive::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
 
     if (hive_files->empty())
     {
-        pipeline.init(Pipe(std::make_shared<NullSource>(getOutputStream().header)));
+        pipeline.init(Pipe(std::make_shared<NullSource>(getOutputHeader())));
         return;
     }
 
@@ -949,7 +949,7 @@ void ReadFromHive::initializePipeline(QueryPipelineBuilder & pipeline, const Bui
 
     auto pipe = Pipe::unitePipes(std::move(pipes));
     if (pipe.empty())
-        pipe = Pipe(std::make_shared<NullSource>(getOutputStream().header));
+        pipe = Pipe(std::make_shared<NullSource>(getOutputHeader()));
 
     for (const auto & processor : pipe.getProcessors())
         processors.emplace_back(processor);
diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp
index a2efa4201a7..23f1811d330 100644
--- a/src/Storages/IStorage.cpp
+++ b/src/Storages/IStorage.cpp
@@ -31,6 +31,7 @@ namespace ErrorCodes
     extern const int NOT_IMPLEMENTED;
     extern const int DEADLOCK_AVOIDED;
     extern const int CANNOT_RESTORE_TABLE;
+    extern const int TABLE_IS_BEING_RESTARTED;
 }
 
 IStorage::IStorage(StorageID storage_id_, std::unique_ptr<StorageInMemoryMetadata> metadata_)
@@ -66,12 +67,13 @@ RWLockImpl::LockHolder IStorage::tryLockTimed(
 TableLockHolder IStorage::lockForShare(const String & query_id, const std::chrono::milliseconds & acquire_timeout)
 {
     TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout);
-
-    if (is_dropped || is_detached)
-    {
-        auto table_id = getStorageID();
+    auto table_id = getStorageID();
+    if (!table_id.hasUUID() && (is_dropped || is_detached))
         throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped or detached", table_id.database_name, table_id.table_name);
-    }
+
+    if (is_being_restarted)
+        throw Exception(
+            ErrorCodes::TABLE_IS_BEING_RESTARTED, "Table {}.{} is being restarted", table_id.database_name, table_id.table_name);
     return result;
 }
 
@@ -79,12 +81,10 @@ TableLockHolder IStorage::tryLockForShare(const String & query_id, const std::ch
 {
     TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout);
 
-    if (is_dropped || is_detached)
-    {
-        // Table was dropped while acquiring the lock
+    auto table_id = getStorageID();
+    if (is_being_restarted || (!table_id.hasUUID() && (is_dropped || is_detached)))
+        // Table was dropped or is being restarted while acquiring the lock
         result = nullptr;
-    }
-
     return result;
 }
 
diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp
index 32c3ee4130e..219092e7ab5 100644
--- a/src/Storages/IStorageCluster.cpp
+++ b/src/Storages/IStorageCluster.cpp
@@ -66,7 +66,7 @@ public:
         ClusterPtr cluster_,
         LoggerPtr log_)
         : SourceStepWithFilter(
-            DataStream{.header = std::move(sample_block)},
+            std::move(sample_block),
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -191,7 +191,7 @@ void ReadFromCluster::initializePipeline(QueryPipelineBuilder & pipeline, const
             auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
                 std::vector<IConnectionPool::Entry>{try_result},
                 queryToString(query_to_send),
-                getOutputStream().header,
+                getOutputHeader(),
                 new_context,
                 /*throttler=*/nullptr,
                 scalars,
@@ -210,7 +210,7 @@ void ReadFromCluster::initializePipeline(QueryPipelineBuilder & pipeline, const
 
     auto pipe = Pipe::unitePipes(std::move(pipes));
     if (pipe.empty())
-        pipe = Pipe(std::make_shared<NullSource>(getOutputStream().header));
+        pipe = Pipe(std::make_shared<NullSource>(getOutputHeader()));
 
     for (const auto & processor : pipe.getProcessors())
         processors.emplace_back(processor);
diff --git a/src/Storages/MergeTree/Backup.cpp b/src/Storages/MergeTree/Backup.cpp
index 8ba37ffc042..608716934aa 100644
--- a/src/Storages/MergeTree/Backup.cpp
+++ b/src/Storages/MergeTree/Backup.cpp
@@ -48,7 +48,7 @@ void BackupImpl(
         auto source = it->path();
         auto destination = fs::path(destination_path) / it->name();
 
-        if (!src_disk->isDirectory(source))
+        if (!src_disk->existsDirectory(source))
         {
             if (make_source_readonly)
             {
@@ -142,7 +142,7 @@ void Backup(
     const NameSet & files_to_copy_intead_of_hardlinks,
     DiskTransactionPtr disk_transaction)
 {
-    if (dst_disk->exists(destination_path) && !dst_disk->isDirectoryEmpty(destination_path))
+    if (dst_disk->existsFileOrDirectory(destination_path) && !dst_disk->isDirectoryEmpty(destination_path))
     {
         throw DB::Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists and is not empty.",
                             DB::fullPath(dst_disk, destination_path));
diff --git a/src/Storages/MergeTree/CMakeLists.txt b/src/Storages/MergeTree/CMakeLists.txt
index e69de29bb2d..a5ec3212ef2 100644
--- a/src/Storages/MergeTree/CMakeLists.txt
+++ b/src/Storages/MergeTree/CMakeLists.txt
@@ -0,0 +1 @@
+add_subdirectory(MergeSelectors)
diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
index fd0fa38138f..3f8fef6fbff 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
@@ -85,7 +85,7 @@ std::optional<String> DataPartStorageOnDiskBase::getRelativePathForPrefix(Logger
     {
         res = getPartDirForPrefix(prefix, detached, try_no);
 
-        if (!volume->getDisk()->exists(full_relative_path / res))
+        if (!volume->getDisk()->existsDirectory(full_relative_path / res))
             return res;
 
         /// If part with compacted storage is broken then we probably
@@ -143,11 +143,11 @@ bool DataPartStorageOnDiskBase::looksLikeBrokenDetachedPartHasTheSameContent(con
     /// We cannot know for sure that content of detached part is the same,
     /// but in most cases it's enough to compare checksums.txt and list of files.
 
-    if (!exists("checksums.txt"))
+    if (!existsFile("checksums.txt"))
         return false;
 
     auto storage_from_detached = create(volume, fs::path(root_path) / MergeTreeData::DETACHED_DIR_NAME, detached_part_path, /*initialize=*/ true);
-    if (!storage_from_detached->exists("checksums.txt"))
+    if (!storage_from_detached->existsFile("checksums.txt"))
         return false;
 
     if (!original_checksums_content)
@@ -207,7 +207,7 @@ Poco::Timestamp DataPartStorageOnDiskBase::getLastModified() const
 
 static UInt64 calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from)
 {
-    if (disk->isFile(from))
+    if (disk->existsFile(from))
         return disk->getFileSize(from);
 
     std::vector<std::string> files;
@@ -427,7 +427,7 @@ void DataPartStorageOnDiskBase::backup(
             backup_entries.emplace_back(filepath_in_backup, std::make_unique<BackupEntryFromSmallFile>(disk, filepath_on_disk, read_settings, copy_encrypted));
             return;
         }
-        if (is_projection_part && allow_backup_broken_projection && !disk->exists(filepath_on_disk))
+        if (is_projection_part && allow_backup_broken_projection && !disk->existsFile(filepath_on_disk))
             return;
 
         if (make_temporary_hard_links)
@@ -514,14 +514,14 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze(
     if (params.external_transaction)
     {
         params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt");
-        params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt");
+        params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME);
         if (!params.keep_metadata_version)
             params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME);
     }
     else
     {
         disk->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt");
-        disk->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt");
+        disk->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME);
         if (!params.keep_metadata_version)
             disk->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME);
     }
@@ -570,14 +570,14 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freezeRemote(
     if (params.external_transaction)
     {
         params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt");
-        params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt");
+        params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME);
         if (!params.keep_metadata_version)
             params.external_transaction->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME);
     }
     else
     {
         dst_disk->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt");
-        dst_disk->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt");
+        dst_disk->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME);
         if (!params.keep_metadata_version)
             dst_disk->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME);
     }
@@ -601,7 +601,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart(
     String path_to_clone = fs::path(to) / dir_path / "";
     auto src_disk = volume->getDisk();
 
-    if (dst_disk->exists(path_to_clone))
+    if (dst_disk->existsDirectory(path_to_clone))
     {
         throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS,
                         "Cannot clone part {} from '{}' to '{}': path '{}' already exists",
@@ -640,7 +640,7 @@ void DataPartStorageOnDiskBase::rename(
 
     String to = fs::path(new_root_path) / new_part_dir / "";
 
-    if (volume->getDisk()->exists(to))
+    if (volume->getDisk()->existsDirectory(to))
     {
         /// FIXME it should be logical error
         if (remove_new_dir_if_exists)
@@ -729,7 +729,7 @@ void DataPartStorageOnDiskBase::remove(
 
         to = fs::path(root_path) / part_dir_without_slash;
 
-        if (disk->exists(to))
+        if (disk->existsDirectory(to))
         {
             LOG_WARNING(log, "Directory {} (to which part must be renamed before removing) already exists. "
                         "Most likely this is due to unclean restart or race condition. Removing it.", fullPath(disk, to));
@@ -747,7 +747,7 @@ void DataPartStorageOnDiskBase::remove(
             }
         }
 
-        if (!disk->exists(from))
+        if (!disk->existsDirectory(from))
         {
             LOG_ERROR(log, "Directory {} (part to remove) doesn't exist or one of nested files has gone. Most likely this is due to manual removing. This should be discouraged. Ignoring.", fullPath(disk, from));
             /// We will never touch this part again, so unlocking it from zero-copy
@@ -831,7 +831,7 @@ void DataPartStorageOnDiskBase::remove(
 
             /// If we have a directory with suffix '.proj' it is likely a projection.
             /// Try to load checksums for it (to avoid recursive removing fallback).
-            if (projection_storage->exists(checksums_name))
+            if (projection_storage->existsFile(checksums_name))
             {
                 try
                 {
@@ -866,7 +866,7 @@ void DataPartStorageOnDiskBase::clearDirectory(
     /// It does not make sense to try fast path for incomplete temporary parts, because some files are probably absent.
     /// Sometimes we add something to checksums.files before actually writing checksums and columns on disk.
     /// Also sometimes we write checksums.txt and columns.txt in arbitrary order, so this check becomes complex...
-    bool incomplete_temporary_part = is_temp && (!disk->exists(fs::path(dir) / "checksums.txt") || !disk->exists(fs::path(dir) / "columns.txt"));
+    bool incomplete_temporary_part = is_temp && (!disk->existsFile(fs::path(dir) / "checksums.txt") || !disk->existsFile(fs::path(dir) / "columns.txt"));
     if (checksums.empty() || incomplete_temporary_part)
     {
         /// If the part is not completely written, we cannot use fast path by listing files.
@@ -887,14 +887,14 @@ void DataPartStorageOnDiskBase::clearDirectory(
         RemoveBatchRequest request;
         for (const auto & file : names_to_remove)
         {
-            if (isGinFile(file) && (!disk->isFile(fs::path(dir) / file)))
+            if (isGinFile(file) && (!disk->existsFile(fs::path(dir) / file)))
                 continue;
 
             request.emplace_back(fs::path(dir) / file);
         }
         request.emplace_back(fs::path(dir) / "default_compression_codec.txt", true);
         request.emplace_back(fs::path(dir) / "delete-on-destroy.txt", true);
-        request.emplace_back(fs::path(dir) / "txn_version.txt", true);
+        request.emplace_back(fs::path(dir) / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME, true);
         request.emplace_back(fs::path(dir) / "metadata_version.txt", true);
 
         disk->removeSharedFiles(request, !can_remove_shared_data, names_not_to_remove);
@@ -942,7 +942,7 @@ SyncGuardPtr DataPartStorageOnDiskBase::getDirectorySyncGuard() const
 
 std::unique_ptr<WriteBufferFromFileBase> DataPartStorageOnDiskBase::writeTransactionFile(WriteMode mode) const
 {
-    return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / "txn_version.txt", 256, mode);
+    return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME, 256, mode);
 }
 
 void DataPartStorageOnDiskBase::removeRecursive()
diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp
index 42f82fd96e2..ac15cc78ad5 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp
@@ -42,17 +42,17 @@ DataPartStoragePtr DataPartStorageOnDiskFull::getProjection(const std::string &
 
 bool DataPartStorageOnDiskFull::exists() const
 {
-    return volume->getDisk()->exists(fs::path(root_path) / part_dir);
+    return volume->getDisk()->existsDirectory(fs::path(root_path) / part_dir);
 }
 
-bool DataPartStorageOnDiskFull::exists(const std::string & name) const
+bool DataPartStorageOnDiskFull::existsFile(const std::string & name) const
 {
-    return volume->getDisk()->exists(fs::path(root_path) / part_dir / name);
+    return volume->getDisk()->existsFile(fs::path(root_path) / part_dir / name);
 }
 
-bool DataPartStorageOnDiskFull::isDirectory(const std::string & name) const
+bool DataPartStorageOnDiskFull::existsDirectory(const std::string & name) const
 {
-    return volume->getDisk()->isDirectory(fs::path(root_path) / part_dir / name);
+    return volume->getDisk()->existsDirectory(fs::path(root_path) / part_dir / name);
 }
 
 class DataPartStorageIteratorOnDisk final : public IDataPartStorageIterator
@@ -65,7 +65,7 @@ public:
 
     void next() override { it->next(); }
     bool isValid() const override { return it->isValid(); }
-    bool isFile() const override { return isValid() && disk->isFile(it->path()); }
+    bool isFile() const override { return isValid() && disk->existsFile(it->path()); }
     std::string name() const override { return it->name(); }
     std::string path() const override { return it->path(); }
 
@@ -128,6 +128,15 @@ std::unique_ptr<ReadBufferFromFileBase> DataPartStorageOnDiskFull::readFile(
     return volume->getDisk()->readFile(fs::path(root_path) / part_dir / name, settings, read_hint, file_size);
 }
 
+/// Forwards to the disk's readFileIfExists() using the path root_path / part_dir / name; the result is returned as is.
+std::unique_ptr<ReadBufferFromFileBase> DataPartStorageOnDiskFull::readFileIfExists(
+    const std::string & name, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
+{
+    const auto disk = volume->getDisk();
+    const auto file_path = fs::path(root_path) / part_dir / name;
+    return disk->readFileIfExists(file_path, settings, read_hint, file_size);
+}
+
 std::unique_ptr<WriteBufferFromFileBase> DataPartStorageOnDiskFull::writeFile(
     const String & name,
     size_t buf_size,
diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskFull.h b/src/Storages/MergeTree/DataPartStorageOnDiskFull.h
index c76ec41ccb0..13eb498f064 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskFull.h
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskFull.h
@@ -16,8 +16,8 @@ public:
     DataPartStoragePtr getProjection(const std::string & name) const override;
 
     bool exists() const override;
-    bool exists(const std::string & name) const override;
-    bool isDirectory(const std::string & name) const override;
+    bool existsFile(const std::string & name) const override;
+    bool existsDirectory(const std::string & name) const override;
 
     DataPartStorageIteratorPtr iterate() const override;
     Poco::Timestamp getFileLastModified(const String & file_name) const override;
@@ -32,6 +32,12 @@ public:
         std::optional<size_t> read_hint,
         std::optional<size_t> file_size) const override;
 
+    std::unique_ptr<ReadBufferFromFileBase> readFileIfExists(
+        const std::string & name,
+        const ReadSettings & settings,
+        std::optional<size_t> read_hint,
+        std::optional<size_t> file_size) const override;
+
     void createProjection(const std::string & name) override;
 
     std::unique_ptr<WriteBufferFromFileBase> writeFile(
diff --git a/src/Storages/MergeTree/GinIndexStore.cpp b/src/Storages/MergeTree/GinIndexStore.cpp
index 5684df68ee4..17ac503eff4 100644
--- a/src/Storages/MergeTree/GinIndexStore.cpp
+++ b/src/Storages/MergeTree/GinIndexStore.cpp
@@ -167,7 +167,7 @@ GinIndexStore::GinIndexStore(const String & name_, DataPartStoragePtr storage_,
 bool GinIndexStore::exists() const
 {
     String segment_id_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE;
-    return storage->exists(segment_id_file_name);
+    return storage->existsFile(segment_id_file_name);
 }
 
 UInt32 GinIndexStore::getNextSegmentIDRange(const String & file_name, size_t n)
@@ -175,7 +175,7 @@ UInt32 GinIndexStore::getNextSegmentIDRange(const String & file_name, size_t n)
     std::lock_guard guard(mutex);
 
     /// When the method is called for the first time, the file doesn't exist yet, need to create it and write segment ID 1.
-    if (!storage->exists(file_name))
+    if (!storage->existsFile(file_name))
     {
         /// Create file
         std::unique_ptr<DB::WriteBufferFromFileBase> ostr = this->data_part_storage_builder->writeFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, {});
@@ -233,7 +233,7 @@ UInt32 GinIndexStore::getNumOfSegments()
         return cached_segment_num;
 
     String segment_id_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE;
-    if (!storage->exists(segment_id_file_name))
+    if (!storage->existsFile(segment_id_file_name))
         return 0;
 
     UInt32 result = 0;
diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h
index 6fa406be044..a09c24c63ab 100644
--- a/src/Storages/MergeTree/IDataPartStorage.h
+++ b/src/Storages/MergeTree/IDataPartStorage.h
@@ -112,8 +112,8 @@ public:
     virtual bool exists() const = 0;
 
     /// File inside part directory exists. Specified path is relative to the part path.
-    virtual bool exists(const std::string & name) const = 0;
-    virtual bool isDirectory(const std::string & name) const = 0;
+    virtual bool existsFile(const std::string & name) const = 0;
+    virtual bool existsDirectory(const std::string & name) const = 0;
 
     /// Modification time for part directory.
     virtual Poco::Timestamp getLastModified() const = 0;
@@ -138,6 +138,17 @@ public:
         std::optional<size_t> read_hint,
         std::optional<size_t> file_size) const = 0;
 
+    /// Reads file `name` like readFile(), but returns an empty pointer when existsFile(name) is false.
+    /// This default does the check and the read as two separate calls; implementations may override it.
+    virtual std::unique_ptr<ReadBufferFromFileBase> readFileIfExists(
+        const std::string & name, const ReadSettings & settings, std::optional<size_t> read_hint, std::optional<size_t> file_size) const
+    {
+        if (!existsFile(name))
+            return nullptr;
+
+        return readFile(name, settings, read_hint, file_size);
+    }
+
     struct ProjectionChecksums
     {
         const std::string & name;
@@ -229,7 +240,6 @@ public:
         bool allow_backup_broken_projection) const = 0;
 
     /// Creates hardlinks into 'to/dir_path' for every file in data part.
-    /// Callback is called after hardlinks are created, but before 'delete-on-destroy.txt' marker is removed.
     /// Some files can be copied instead of hardlinks. It's because of details of zero copy replication
     /// implementation which relies on paths of some blobs in S3. For example if we want to hardlink
     /// the whole part during mutation we shouldn't hardlink checksums.txt, because otherwise
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 4bdee6701d6..20d7528d38a 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -85,6 +85,19 @@ namespace ErrorCodes
     extern const int NO_SUCH_COLUMN_IN_TABLE;
 }
 
+namespace
+{
+
+String getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage)
+{
+    /// Use getIndexExtension(true) only if a "primary" file with that extension exists in the part; otherwise ".idx".
+    String candidate_extension = getIndexExtension(true);
+    const bool candidate_exists = data_part_storage.existsFile("primary" + candidate_extension);
+    return candidate_exists ? candidate_extension : String(".idx");
+}
+
+}
+
 
 void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const PartMetadataManagerPtr & manager)
 {
@@ -695,15 +708,14 @@ ColumnsStatistics IMergeTreeDataPart::loadStatistics() const
         String file_name = stat->getFileName() + STATS_FILE_SUFFIX;
         String file_path = fs::path(getDataPartStorage().getRelativePath()) / file_name;
 
-        if (!metadata_manager->exists(file_name))
+        if (auto stat_file = metadata_manager->readIfExists(file_name))
         {
-            LOG_INFO(storage.log, "Cannot find stats file {}", file_path);
-            continue;
+            CompressedReadBuffer compressed_buffer(*stat_file);
+            stat->deserialize(compressed_buffer);
+            result.push_back(stat);
         }
-        auto stat_file = metadata_manager->read(file_name);
-        CompressedReadBuffer compressed_buffer(*stat_file);
-        stat->deserialize(compressed_buffer);
-        result.push_back(stat);
+        else
+            LOG_INFO(storage.log, "Cannot find stats file {}", file_path);
     }
     return result;
 }
@@ -719,7 +731,8 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
 
     try
     {
-        loadUUID();
+        if (!isStoredOnReadonlyDisk())
+            loadUUID();
         loadColumns(require_columns_checksums);
         loadChecksums(require_columns_checksums);
         loadIndexGranularity();
@@ -734,7 +747,9 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
         bool has_broken_projections = false;
         if (!parent_part)
         {
-            loadTTLInfos();
+            if (!isStoredOnReadonlyDisk())
+                loadTTLInfos();
+
             loadProjections(require_columns_checksums, check_consistency, has_broken_projections, false /* if_not_loaded */);
         }
 
@@ -761,11 +776,10 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
                 for (auto it = getDataPartStorage().iterate(); it->isValid(); it->next())
                 {
                     std::string file_info;
-                    if (!getDataPartStorage().isDirectory(it->name()))
+                    if (!getDataPartStorage().existsDirectory(it->name()))
                         file_info = fmt::format(" ({} bytes)", getDataPartStorage().getFileSize(it->name()));
 
                     files_in_part += fmt::format("{}{}{}", (files_in_part.empty() ? "" : ", "), it->name(), file_info);
-
                 }
                 if (!files_in_part.empty())
                     e->addMessage("Part contains files: {}", files_in_part);
@@ -786,14 +800,19 @@ void IMergeTreeDataPart::appendFilesOfColumnsChecksumsIndexes(Strings & files, b
 {
     if (isStoredOnDisk())
     {
-        appendFilesOfUUID(files);
+        if (!isStoredOnReadonlyDisk())
+            appendFilesOfUUID(files);
+
         appendFilesOfColumns(files);
         appendFilesOfChecksums(files);
         appendFilesOfIndexGranularity(files);
         appendFilesOfIndex(files);
         appendFilesOfRowsCount(files);
         appendFilesOfPartitionAndMinMaxIndex(files);
-        appendFilesOfTTLInfos(files);
+
+        if (!isStoredOnReadonlyDisk())
+            appendFilesOfTTLInfos(files);
+
         appendFilesOfDefaultCompressionCodec(files);
         appendFilesOfMetadataVersion(files);
     }
@@ -835,7 +854,7 @@ void IMergeTreeDataPart::loadProjections(
     for (const auto & projection : metadata_snapshot->projections)
     {
         auto path = projection.name + ".proj";
-        if (getDataPartStorage().exists(path))
+        if (getDataPartStorage().existsDirectory(path))
         {
             if (hasProjection(projection.name))
             {
@@ -915,7 +934,7 @@ void IMergeTreeDataPart::loadIndex() const
             loaded_index[i]->reserve(index_granularity.getMarksCount());
         }
 
-        String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()).value();
+        String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage());
         String index_path = fs::path(getDataPartStorage().getRelativePath()) / index_name;
         auto index_file = metadata_manager->read(index_name);
         size_t marks_count = index_granularity.getMarksCount();
@@ -976,7 +995,7 @@ void IMergeTreeDataPart::appendFilesOfIndex(Strings & files) const
 
     if (metadata_snapshot->hasPrimaryKey())
     {
-        String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()).value();
+        String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage());
         files.push_back(index_name);
     }
 }
@@ -988,13 +1007,13 @@ NameSet IMergeTreeDataPart::getFileNamesWithoutChecksums() const
 
     NameSet result = {"checksums.txt", "columns.txt"};
 
-    if (getDataPartStorage().exists(DEFAULT_COMPRESSION_CODEC_FILE_NAME))
+    if (getDataPartStorage().existsFile(DEFAULT_COMPRESSION_CODEC_FILE_NAME))
         result.emplace(DEFAULT_COMPRESSION_CODEC_FILE_NAME);
 
-    if (getDataPartStorage().exists(TXN_VERSION_METADATA_FILE_NAME))
+    if (getDataPartStorage().existsFile(TXN_VERSION_METADATA_FILE_NAME))
         result.emplace(TXN_VERSION_METADATA_FILE_NAME);
 
-    if (getDataPartStorage().exists(METADATA_VERSION_FILE_NAME))
+    if (getDataPartStorage().existsFile(METADATA_VERSION_FILE_NAME))
         result.emplace(METADATA_VERSION_FILE_NAME);
 
     return result;
@@ -1010,14 +1029,9 @@ void IMergeTreeDataPart::loadDefaultCompressionCodec()
     }
 
     String path = fs::path(getDataPartStorage().getRelativePath()) / DEFAULT_COMPRESSION_CODEC_FILE_NAME;
-    bool exists = metadata_manager->exists(DEFAULT_COMPRESSION_CODEC_FILE_NAME);
-    if (!exists)
+
+    if (auto file_buf = metadata_manager->readIfExists(DEFAULT_COMPRESSION_CODEC_FILE_NAME))
     {
-        default_codec = detectDefaultCompressionCodec();
-    }
-    else
-    {
-        auto file_buf = metadata_manager->read(DEFAULT_COMPRESSION_CODEC_FILE_NAME);
         String codec_line;
         readEscapedStringUntilEOL(codec_line, *file_buf);
 
@@ -1047,6 +1061,8 @@ void IMergeTreeDataPart::loadDefaultCompressionCodec()
             default_codec = detectDefaultCompressionCodec();
         }
     }
+    else
+        default_codec = detectDefaultCompressionCodec();
 }
 
 template <typename Writer>
@@ -1071,11 +1087,8 @@ void IMergeTreeDataPart::writeMetadata(const String & filename, const WriteSetti
     {
         try
         {
-            if (data_part_storage.exists(tmp_filename))
-            {
-                data_part_storage.removeFile(tmp_filename);
-                data_part_storage.commitTransaction();
-            }
+            data_part_storage.removeFileIfExists(tmp_filename);
+            data_part_storage.commitTransaction();
         }
         catch (...)
         {
@@ -1106,7 +1119,7 @@ void IMergeTreeDataPart::writeColumns(const NamesAndTypesList & columns_, const
 
 void IMergeTreeDataPart::writeVersionMetadata(const VersionMetadata & version_, bool fsync_part_dir) const
 {
-    static constexpr auto filename = "txn_version.txt";
+    static constexpr auto filename = TXN_VERSION_METADATA_FILE_NAME;
     static constexpr auto tmp_filename = "txn_version.txt.tmp";
     auto & data_part_storage = const_cast<IDataPartStorage &>(getDataPartStorage());
 
@@ -1133,8 +1146,7 @@ void IMergeTreeDataPart::writeVersionMetadata(const VersionMetadata & version_,
     {
         try
         {
-            if (data_part_storage.exists(tmp_filename))
-                data_part_storage.removeFile(tmp_filename);
+            data_part_storage.removeFileIfExists(tmp_filename);
         }
         catch (...)
         {
@@ -1152,7 +1164,7 @@ void IMergeTreeDataPart::removeDeleteOnDestroyMarker()
 
 void IMergeTreeDataPart::removeVersionMetadata()
 {
-    getDataPartStorage().removeFileIfExists("txn_version.txt");
+    getDataPartStorage().removeFileIfExists(TXN_VERSION_METADATA_FILE_NAME);
 }
 
 
@@ -1272,10 +1284,8 @@ void IMergeTreeDataPart::appendFilesOfPartitionAndMinMaxIndex(Strings & files) c
 
 void IMergeTreeDataPart::loadChecksums(bool require)
 {
-    bool exists = metadata_manager->exists("checksums.txt");
-    if (exists)
+    if (auto buf = metadata_manager->readIfExists("checksums.txt"))
     {
-        auto buf = metadata_manager->read("checksums.txt");
         if (checksums.read(*buf))
         {
             assertEOF(*buf);
@@ -1310,25 +1320,22 @@ void IMergeTreeDataPart::appendFilesOfChecksums(Strings & files)
 
 void IMergeTreeDataPart::loadRowsCountFileForUnexpectedPart()
 {
-    auto read_rows_count = [&]()
-    {
-        auto buf = metadata_manager->read("count.txt");
-        readIntText(rows_count, *buf);
-        assertEOF(*buf);
-    };
     if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::Compact || parent_part)
     {
-        if (metadata_manager->exists("count.txt"))
+        if (auto buf = metadata_manager->readIfExists("count.txt"))
         {
-            read_rows_count();
+            readIntText(rows_count, *buf);
+            assertEOF(*buf);
             return;
         }
     }
     else
     {
-        if (getDataPartStorage().exists("count.txt"))
+        if (getDataPartStorage().existsFile("count.txt"))
         {
-            read_rows_count();
+            auto buf = metadata_manager->read("count.txt");
+            readIntText(rows_count, *buf);
+            assertEOF(*buf);
             return;
         }
     }
@@ -1337,9 +1344,8 @@ void IMergeTreeDataPart::loadRowsCountFileForUnexpectedPart()
 
 void IMergeTreeDataPart::loadRowsCount()
 {
-    auto read_rows_count = [&]()
+    auto read_rows_count = [&](auto & buf)
     {
-        auto buf = metadata_manager->read("count.txt");
         readIntText(rows_count, *buf);
         assertEOF(*buf);
     };
@@ -1350,12 +1356,11 @@ void IMergeTreeDataPart::loadRowsCount()
     }
     else if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::Compact || parent_part)
     {
-        bool exists = metadata_manager->exists("count.txt");
-        if (!exists)
+        if (auto buf = metadata_manager->readIfExists("count.txt"))
+            read_rows_count(buf);
+        else
             throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No count.txt in part {}", name);
 
-        read_rows_count();
-
 #ifndef NDEBUG
         /// columns have to be loaded
         for (const auto & column : getColumns())
@@ -1413,9 +1418,10 @@ void IMergeTreeDataPart::loadRowsCount()
     }
     else
     {
-        if (getDataPartStorage().exists("count.txt"))
+        if (getDataPartStorage().existsFile("count.txt"))
         {
-            read_rows_count();
+            auto buf = metadata_manager->read("count.txt");
+            read_rows_count(buf);
             return;
         }
 
@@ -1542,10 +1548,8 @@ void IMergeTreeDataPart::appendFilesOfRowsCount(Strings & files)
 
 void IMergeTreeDataPart::loadTTLInfos()
 {
-    bool exists = metadata_manager->exists("ttl.txt");
-    if (exists)
+    if (auto in = metadata_manager->readIfExists("ttl.txt"))
     {
-        auto in = metadata_manager->read("ttl.txt");
         assertString("ttl format version: ", *in);
         size_t format_version;
         readText(format_version, *in);
@@ -1575,10 +1579,8 @@ void IMergeTreeDataPart::appendFilesOfTTLInfos(Strings & files)
 
 void IMergeTreeDataPart::loadUUID()
 {
-    bool exists = metadata_manager->exists(UUID_FILE_NAME);
-    if (exists)
+    if (auto in = metadata_manager->readIfExists(UUID_FILE_NAME))
     {
-        auto in = metadata_manager->read(UUID_FILE_NAME);
         readText(uuid, *in);
         if (uuid == UUIDHelpers::Nil)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected empty {} in part: {}", String(UUID_FILE_NAME), name);
@@ -1600,7 +1602,14 @@ void IMergeTreeDataPart::loadColumns(bool require)
     NamesAndTypesList loaded_columns;
     bool is_readonly_storage = getDataPartStorage().isReadonly();
 
-    if (!metadata_manager->exists("columns.txt"))
+    if (auto in = metadata_manager->readIfExists("columns.txt"))
+    {
+        loaded_columns.readText(*in);
+
+        for (auto & column : loaded_columns)
+            setVersionToAggregateFunctions(column.type, true);
+    }
+    else
     {
         /// We can get list of columns only from columns.txt in compact parts.
         if (require || part_type == Type::Compact)
@@ -1618,14 +1627,6 @@ void IMergeTreeDataPart::loadColumns(bool require)
         if (!is_readonly_storage)
             writeColumns(loaded_columns, {});
     }
-    else
-    {
-        auto in = metadata_manager->read("columns.txt");
-        loaded_columns.readText(*in);
-
-        for (auto & column : loaded_columns)
-            setVersionToAggregateFunctions(column.type, true);
-    }
 
     SerializationInfo::Settings settings =
     {
@@ -1634,16 +1635,12 @@ void IMergeTreeDataPart::loadColumns(bool require)
     };
 
     SerializationInfoByName infos;
-    if (metadata_manager->exists(SERIALIZATION_FILE_NAME))
-    {
-        auto in = metadata_manager->read(SERIALIZATION_FILE_NAME);
+    if (auto in = metadata_manager->readIfExists(SERIALIZATION_FILE_NAME))
         infos = SerializationInfoByName::readJSON(loaded_columns, settings, *in);
-    }
 
     int32_t loaded_metadata_version;
-    if (metadata_manager->exists(METADATA_VERSION_FILE_NAME))
+    if (auto in = metadata_manager->readIfExists(METADATA_VERSION_FILE_NAME))
     {
-        auto in = metadata_manager->read(METADATA_VERSION_FILE_NAME);
         readIntText(loaded_metadata_version, *in);
     }
     else
@@ -1677,8 +1674,8 @@ void IMergeTreeDataPart::assertHasVersionMetadata(MergeTreeTransaction * txn) co
                         "CreationTID of part {} (table {}) is set to unexpected value {}, it's a bug. Current transaction: {}",
                         name, storage.getStorageID().getNameForLogs(), version.creation_tid, txn ? txn->dumpDescription() : "<none>");
 
-    assert(!txn || storage.supportsTransactions());
-    assert(!txn || getDataPartStorage().exists(TXN_VERSION_METADATA_FILE_NAME));
+    chassert(!txn || storage.supportsTransactions());
+    chassert(!txn || getDataPartStorage().existsFile(TXN_VERSION_METADATA_FILE_NAME));
 }
 
 void IMergeTreeDataPart::storeVersionMetadata(bool force) const
@@ -1759,7 +1756,7 @@ static std::unique_ptr<ReadBufferFromFileBase> openForReading(const IDataPartSto
 void IMergeTreeDataPart::loadVersionMetadata() const
 try
 {
-    static constexpr auto version_file_name = "txn_version.txt";
+    static constexpr auto version_file_name = TXN_VERSION_METADATA_FILE_NAME;
     static constexpr auto tmp_version_file_name = "txn_version.txt.tmp";
     auto & data_part_storage = const_cast<IDataPartStorage &>(getDataPartStorage());
 
@@ -1775,11 +1772,12 @@ try
         data_part_storage.removeFile(tmp_version_file_name);
     };
 
-    if (data_part_storage.exists(version_file_name))
+    if (data_part_storage.existsFile(version_file_name))
     {
         auto buf = openForReading(data_part_storage, version_file_name);
         version.read(*buf);
-        if (data_part_storage.exists(tmp_version_file_name))
+
+        if (!isStoredOnReadonlyDisk() && data_part_storage.existsFile(tmp_version_file_name))
             remove_tmp_file();
         return;
     }
@@ -1790,7 +1788,7 @@ try
     /// 3. Version metadata were written to *.tmp file, but hard restart happened before fsync.
     /// 4. Fsyncs in storeVersionMetadata() work incorrectly.
 
-    if (!data_part_storage.exists(tmp_version_file_name))
+    if (isStoredOnReadonlyDisk() || !data_part_storage.existsFile(tmp_version_file_name))
     {
         /// Case 1.
         /// We do not have version metadata and transactions history for old parts,
@@ -1808,7 +1806,9 @@ try
     /// Transaction was not committed if *.tmp file was not renamed, so we should complete rollback by removing part.
     version.setCreationTID(Tx::DummyTID, nullptr);
     version.creation_csn = Tx::RolledBackCSN;
-    remove_tmp_file();
+
+    if (!isStoredOnReadonlyDisk())
+        remove_tmp_file();
 }
 catch (Exception & e)
 {
@@ -1845,18 +1845,17 @@ bool IMergeTreeDataPart::assertHasValidVersionMetadata() const
     if (state == MergeTreeDataPartState::Temporary)
         return true;
 
-    if (!getDataPartStorage().exists())
-        return true;
-
     String content;
     String version_file_name = TXN_VERSION_METADATA_FILE_NAME;
     try
     {
-        size_t file_size = getDataPartStorage().getFileSize(TXN_VERSION_METADATA_FILE_NAME);
-        auto read_settings = getReadSettings().adjustBufferSize(file_size);
+        size_t small_file_size = 4096;
+        auto read_settings = getReadSettings().adjustBufferSize(small_file_size);
         /// Avoid cannot allocated thread error. No need in threadpool read method here.
         read_settings.local_fs_method = LocalFSReadMethod::pread;
-        auto buf = getDataPartStorage().readFile(TXN_VERSION_METADATA_FILE_NAME, read_settings, file_size, std::nullopt);
+        auto buf = getDataPartStorage().readFileIfExists(TXN_VERSION_METADATA_FILE_NAME, read_settings, small_file_size, std::nullopt);
+        if (!buf)
+            return false;
 
         readStringUntilEOF(content, *buf);
         ReadBufferFromString str_buf{content};
@@ -2114,7 +2113,9 @@ void IMergeTreeDataPart::checkConsistencyBase() const
 {
     auto metadata_snapshot = storage.getInMemoryMetadataPtr();
     if (parent_part)
+    {
         metadata_snapshot = metadata_snapshot->projections.get(name).metadata;
+    }
     else
     {
         // No need to check projections here because we already did consistent checking when loading projections if necessary.
@@ -2175,7 +2176,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const
         auto check_file_not_empty = [this](const String & file_path)
         {
             UInt64 file_size;
-            if (!getDataPartStorage().exists(file_path) || (file_size = getDataPartStorage().getFileSize(file_path)) == 0)
+            if (!getDataPartStorage().existsFile(file_path) || (file_size = getDataPartStorage().getFileSize(file_path)) == 0)
                 throw Exception(
                     ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,
                     "Part {} is broken: {} is empty",
@@ -2187,7 +2188,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const
         /// Check that the primary key index is not empty.
         if (!pk.column_names.empty())
         {
-            String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()).value();
+            String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage());
             check_file_not_empty(index_name);
         }
 
@@ -2422,11 +2423,11 @@ std::optional<String> IMergeTreeDataPart::getStreamNameOrHash(
     const String & extension,
     const IDataPartStorage & storage_)
 {
-    if (storage_.exists(stream_name + extension))
+    if (storage_.existsFile(stream_name + extension))
         return stream_name;
 
     auto hash = sipHash128String(stream_name);
-    if (storage_.exists(hash + extension))
+    if (storage_.existsFile(hash + extension))
         return hash;
 
     return {};
@@ -2538,20 +2539,6 @@ bool isWidePart(const MergeTreeDataPartPtr & data_part)
     return (data_part && data_part->getType() == MergeTreeDataPartType::Wide);
 }
 
-std::optional<std::string> getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage)
-{
-    if (data_part_storage.exists())
-    {
-        for (auto it = data_part_storage.iterate(); it->isValid(); it->next())
-        {
-            const auto & extension = fs::path(it->name()).extension();
-            if (extension == getIndexExtension(true))
-                return extension;
-        }
-    }
-    return {".idx"};
-}
-
 bool isCompressedFromIndexExtension(const String & index_extension)
 {
     return index_extension == getIndexExtension(true);
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h
index 85ef0472ce7..378832d32a1 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@@ -105,11 +105,11 @@ public:
         const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0;
 
     virtual bool isStoredOnDisk() const = 0;
-
+    virtual bool isStoredOnReadonlyDisk() const = 0;
     virtual bool isStoredOnRemoteDisk() const = 0;
-
     virtual bool isStoredOnRemoteDiskWithZeroCopySupport() const = 0;
 
+
     /// NOTE: Returns zeros if column files are not found in checksums.
     /// Otherwise return information about column size on disk.
     ColumnSize getColumnSize(const String & column_name) const;
@@ -127,7 +127,7 @@ public:
     ColumnSize getTotalColumnsSize() const { return total_columns_size; }
 
     /// Return information about secondary indexes size on disk for all indexes in part
-    IndexSize getTotalSeconaryIndicesSize() const { return total_secondary_indices_size; }
+    IndexSize getTotalSecondaryIndicesSize() const { return total_secondary_indices_size; }
 
     virtual std::optional<String> getFileNameForColumn(const NameAndTypePair & column) const = 0;
 
@@ -751,7 +751,6 @@ bool isCompactPart(const MergeTreeDataPartPtr & data_part);
 bool isWidePart(const MergeTreeDataPartPtr & data_part);
 
 inline String getIndexExtension(bool is_compressed_primary_key) { return is_compressed_primary_key ? ".cidx" : ".idx"; }
-std::optional<String> getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage);
 bool isCompressedFromIndexExtension(const String & index_extension);
 
 using MergeTreeDataPartsVector = std::vector<MergeTreeDataPartPtr>;
diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp
index b2f18f08f41..72c969371b9 100644
--- a/src/Storages/MergeTree/IMergeTreeReader.cpp
+++ b/src/Storages/MergeTree/IMergeTreeReader.cpp
@@ -156,13 +156,21 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
         auto it = original_requested_columns.begin();
         for (size_t pos = 0; pos < num_columns; ++pos, ++it)
         {
-            auto name_in_storage = it->getNameInStorage();
-
-            if (full_requested_columns_set.emplace(name_in_storage).second)
-                full_requested_columns.emplace_back(name_in_storage, it->getTypeInStorage());
-
             if (res_columns[pos])
+            {
+                /// If column is already read, request it as is.
+                if (full_requested_columns_set.emplace(it->name).second)
+                    full_requested_columns.emplace_back(it->name, it->type);
+
                 additional_columns.insert({res_columns[pos], it->type, it->name});
+            }
+            else
+            {
+                /// If a column or subcolumn is missing, request the full column for correct evaluation of defaults of subcolumns.
+                auto name_in_storage = it->getNameInStorage();
+                if (full_requested_columns_set.emplace(name_in_storage).second)
+                    full_requested_columns.emplace_back(name_in_storage, it->getTypeInStorage());
+            }
         }
 
         auto dag = DB::evaluateMissingDefaults(
@@ -183,6 +191,12 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
         it = original_requested_columns.begin();
         for (size_t pos = 0; pos < num_columns; ++pos, ++it)
         {
+            if (additional_columns.has(it->name))
+            {
+                res_columns[pos] = additional_columns.getByName(it->name).column;
+                continue;
+            }
+
             auto name_in_storage = it->getNameInStorage();
             res_columns[pos] = additional_columns.getByName(name_in_storage).column;
 
diff --git a/src/Storages/MergeTree/IPartMetadataManager.cpp b/src/Storages/MergeTree/IPartMetadataManager.cpp
index 03fa3e3309e..1f15756b3c6 100644
--- a/src/Storages/MergeTree/IPartMetadataManager.cpp
+++ b/src/Storages/MergeTree/IPartMetadataManager.cpp
@@ -18,4 +18,12 @@ bool IPartMetadataManager::isCompressedFromFileName(const String & file_name)
         || isCompressedFromIndexExtension(extension);
 }
 
+std::unique_ptr<ReadBuffer> IPartMetadataManager::readIfExists(const String & file_name) const
+{
+    /// Default implementation: probe with exists() first and call read() only when the file is there.
+    if (!exists(file_name))
+        return nullptr;
+    return read(file_name);
+}
+
 }
diff --git a/src/Storages/MergeTree/IPartMetadataManager.h b/src/Storages/MergeTree/IPartMetadataManager.h
index e817421f7d0..d4ad61ffe14 100644
--- a/src/Storages/MergeTree/IPartMetadataManager.h
+++ b/src/Storages/MergeTree/IPartMetadataManager.h
@@ -33,6 +33,9 @@ public:
     /// Read metadata content and return ReadBuffer object.
     virtual std::unique_ptr<ReadBuffer> read(const String & file_name) const = 0;
 
+    /// Read metadata content and return ReadBuffer object if it exists, otherwise return nullptr.
+    virtual std::unique_ptr<ReadBuffer> readIfExists(const String & file_name) const;
+
     /// Return true if metadata exists in part.
     virtual bool exists(const String & file_name) const = 0;
 
diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index 2a3a283bc7d..1506dc38946 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -817,7 +817,7 @@ void KeyCondition::getAllSpaceFillingCurves()
 KeyCondition::KeyCondition(
     const ActionsDAG * filter_dag,
     ContextPtr context,
-    const Names & key_column_names,
+    const Names & key_column_names_,
     const ExpressionActionsPtr & key_expr_,
     bool single_point_)
     : key_expr(key_expr_)
@@ -825,7 +825,7 @@ KeyCondition::KeyCondition(
     , single_point(single_point_)
 {
     size_t key_index = 0;
-    for (const auto & name : key_column_names)
+    for (const auto & name : key_column_names_)
     {
         if (!key_columns.contains(name))
         {
diff --git a/src/Storages/MergeTree/AllMergeSelector.cpp b/src/Storages/MergeTree/MergeSelectors/AllMergeSelector.cpp
similarity index 68%
rename from src/Storages/MergeTree/AllMergeSelector.cpp
rename to src/Storages/MergeTree/MergeSelectors/AllMergeSelector.cpp
index 1bd73de6884..5571846f1e6 100644
--- a/src/Storages/MergeTree/AllMergeSelector.cpp
+++ b/src/Storages/MergeTree/MergeSelectors/AllMergeSelector.cpp
@@ -1,11 +1,17 @@
-#include <Storages/MergeTree/AllMergeSelector.h>
-
-#include <cmath>
-
+#include <Storages/MergeTree/MergeSelectors/AllMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/MergeSelectorFactory.h>
 
 namespace DB
 {
 
+/// Registers AllMergeSelector under the name "All" via registerPrivateSelector.
+/// The creator ignores the settings argument and default-constructs the selector.
+void registerAllMergeSelector(MergeSelectorFactory & factory)
+{
+    auto create = [](const std::any &) { return std::make_shared<AllMergeSelector>(); };
+    factory.registerPrivateSelector("All", create);
+}
+
 AllMergeSelector::PartsRange AllMergeSelector::select(
     const PartsRanges & parts_ranges,
     size_t /*max_total_size_to_merge*/)
diff --git a/src/Storages/MergeTree/AllMergeSelector.h b/src/Storages/MergeTree/MergeSelectors/AllMergeSelector.h
similarity index 85%
rename from src/Storages/MergeTree/AllMergeSelector.h
rename to src/Storages/MergeTree/MergeSelectors/AllMergeSelector.h
index 6cd3bb6f3fa..80698c78c5b 100644
--- a/src/Storages/MergeTree/AllMergeSelector.h
+++ b/src/Storages/MergeTree/MergeSelectors/AllMergeSelector.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Storages/MergeTree/MergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/MergeSelector.h>
 
 
 namespace DB
diff --git a/src/Storages/MergeTree/MergeSelectors/CMakeLists.txt b/src/Storages/MergeTree/MergeSelectors/CMakeLists.txt
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/src/Storages/MergeTree/MergeSelector.h b/src/Storages/MergeTree/MergeSelectors/MergeSelector.h
similarity index 100%
rename from src/Storages/MergeTree/MergeSelector.h
rename to src/Storages/MergeTree/MergeSelectors/MergeSelector.h
diff --git a/src/Storages/MergeTree/MergeSelectors/MergeSelectorFactory.cpp b/src/Storages/MergeTree/MergeSelectors/MergeSelectorFactory.cpp
new file mode 100644
index 00000000000..fb4bd269d6d
--- /dev/null
+++ b/src/Storages/MergeTree/MergeSelectors/MergeSelectorFactory.cpp
@@ -0,0 +1,61 @@
+#include <Storages/MergeTree/MergeSelectors/MergeSelectorFactory.h>
+
+#include <Common/Exception.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+/// Returns the single factory object; the function-local static is constructed on first use.
+MergeSelectorFactory & MergeSelectorFactory::instance()
+{
+    static MergeSelectorFactory ret;
+    return ret;
+}
+
+/// Registers `creator` under `name` only (no enum value is bound to it).
+/// Throws LOGICAL_ERROR if the name is already registered.
+void MergeSelectorFactory::registerPrivateSelector(std::string name, MergeSelectorFactory::Creator && creator)
+{
+    /// try_emplace moves `creator` into the map only if the insertion actually happens,
+    /// so nothing is copied on success and `name` stays intact for the error message.
+    if (!creators.try_emplace(name, std::move(creator)).second)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge selector '{}' already exists", name);
+}
+
+/// Registers `creator` under `name` and additionally makes it reachable through `enum_value`.
+/// Throws LOGICAL_ERROR if either the name or the enum value is already registered.
+void MergeSelectorFactory::registerPublicSelector(std::string name, MergeSelectorAlgorithm enum_value, Creator && creator)
+{
+    registerPrivateSelector(name, std::move(creator));
+    if (!enum_to_name_mapping.emplace(enum_value, name).second)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge selector with enum value {} already exists with different name", enum_value);
+}
+
+/// Creates a selector by its registered name, passing `settings` through to the creator.
+/// Throws LOGICAL_ERROR for an unknown name.
+MergeSelectorPtr MergeSelectorFactory::get(const std::string & name, const std::any & settings) const
+{
+    auto it = creators.find(name);
+    if (it == creators.end())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown merge selector {}", name);
+
+    return it->second(settings);
+}
+
+/// Creates a selector by algorithm enum: the enum is resolved to a name and then looked up by name.
+/// Throws LOGICAL_ERROR for an enum value that was never registered.
+MergeSelectorPtr MergeSelectorFactory::get(MergeSelectorAlgorithm algorithm, const std::any & settings) const
+{
+    auto it = enum_to_name_mapping.find(algorithm);
+    if (it == enum_to_name_mapping.end())
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown merge selector {}", algorithm);
+
+    return get(it->second, settings);
+}
+
+}
diff --git a/src/Storages/MergeTree/MergeSelectors/MergeSelectorFactory.h b/src/Storages/MergeTree/MergeSelectors/MergeSelectorFactory.h
new file mode 100644
index 00000000000..e7d066ecbb0
--- /dev/null
+++ b/src/Storages/MergeTree/MergeSelectors/MergeSelectorFactory.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <unordered_map>
+#include <functional>
+#include <memory>
+#include <string>
+#include <any>
+#include <boost/noncopyable.hpp>
+
+#include <Core/MergeSelectorAlgorithm.h>
+
+namespace DB
+{
+
+class IMergeSelector;
+
+using MergeSelectorPtr = std::shared_ptr<IMergeSelector>;
+
+/// Singleton registry of merge selectors.
+/// A selector is always registered under a string name ("private" registration); a "public" registration
+/// additionally binds a MergeSelectorAlgorithm enum value to that name so it can be requested by enum.
+class MergeSelectorFactory final : private boost::noncopyable
+{
+private:
+    /// Builds a selector from type-erased settings; each creator decides how to interpret the std::any.
+    /// The settings are taken by const reference so that invoking a creator does not copy the std::any.
+    using Creator = std::function<MergeSelectorPtr(const std::any &)>;
+    using CreatorByNameMap = std::unordered_map<std::string, Creator>;
+    using EnumToName = std::unordered_map<MergeSelectorAlgorithm, std::string>;
+
+    CreatorByNameMap creators;
+    EnumToName enum_to_name_mapping;
+    MergeSelectorFactory() = default;
+public:
+    static MergeSelectorFactory & instance();
+
+    /// Create a selector by registered name or by algorithm enum. Throw LOGICAL_ERROR if it is unknown.
+    MergeSelectorPtr get(const std::string & name, const std::any & settings = {}) const;
+    MergeSelectorPtr get(MergeSelectorAlgorithm algorithm, const std::any & settings = {}) const;
+
+    /// Register a creator under a name only / under a name plus an enum value.
+    /// Throw LOGICAL_ERROR if the name (or the enum value) is already taken.
+    void registerPrivateSelector(std::string name, Creator && creator);
+    void registerPublicSelector(std::string name, MergeSelectorAlgorithm enum_value, Creator && creator);
+};
+
+}
diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.cpp
similarity index 73%
rename from src/Storages/MergeTree/SimpleMergeSelector.cpp
rename to src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.cpp
index ee103945342..c393349ef32 100644
--- a/src/Storages/MergeTree/SimpleMergeSelector.cpp
+++ b/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.cpp
@@ -1,15 +1,35 @@
-#include <Storages/MergeTree/SimpleMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/MergeSelectorFactory.h>
+#include <Core/MergeSelectorAlgorithm.h>
 
 #include <base/interpolate.h>
+#include <Common/thread_local_rng.h>
 
 #include <cmath>
 #include <cassert>
 #include <iostream>
+#include <random>
 
 
 namespace DB
 {
 
+/// Registers SimpleMergeSelector as the public "Simple" selector, bound to MergeSelectorAlgorithm::SIMPLE.
+void registerSimpleMergeSelector(MergeSelectorFactory & factory)
+{
+    using Settings = SimpleMergeSelector::Settings;
+    auto create = [](const std::any & settings) { return std::make_shared<SimpleMergeSelector>(std::any_cast<Settings>(settings)); };
+    factory.registerPublicSelector("Simple", MergeSelectorAlgorithm::SIMPLE, create);
+}
+
+/// Registers the same SimpleMergeSelector class as "StochasticSimple" (MergeSelectorAlgorithm::STOCHASTIC_SIMPLE).
+void registerStochasticSimpleMergeSelector(MergeSelectorFactory & factory)
+{
+    using Settings = SimpleMergeSelector::Settings; /// NOTE(review): presumably Settings enables the stochastic mode; confirm.
+    auto create = [](const std::any & settings) { return std::make_shared<SimpleMergeSelector>(std::any_cast<Settings>(settings)); };
+    factory.registerPublicSelector("StochasticSimple", MergeSelectorAlgorithm::STOCHASTIC_SIMPLE, create);
+}
+
 namespace
 {
 
@@ -105,46 +125,52 @@ bool allow(
     if (settings.min_age_to_force_merge && min_age >= settings.min_age_to_force_merge)
         return true;
 
-//    std::cerr << "sum_size: " << sum_size << "\n";
-
     /// Map size to 0..1 using logarithmic scale
     /// Use log(1 + x) instead of log1p(x) because our sum_size is always integer.
     /// Also log1p seems to be slow and significantly affect performance of merges assignment.
     double size_normalized = mapPiecewiseLinearToUnit(log(1 + sum_size), min_size_to_lower_base_log, max_size_to_lower_base_log);
-
-//    std::cerr << "size_normalized: " << size_normalized << "\n";
-
     /// Calculate boundaries for age
     double min_age_to_lower_base = interpolateLinear(settings.min_age_to_lower_base_at_min_size, settings.min_age_to_lower_base_at_max_size, size_normalized);
     double max_age_to_lower_base = interpolateLinear(settings.max_age_to_lower_base_at_min_size, settings.max_age_to_lower_base_at_max_size, size_normalized);
-
-//    std::cerr << "min_age_to_lower_base: " << min_age_to_lower_base << "\n";
-//    std::cerr << "max_age_to_lower_base: " << max_age_to_lower_base << "\n";
-
     /// Map age to 0..1
     double age_normalized = mapPiecewiseLinearToUnit(min_age, min_age_to_lower_base, max_age_to_lower_base);
-
-//    std::cerr << "age: " << min_age << "\n";
-//    std::cerr << "age_normalized: " << age_normalized << "\n";
-
     /// Map partition_size to 0..1
     double num_parts_normalized = mapPiecewiseLinearToUnit(partition_size, settings.min_parts_to_lower_base, settings.max_parts_to_lower_base);
-
-//    std::cerr << "partition_size: " << partition_size << "\n";
-//    std::cerr << "num_parts_normalized: " << num_parts_normalized << "\n";
-
+    /// The ratio should be within [0, 1]
     double combined_ratio = std::min(1.0, age_normalized + num_parts_normalized);
 
-//    std::cerr << "combined_ratio: " << combined_ratio << "\n";
-
     double lowered_base = interpolateLinear(settings.base, 2.0, combined_ratio);
-
-//    std::cerr << "------- lowered_base: " << lowered_base << "\n";
+    if (settings.use_blurry_base)
+    {
+        double partition_fill_factor = std::max(0., 1 - partition_size / settings.parts_to_throw_insert);
+        /// Scale factor controls when (relative to the number of parts in the partition)
+        /// we activate our special algorithm.
+        /// With standard parameters the logic kicks in starting from 80% empty factor.
+        /// The division by 2 is due to the fact that for normal distribution nearly 95.4%
+        /// of all observations fall within two standard deviations.
+        double scaling_factor = std::pow(partition_fill_factor, settings.blurry_base_scale_factor) / 2;
+        /// The base lower than 1 doesn't make sense, so we try to avoid it.
+        std::normal_distribution<double> distribution{lowered_base, (lowered_base - 1) * scaling_factor};
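+        /// The threshold should be strictly bigger than 1, because we don't allow merging a part with itself. NOTE(review): std::min only caps the sample from above, so a sample below 1 is not rejected here - confirm this is intended.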
+        lowered_base = std::min(distribution(thread_local_rng), std::max(1.01, lowered_base));
+    }
 
     return (sum_size + range_size * settings.size_fixed_cost_to_add) / (max_size + settings.size_fixed_cost_to_add) >= lowered_base;
 }
 
 
+/// Returns a randomized window start in [0, parts_count - parts_threshold] (0 if parts_count < parts_threshold), concentrated near the upper end.
+size_t calculateRangeWithStochasticSliding(size_t parts_count, size_t parts_threshold)
+{
+    auto mean = static_cast<double>(parts_count);
+    std::normal_distribution<double> distribution{mean, mean / 4};
+    /// Clamp the sample to [0, 2 * mean]: casting a negative double is UB, and a value above 2 * mean would underflow the reflection below.
+    auto right_boundary = static_cast<size_t>(std::max(0.0, std::min(2 * mean, distribution(thread_local_rng))));
+    if (right_boundary > parts_count)
+        right_boundary = 2 * parts_count - right_boundary;
+    return std::max(right_boundary, parts_threshold) - parts_threshold;
+}
+
 void selectWithinPartition(
     const SimpleMergeSelector::PartsRange & parts,
     const size_t max_total_size_to_merge,
@@ -165,10 +191,15 @@ void selectWithinPartition(
     /// grow uncontrollably, similar to a snowball effect.
     /// To address this we will try to assign a merge taking into consideration
     /// only last N parts.
-    static constexpr size_t parts_threshold = 1000;
+    const size_t parts_threshold = settings.window_size;
     size_t begin = 0;
     if (parts_count >= parts_threshold)
-        begin = parts_count - parts_threshold;
+    {
+        if (settings.enable_stochastic_sliding)
+            begin = calculateRangeWithStochasticSliding(parts_count, parts_threshold);
+        else
+            begin = parts_count - parts_threshold;
+    }
 
     for (; begin < parts_count; ++begin)
     {
diff --git a/src/Storages/MergeTree/SimpleMergeSelector.h b/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h
similarity index 92%
rename from src/Storages/MergeTree/SimpleMergeSelector.h
rename to src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h
index a12b8ba48cb..2d4129b8bf8 100644
--- a/src/Storages/MergeTree/SimpleMergeSelector.h
+++ b/src/Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include <Storages/MergeTree/MergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/MergeSelector.h>
 
 
 /**
@@ -91,6 +91,16 @@ public:
         /// Zero means unlimited. Can be overridden by the same merge tree setting.
         size_t max_parts_to_merge_at_once = 100;
 
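+        /// Number of parts in a partition regarded as the maximum; with use_blurry_base, the spread of the blurred base depends on how far below it the partition is. Can be overridden by the same merge tree setting.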
+        size_t parts_to_throw_insert = 3000;
+
+        /** This mode allows selector algorithm not to perform precise comparisons with base (read the comment below).
+          * Instead, we do it in an epsilon neighborhood, where epsilon is controlled by the number of parts in
+          * the current partition and is a normally distributed random variable.
+          */
+        bool use_blurry_base = false;
+        size_t blurry_base_scale_factor = 42;
+
         /** Minimum ratio of size of one part to all parts in set of parts to merge (for usual cases).
           * For example, if all parts have equal size, it means, that at least 'base' number of parts should be merged.
           * If parts has non-uniform sizes, then minimum number of parts to merge is effectively increased.
@@ -104,6 +114,10 @@ public:
           */
         double base = 5;
 
+        /// Merge assignment only looks at the last window_size parts of a partition; with enable_stochastic_sliding the window start is drawn at random instead.
+        size_t window_size = 1000;
+        bool enable_stochastic_sliding = false;
+
         /** Base is lowered until 1 (effectively means "merge any two parts") depending on several variables:
           *
           * 1. Total number of parts in partition. If too many - then base is lowered.
diff --git a/src/Storages/MergeTree/TTLMergeSelector.cpp b/src/Storages/MergeTree/MergeSelectors/TTLMergeSelector.cpp
similarity index 86%
rename from src/Storages/MergeTree/TTLMergeSelector.cpp
rename to src/Storages/MergeTree/MergeSelectors/TTLMergeSelector.cpp
index f4c698d76d7..75e3a090160 100644
--- a/src/Storages/MergeTree/TTLMergeSelector.cpp
+++ b/src/Storages/MergeTree/MergeSelectors/TTLMergeSelector.cpp
@@ -1,14 +1,32 @@
-#include <Storages/MergeTree/TTLMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/TTLMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/MergeSelectorFactory.h>
 #include <Storages/MergeTree/MergeTreeData.h>
 #include <Parsers/queryToString.h>
 
 #include <algorithm>
 #include <cmath>
+#include <any>
 
 
 namespace DB
 {
 
+void registerTTLDeleteMergeSelector(MergeSelectorFactory & factory)
+{
+    factory.registerPrivateSelector("TTLDelete", [](const std::any & params) /// params must hold TTLDeleteMergeSelector::Params, otherwise std::bad_any_cast is thrown
+    {
+        return std::make_shared<TTLDeleteMergeSelector>(std::any_cast<const TTLDeleteMergeSelector::Params &>(params)); /// by reference: the constructor copies the fields itself
+    });
+}
+
+void registerTTLRecompressMergeSelector(MergeSelectorFactory & factory)
+{
+    factory.registerPrivateSelector("TTLRecompress", [](const std::any & params) /// params must hold TTLRecompressMergeSelector::Params, otherwise std::bad_any_cast is thrown
+    {
+        return std::make_shared<TTLRecompressMergeSelector>(std::any_cast<const TTLRecompressMergeSelector::Params &>(params)); /// by reference: avoids an extra copy of recompression_ttls
+    });
+}
+
 const String & getPartitionIdForPart(const ITTLMergeSelector::Part & part_info)
 {
     const MergeTreeData::DataPartPtr & part = part_info.getDataPartPtr();
diff --git a/src/Storages/MergeTree/TTLMergeSelector.h b/src/Storages/MergeTree/MergeSelectors/TTLMergeSelector.h
similarity index 74%
rename from src/Storages/MergeTree/TTLMergeSelector.h
rename to src/Storages/MergeTree/MergeSelectors/TTLMergeSelector.h
index 8c82e284a45..4f43f88fe0b 100644
--- a/src/Storages/MergeTree/TTLMergeSelector.h
+++ b/src/Storages/MergeTree/MergeSelectors/TTLMergeSelector.h
@@ -1,7 +1,7 @@
 #pragma once
 
 #include <base/types.h>
-#include <Storages/MergeTree/MergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/MergeSelector.h>
 #include <Storages/TTLDescription.h>
 
 #include <map>
@@ -58,10 +58,18 @@ class TTLDeleteMergeSelector : public ITTLMergeSelector
 public:
     using PartitionIdToTTLs = std::map<String, time_t>;
 
-    TTLDeleteMergeSelector(PartitionIdToTTLs & merge_due_times_, time_t current_time_, Int64 merge_cooldown_time_,
-                           bool only_drop_parts_, bool dry_run_)
-        : ITTLMergeSelector(merge_due_times_, current_time_, merge_cooldown_time_, dry_run_)
-        , only_drop_parts(only_drop_parts_) {}
+    struct Params
+    {
+        PartitionIdToTTLs & merge_due_times;
+        time_t current_time;
+        Int64 merge_cooldown_time;
+        bool only_drop_parts;
+        bool dry_run;
+    };
+
+    explicit TTLDeleteMergeSelector(const Params & params)
+        : ITTLMergeSelector(params.merge_due_times, params.current_time, params.merge_cooldown_time, params.dry_run)
+        , only_drop_parts(params.only_drop_parts) {}
 
     time_t getTTLForPart(const IMergeSelector::Part & part) const override;
 
@@ -78,10 +86,18 @@ private:
 class TTLRecompressMergeSelector : public ITTLMergeSelector
 {
 public:
-    TTLRecompressMergeSelector(PartitionIdToTTLs & merge_due_times_, time_t current_time_, Int64 merge_cooldown_time_,
-                               const TTLDescriptions & recompression_ttls_, bool dry_run_)
-        : ITTLMergeSelector(merge_due_times_, current_time_, merge_cooldown_time_, dry_run_)
-        , recompression_ttls(recompression_ttls_)
+    struct Params
+    {
+        PartitionIdToTTLs & merge_due_times;
+        time_t current_time;
+        Int64 merge_cooldown_time;
+        TTLDescriptions recompression_ttls;
+        bool dry_run;
+    };
+
+    explicit TTLRecompressMergeSelector(const Params & params)
+        : ITTLMergeSelector(params.merge_due_times, params.current_time, params.merge_cooldown_time, params.dry_run)
+        , recompression_ttls(params.recompression_ttls)
     {}
 
     /// Return part min recompression TTL.
diff --git a/src/Storages/MergeTree/MergeSelectors/registerMergeSelectors.cpp b/src/Storages/MergeTree/MergeSelectors/registerMergeSelectors.cpp
new file mode 100644
index 00000000000..61f941adc36
--- /dev/null
+++ b/src/Storages/MergeTree/MergeSelectors/registerMergeSelectors.cpp
@@ -0,0 +1,25 @@
+#include <Storages/MergeTree/MergeSelectors/MergeSelector.h>
+
+#include <Storages/MergeTree/MergeSelectors/MergeSelectorFactory.h>
+
+namespace DB
+{
+
+void registerSimpleMergeSelector(MergeSelectorFactory & factory);
+void registerStochasticSimpleMergeSelector(MergeSelectorFactory & factory);
+void registerAllMergeSelector(MergeSelectorFactory & factory);
+void registerTTLDeleteMergeSelector(MergeSelectorFactory & factory);
+void registerTTLRecompressMergeSelector(MergeSelectorFactory & factory);
+
+void registerMergeSelectors()
+{
+    auto & factory = MergeSelectorFactory::instance();
+
+    registerSimpleMergeSelector(factory);
+    registerStochasticSimpleMergeSelector(factory);
+    registerAllMergeSelector(factory);
+    registerTTLDeleteMergeSelector(factory);
+    registerTTLRecompressMergeSelector(factory);
+}
+
+}
diff --git a/src/Storages/MergeTree/MergeSelectors/registerMergeSelectors.h b/src/Storages/MergeTree/MergeSelectors/registerMergeSelectors.h
new file mode 100644
index 00000000000..50f2a9d7a86
--- /dev/null
+++ b/src/Storages/MergeTree/MergeSelectors/registerMergeSelectors.h
@@ -0,0 +1,6 @@
+#pragma once
+
+namespace DB
+{
+void registerMergeSelectors();
+}
diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp
index 9c37f205174..74d6d60ba1b 100644
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@@ -900,12 +900,12 @@ class ColumnGathererStep : public ITransformingStep
 {
 public:
     ColumnGathererStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         const String & rows_sources_temporary_file_name_,
         UInt64 merge_block_size_rows_,
         UInt64 merge_block_size_bytes_,
         bool is_result_sparse_)
-        : ITransformingStep(input_stream_, input_stream_.header, getTraits())
+        : ITransformingStep(input_header_, input_header_, getTraits())
         , rows_sources_temporary_file_name(rows_sources_temporary_file_name_)
         , merge_block_size_rows(merge_block_size_rows_)
         , merge_block_size_bytes(merge_block_size_bytes_)
@@ -935,9 +935,9 @@ public:
         pipeline.addTransform(std::move(transform));
     }
 
-    void updateOutputStream() override
+    void updateOutputHeader() override
     {
-        output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
+        output_header = input_headers.front();
     }
 
 private:
@@ -993,12 +993,12 @@ MergeTask::VerticalMergeRuntimeContext::PreparedColumnPipeline MergeTask::Vertic
 
     /// Union of all parts streams
     {
-        DataStreams input_streams;
-        input_streams.reserve(plans.size());
+        Headers input_headers;
+        input_headers.reserve(plans.size());
         for (auto & plan : plans)
-            input_streams.emplace_back(plan->getCurrentDataStream());
+            input_headers.emplace_back(plan->getCurrentHeader());
 
-        auto union_step = std::make_unique<UnionStep>(std::move(input_streams));
+        auto union_step = std::make_unique<UnionStep>(std::move(input_headers));
         merge_column_query_plan.unitePlans(std::move(union_step), std::move(plans));
     }
 
@@ -1007,7 +1007,7 @@ MergeTask::VerticalMergeRuntimeContext::PreparedColumnPipeline MergeTask::Vertic
         bool is_result_sparse = global_ctx->new_data_part->getSerialization(column_name)->getKind() == ISerialization::Kind::SPARSE;
         const auto data_settings = global_ctx->data->getSettings();
         auto merge_step = std::make_unique<ColumnGathererStep>(
-            merge_column_query_plan.getCurrentDataStream(),
+            merge_column_query_plan.getCurrentHeader(),
             RowsSourcesTemporaryFile::FILE_ID,
             (*data_settings)[MergeTreeSetting::merge_max_block_size],
             (*data_settings)[MergeTreeSetting::merge_max_block_size_bytes],
@@ -1030,7 +1030,7 @@ MergeTask::VerticalMergeRuntimeContext::PreparedColumnPipeline MergeTask::Vertic
             auto indices_expression_dag = indexes_it->second.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())->getActionsDAG().clone();
             indices_expression_dag.addMaterializingOutputActions(/*materialize_sparse=*/ true); /// Const columns cannot be written without materialization.
             auto calculate_indices_expression_step = std::make_unique<ExpressionStep>(
-                merge_column_query_plan.getCurrentDataStream(),
+                merge_column_query_plan.getCurrentHeader(),
                 std::move(indices_expression_dag));
             merge_column_query_plan.addStep(std::move(calculate_indices_expression_step));
         }
@@ -1397,7 +1397,7 @@ class MergePartsStep : public ITransformingStep
 {
 public:
     MergePartsStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         const SortDescription & sort_description_,
         const Names partition_key_columns_,
         const MergeTreeData::MergingParams & merging_params_,
@@ -1407,7 +1407,7 @@ public:
         bool blocks_are_granules_size_,
         bool cleanup_,
         time_t time_of_merge_)
-        : ITransformingStep(input_stream_, input_stream_.header, getTraits())
+        : ITransformingStep(input_header_, input_header_, getTraits())
         , sort_description(sort_description_)
         , partition_key_columns(partition_key_columns_)
         , merging_params(merging_params_)
@@ -1504,9 +1504,9 @@ public:
 #endif
     }
 
-    void updateOutputStream() override
+    void updateOutputHeader() override
     {
-        output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
+        output_header = input_headers.front();
     }
 
 private:
@@ -1540,16 +1540,16 @@ class TTLStep : public ITransformingStep
 {
 public:
     TTLStep(
-        const DataStream & input_stream_,
+        const Header & input_header_,
         const ContextPtr & context_,
         const MergeTreeData & storage_,
         const StorageMetadataPtr & metadata_snapshot_,
         const MergeTreeData::MutableDataPartPtr & data_part_,
         time_t current_time,
         bool force_)
-        : ITransformingStep(input_stream_, input_stream_.header, getTraits())
+        : ITransformingStep(input_header_, input_header_, getTraits())
     {
-        transform = std::make_shared<TTLTransform>(context_, input_stream_.header, storage_, metadata_snapshot_, data_part_, current_time, force_);
+        transform = std::make_shared<TTLTransform>(context_, input_header_, storage_, metadata_snapshot_, data_part_, current_time, force_);
         subqueries_for_sets = transform->getSubqueries();
     }
 
@@ -1562,9 +1562,9 @@ public:
         pipeline.addTransform(transform);
     }
 
-    void updateOutputStream() override
+    void updateOutputHeader() override
     {
-        output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits());
+        output_header = input_headers.front();
     }
 
 private:
@@ -1651,12 +1651,12 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
 
     /// Union of all parts streams
     {
-        DataStreams input_streams;
-        input_streams.reserve(plans.size());
+        Headers input_headers;
+        input_headers.reserve(plans.size());
         for (auto & plan : plans)
-            input_streams.emplace_back(plan->getCurrentDataStream());
+            input_headers.emplace_back(plan->getCurrentHeader());
 
-        auto union_step = std::make_unique<UnionStep>(std::move(input_streams));
+        auto union_step = std::make_unique<UnionStep>(std::move(input_headers));
         merge_parts_query_plan.unitePlans(std::move(union_step), std::move(plans));
     }
 
@@ -1665,7 +1665,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
         /// Calculate sorting key expressions so that they are available for merge sorting.
         auto sorting_key_expression_dag = global_ctx->metadata_snapshot->getSortingKey().expression->getActionsDAG().clone();
         auto calculate_sorting_key_expression_step = std::make_unique<ExpressionStep>(
-            merge_parts_query_plan.getCurrentDataStream(),
+            merge_parts_query_plan.getCurrentHeader(),
             std::move(sorting_key_expression_dag));
         merge_parts_query_plan.addStep(std::move(calculate_sorting_key_expression_step));
     }
@@ -1693,7 +1693,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
             throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental merges with CLEANUP are not allowed");
 
         auto merge_step = std::make_unique<MergePartsStep>(
-            merge_parts_query_plan.getCurrentDataStream(),
+            merge_parts_query_plan.getCurrentHeader(),
             sort_description,
             partition_key_columns,
             global_ctx->merging_params,
@@ -1725,7 +1725,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
         }
 
         auto deduplication_step = std::make_unique<DistinctStep>(
-            merge_parts_query_plan.getCurrentDataStream(),
+            merge_parts_query_plan.getCurrentHeader(),
             SizeLimits(), 0 /*limit_hint*/,
             global_ctx->deduplicate_by_columns,
             false /*pre_distinct*/);
@@ -1740,7 +1740,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
     if (ctx->need_remove_expired_values)
     {
         auto ttl_step = std::make_unique<TTLStep>(
-            merge_parts_query_plan.getCurrentDataStream(), global_ctx->context, *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl);
+            merge_parts_query_plan.getCurrentHeader(), global_ctx->context, *global_ctx->data, global_ctx->metadata_snapshot, global_ctx->new_data_part, global_ctx->time_of_merge, ctx->force_ttl);
         subqueries = ttl_step->getSubqueries();
         ttl_step->setStepDescription("TTL step");
         merge_parts_query_plan.addStep(std::move(ttl_step));
@@ -1752,7 +1752,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() const
         auto indices_expression_dag = global_ctx->merging_skip_indexes.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())->getActionsDAG().clone();
         indices_expression_dag.addMaterializingOutputActions(/*materialize_sparse=*/ true); /// Const columns cannot be written without materialization.
         auto calculate_indices_expression_step = std::make_unique<ExpressionStep>(
-            merge_parts_query_plan.getCurrentDataStream(),
+            merge_parts_query_plan.getCurrentHeader(),
             std::move(indices_expression_dag));
         merge_parts_query_plan.addStep(std::move(calculate_indices_expression_step));
     }
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 1ac26d70877..87e5ae21a03 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -136,6 +136,8 @@ namespace ProfileEvents
     extern const Event DelayedMutationsMilliseconds;
     extern const Event PartsLockWaitMicroseconds;
     extern const Event PartsLockHoldMicroseconds;
+    extern const Event LoadedDataParts;
+    extern const Event LoadedDataPartsMicroseconds;
 }
 
 namespace CurrentMetrics
@@ -346,9 +348,8 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re
             disk->createDirectories(fs::path(relative_data_path) / DETACHED_DIR_NAME);
         }
 
-        if (disk->exists(format_version_path))
+        if (auto buf = disk->readFileIfExists(format_version_path, getReadSettings()))
         {
-            auto buf = disk->readFile(format_version_path, getReadSettings());
             UInt32 current_format_version{0};
             readIntText(current_format_version, *buf);
             if (!buf->eof())
@@ -401,6 +402,8 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re
             throw Exception(ErrorCodes::METADATA_MISMATCH, "MergeTree data format version on disk doesn't support custom partitioning");
     }
 }
+
+
 DataPartsLock::DataPartsLock(std::mutex & data_parts_mutex_)
     : wait_watch(Stopwatch(CLOCK_MONOTONIC))
     , lock(data_parts_mutex_)
@@ -1462,7 +1465,6 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart(
     auto data_part_storage = std::make_shared<DataPartStorageOnDiskFull>(single_disk_volume, relative_data_path, part_name);
 
     String part_path = fs::path(relative_data_path) / part_name;
-    String marker_path = fs::path(part_path) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME_DEPRECATED;
 
     /// Ignore broken parts that can appear as a result of hard server restart.
     auto mark_broken = [&]
@@ -1510,25 +1512,9 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart(
         return res;
     }
 
-    if (part_disk_ptr->exists(marker_path))
-    {
-        /// NOTE: getBytesOnDisk() cannot be used here, since it may be zero if checksums.txt does not exist.
-        res.size_of_part = calculatePartSizeSafe(res.part, log.load());
-        res.is_broken = true;
-
-        auto part_size_str = res.size_of_part ? formatReadableSizeWithBinarySuffix(*res.size_of_part) : "failed to calculate size";
-
-        LOG_WARNING(log,
-            "Detaching stale part {} (size: {}), which should have been deleted after a move. "
-            "That can only happen after unclean restart of ClickHouse after move of a part having an operation blocking that stale copy of part.",
-            res.part->getDataPartStorage().getFullPath(), part_size_str);
-
-        return res;
-    }
-
     try
     {
-        res.part->loadColumnsChecksumsIndexes(require_part_metadata, true);
+        res.part->loadColumnsChecksumsIndexes(require_part_metadata, !part_disk_ptr->isReadOnly());
     }
     catch (...)
     {
@@ -1759,6 +1745,7 @@ std::vector<MergeTreeData::LoadPartResult> MergeTreeData::loadDataPartsFromDisk(
 
 void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::unordered_set<std::string>> expected_parts)
 {
+    Stopwatch watch;
     LOG_DEBUG(log, "Loading data parts");
 
     auto metadata_snapshot = getInMemoryMetadataPtr();
@@ -1812,16 +1799,15 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
 
             bool is_disk_defined = defined_disk_names.contains(disk_name);
 
-            if (!is_disk_defined && disk->exists(relative_data_path))
+            if (!is_disk_defined && disk->existsDirectory(relative_data_path))
             {
                 /// There still a chance that underlying disk is defined in storage policy
                 const auto & delegate = disk->getDelegateDiskIfExists();
-                is_disk_defined = delegate && !delegate->isBroken() && !delegate->isCustomDisk()
-                               && delegate->getPath() == disk->getPath()
-                               && defined_disk_names.contains(delegate->getName());
+                is_disk_defined = delegate && !delegate->isBroken() && !delegate->isCustomDisk() && delegate->getPath() == disk->getPath()
+                    && defined_disk_names.contains(delegate->getName());
             }
 
-            if (!is_disk_defined && disk->exists(relative_data_path))
+            if (!is_disk_defined && disk->existsDirectory(relative_data_path))
             {
                 for (const auto it = disk->iterateDirectory(relative_data_path); it->isValid(); it->next())
                 {
@@ -1856,12 +1842,13 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
         auto & disk_parts = parts_to_load_by_disk[i];
         auto & unexpected_disk_parts = unexpected_parts_to_load_by_disk[i];
 
-        runner([&, disk_ptr]()
+        runner([&expected_parts, &unexpected_disk_parts, &disk_parts, this, disk_ptr]()
         {
             for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next())
             {
                 /// Skip temporary directories, file 'format_version.txt' and directory 'detached'.
-                if (startsWith(it->name(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME
+                if (startsWith(it->name(), "tmp")
+                    || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME
                     || it->name() == DETACHED_DIR_NAME)
                     continue;
 
@@ -2066,7 +2053,10 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
             [this] { loadOutdatedDataParts(/*is_async=*/ true); });
     }
 
-    LOG_DEBUG(log, "Loaded data parts ({} items)", data_parts_indexes.size());
+    watch.stop();
+    LOG_DEBUG(log, "Loaded data parts ({} items) took {} seconds", data_parts_indexes.size(), watch.elapsedSeconds());
+    ProfileEvents::increment(ProfileEvents::LoadedDataParts, data_parts_indexes.size());
+    ProfileEvents::increment(ProfileEvents::LoadedDataPartsMicroseconds, watch.elapsedMicroseconds());
     data_parts_loading_finished = true;
 }
 
@@ -2341,7 +2331,7 @@ void MergeTreeData::stopOutdatedAndUnexpectedDataPartsLoadingTask()
 /// (Only files on the first level of nesting are considered).
 static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_path, time_t threshold)
 {
-    if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() > threshold)
+    if (!disk->existsDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() > threshold)
         return false;
 
     for (auto it = disk->iterateDirectory(directory_path); it->isValid(); it->next())
@@ -2386,7 +2376,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(const String & root_path, siz
         if (disk->isBroken())
             continue;
 
-        if (!disk->exists(root_path))
+        if (!disk->existsDirectory(root_path))
             continue;
 
         for (auto it = disk->iterateDirectory(root_path); it->isValid(); it->next())
@@ -2420,7 +2410,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(const String & root_path, siz
                         LOG_INFO(LogFrequencyLimiter(log.load(), 10), "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path);
                         continue;
                     }
-                    if (!disk->exists(it->path()))
+                    if (!disk->existsDirectory(it->path()))
                     {
                         /// We should recheck that the dir exists, otherwise we can get "No such file or directory"
                         /// due to a race condition with "Renaming temporary part" (temporary part holder could be already released, so the check above is not enough)
@@ -3004,7 +2994,7 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_
 
     for (const auto & disk : disks)
     {
-        if (disk->exists(new_table_path))
+        if (disk->existsDirectory(new_table_path))
             throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Target path already exists: {}", fullPath(disk, new_table_path));
     }
 
@@ -3120,7 +3110,7 @@ void MergeTreeData::dropAllData()
 
         /// It can naturally happen if we cannot drop table from the first time
         /// i.e. get exceptions after remove recursive
-        if (!disk->exists(relative_data_path))
+        if (!disk->existsDirectory(relative_data_path))
         {
             LOG_INFO(log, "dropAllData: path {} is already removed from disk {}", relative_data_path, disk->getName());
             continue;
@@ -3129,10 +3119,10 @@ void MergeTreeData::dropAllData()
         LOG_INFO(log, "dropAllData: remove format_version.txt, detached, moving and write ahead logs");
         disk->removeFileIfExists(fs::path(relative_data_path) / FORMAT_VERSION_FILE_NAME);
 
-        if (disk->exists(fs::path(relative_data_path) / DETACHED_DIR_NAME))
+        if (disk->existsDirectory(fs::path(relative_data_path) / DETACHED_DIR_NAME))
             disk->removeSharedRecursive(fs::path(relative_data_path) / DETACHED_DIR_NAME, /*keep_all_shared_data*/ true, {});
 
-        if (disk->exists(fs::path(relative_data_path) / MOVING_DIR_NAME))
+        if (disk->existsDirectory(fs::path(relative_data_path) / MOVING_DIR_NAME))
             disk->removeRecursive(fs::path(relative_data_path) / MOVING_DIR_NAME);
 
         try
@@ -3780,7 +3770,7 @@ void MergeTreeData::changeSettings(
                     for (const String & disk_name : all_diff_disk_names)
                     {
                         auto disk = new_storage_policy->getDiskByName(disk_name);
-                        if (disk->exists(relative_data_path))
+                        if (disk->existsDirectory(relative_data_path))
                             throw Exception(ErrorCodes::LOGICAL_ERROR, "New storage policy contain disks which already contain data of a table with the same name");
                     }
 
@@ -5597,7 +5587,7 @@ MergeTreeData::PartsBackupEntries MergeTreeData::backupParts(
             if (endsWith(name, proj_suffix) && !defined_projections.contains(projection_name))
             {
                 auto projection_storage = part->getDataPartStorage().getProjection(projection_name + proj_suffix);
-                if (projection_storage->exists("checksums.txt"))
+                if (projection_storage->existsFile("checksums.txt"))
                 {
                     auto projection_part = const_cast<IMergeTreeDataPart &>(*part).getProjectionPartBuilder(
                         projection_name, /* is_temp_projection */false).withPartFormatFromDisk().build();
@@ -6341,7 +6331,7 @@ DetachedPartsInfo MergeTreeData::getDetachedParts() const
         String detached_path = fs::path(relative_data_path) / DETACHED_DIR_NAME;
 
         /// Note: we don't care about TOCTOU issue here.
-        if (disk->exists(detached_path))
+        if (disk->existsDirectory(detached_path))
         {
             for (auto it = disk->iterateDirectory(detached_path); it->isValid(); it->next())
             {
@@ -7606,7 +7596,7 @@ DiskPtr MergeTreeData::tryGetDiskForDetachedPart(const String & part_name) const
     const auto disks = getStoragePolicy()->getDisks();
 
     for (const DiskPtr & disk : disks)
-        if (disk->exists(fs::path(relative_data_path) / DETACHED_DIR_NAME / part_name))
+        if (disk->existsDirectory(fs::path(relative_data_path) / DETACHED_DIR_NAME / part_name))
             return disk;
 
     return nullptr;
@@ -8075,7 +8065,7 @@ MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::checkPartsForMove(co
 
         auto reserved_disk = reservation->getDisk();
 
-        if (reserved_disk->exists(relative_data_path + part->name))
+        if (reserved_disk->existsDirectory(relative_data_path + part->name))
             throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Move is not possible: {} already exists",
                 fullPath(reserved_disk, relative_data_path + part->name));
 
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index b05466ae9b9..8b3c7bdf3fb 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -2,9 +2,9 @@
 
 #include <Storages/MergeTree/MergedBlockOutputStream.h>
 #include <Storages/MergeTree/MergedColumnOnlyOutputStream.h>
-#include <Storages/MergeTree/SimpleMergeSelector.h>
-#include <Storages/MergeTree/AllMergeSelector.h>
-#include <Storages/MergeTree/TTLMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/AllMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/TTLMergeSelector.h>
 #include <Storages/MergeTree/MergeList.h>
 #include <Storages/MergeTree/MergeTreeDataWriter.h>
 #include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
@@ -15,6 +15,7 @@
 #include <Storages/MergeTree/MergeProgress.h>
 #include <Storages/MergeTree/MergeTask.h>
 #include <Storages/MergeTree/ActiveDataPartSet.h>
+#include <Storages/MergeTree/MergeSelectors/MergeSelectorFactory.h>
 
 #include <Processors/Transforms/TTLTransform.h>
 #include <Processors/Transforms/TTLCalcTransform.h>
@@ -59,12 +60,16 @@ namespace MergeTreeSetting
     extern const MergeTreeSettingsUInt64 max_parts_to_merge_at_once;
     extern const MergeTreeSettingsInt64 merge_with_recompression_ttl_timeout;
     extern const MergeTreeSettingsInt64 merge_with_ttl_timeout;
+    extern const MergeTreeSettingsUInt64 merge_selector_blurry_base_scale_factor;
+    extern const MergeTreeSettingsUInt64 merge_selector_window_size;
     extern const MergeTreeSettingsBool min_age_to_force_merge_on_partition_only;
     extern const MergeTreeSettingsUInt64 min_age_to_force_merge_seconds;
     extern const MergeTreeSettingsUInt64 number_of_free_entries_in_pool_to_execute_optimize_entire_partition;
     extern const MergeTreeSettingsUInt64 number_of_free_entries_in_pool_to_execute_mutation;
     extern const MergeTreeSettingsUInt64 number_of_free_entries_in_pool_to_lower_max_size_of_merge;
     extern const MergeTreeSettingsBool ttl_only_drop_parts;
+    extern const MergeTreeSettingsUInt64 parts_to_throw_insert;
+    extern const MergeTreeSettingsMergeSelectorAlgorithm merge_selector_algorithm;
 }
 
 namespace ErrorCodes
@@ -469,13 +474,17 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges(
 
     if (metadata_snapshot->hasAnyTTL() && merge_with_ttl_allowed && !ttl_merges_blocker.isCancelled())
     {
+        TTLDeleteMergeSelector::Params params_drop
+        {
+            .merge_due_times = next_delete_ttl_merge_times_by_partition,
+            .current_time = current_time,
+            .merge_cooldown_time = (*data_settings)[MergeTreeSetting::merge_with_ttl_timeout],
+            .only_drop_parts = true,
+            .dry_run = dry_run
+        };
+
         /// TTL delete is preferred to recompression
-        TTLDeleteMergeSelector drop_ttl_selector(
-                next_delete_ttl_merge_times_by_partition,
-                current_time,
-                (*data_settings)[MergeTreeSetting::merge_with_ttl_timeout],
-                /*only_drop_parts*/ true,
-                dry_run);
+        TTLDeleteMergeSelector drop_ttl_selector(params_drop);
 
         /// The size of the completely expired part of TTL drop is not affected by the merge pressure and the size of the storage space
         parts_to_merge = drop_ttl_selector.select(parts_ranges, (*data_settings)[MergeTreeSetting::max_bytes_to_merge_at_max_space_in_pool]);
@@ -485,12 +494,15 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges(
         }
         else if (!(*data_settings)[MergeTreeSetting::ttl_only_drop_parts])
         {
-            TTLDeleteMergeSelector delete_ttl_selector(
-                next_delete_ttl_merge_times_by_partition,
-                current_time,
-                (*data_settings)[MergeTreeSetting::merge_with_ttl_timeout],
-                /*only_drop_parts*/ false,
-                dry_run);
+            TTLDeleteMergeSelector::Params params_delete
+            {
+                .merge_due_times = next_delete_ttl_merge_times_by_partition,
+                .current_time = current_time,
+                .merge_cooldown_time = (*data_settings)[MergeTreeSetting::merge_with_ttl_timeout],
+                .only_drop_parts = false,
+                .dry_run = dry_run
+            };
+            TTLDeleteMergeSelector delete_ttl_selector(params_delete);
 
             parts_to_merge = delete_ttl_selector.select(parts_ranges, max_total_size_to_merge);
             if (!parts_to_merge.empty())
@@ -499,12 +511,16 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges(
 
         if (parts_to_merge.empty() && metadata_snapshot->hasAnyRecompressionTTL())
         {
-            TTLRecompressMergeSelector recompress_ttl_selector(
-                    next_recompress_ttl_merge_times_by_partition,
-                    current_time,
-                    (*data_settings)[MergeTreeSetting::merge_with_recompression_ttl_timeout],
-                    metadata_snapshot->getRecompressionTTLs(),
-                    dry_run);
+            TTLRecompressMergeSelector::Params params
+            {
+                .merge_due_times = next_recompress_ttl_merge_times_by_partition,
+                .current_time = current_time,
+                .merge_cooldown_time = (*data_settings)[MergeTreeSetting::merge_with_recompression_ttl_timeout],
+                .recompression_ttls = metadata_snapshot->getRecompressionTTLs(),
+                .dry_run = dry_run,
+            };
+
+            TTLRecompressMergeSelector recompress_ttl_selector(params);
 
             parts_to_merge = recompress_ttl_selector.select(parts_ranges, max_total_size_to_merge);
             if (!parts_to_merge.empty())
@@ -514,17 +530,34 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges(
 
     if (parts_to_merge.empty())
     {
-        SimpleMergeSelector::Settings merge_settings;
-        /// Override value from table settings
-        merge_settings.max_parts_to_merge_at_once = (*data_settings)[MergeTreeSetting::max_parts_to_merge_at_once];
-        if (!(*data_settings)[MergeTreeSetting::min_age_to_force_merge_on_partition_only])
-            merge_settings.min_age_to_force_merge = (*data_settings)[MergeTreeSetting::min_age_to_force_merge_seconds];
+        auto merge_selector_algorithm = (*data_settings)[MergeTreeSetting::merge_selector_algorithm];
 
-        if (aggressive)
-            merge_settings.base = 1;
+        std::any merge_settings;
+        if (merge_selector_algorithm == MergeSelectorAlgorithm::SIMPLE
+            || merge_selector_algorithm == MergeSelectorAlgorithm::STOCHASTIC_SIMPLE)
+        {
+            SimpleMergeSelector::Settings simple_merge_settings;
+            /// Override value from table settings
+            simple_merge_settings.window_size = (*data_settings)[MergeTreeSetting::merge_selector_window_size];
+            simple_merge_settings.max_parts_to_merge_at_once = (*data_settings)[MergeTreeSetting::max_parts_to_merge_at_once];
+            if (!(*data_settings)[MergeTreeSetting::min_age_to_force_merge_on_partition_only])
+                simple_merge_settings.min_age_to_force_merge = (*data_settings)[MergeTreeSetting::min_age_to_force_merge_seconds];
 
-        parts_to_merge = SimpleMergeSelector(merge_settings)
-                            .select(parts_ranges, max_total_size_to_merge);
+            if (aggressive)
+                simple_merge_settings.base = 1;
+
+            if (merge_selector_algorithm == MergeSelectorAlgorithm::STOCHASTIC_SIMPLE)
+            {
+                simple_merge_settings.parts_to_throw_insert = (*data_settings)[MergeTreeSetting::parts_to_throw_insert];
+                simple_merge_settings.blurry_base_scale_factor = (*data_settings)[MergeTreeSetting::merge_selector_blurry_base_scale_factor];
+                simple_merge_settings.use_blurry_base = simple_merge_settings.blurry_base_scale_factor != 0;
+                simple_merge_settings.enable_stochastic_sliding = true;
+            }
+
+            merge_settings = simple_merge_settings;
+        }
+
+        parts_to_merge = MergeSelectorFactory::instance().get(merge_selector_algorithm, merge_settings)->select(parts_ranges, max_total_size_to_merge);
 
         /// Do not allow to "merge" part with itself for regular merges, unless it is a TTL-merge where it is ok to remove some values with expired ttl
         if (parts_to_merge.size() == 1)
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
index ca3fae7ec90..71fcb93f369 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
@@ -7,7 +7,7 @@
 #include <Common/ActionBlocker.h>
 #include <Storages/MergeTree/MergeTreeData.h>
 #include <Storages/MutationCommands.h>
-#include <Storages/MergeTree/TTLMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/TTLMergeSelector.h>
 #include <Storages/MergeTree/MergeAlgorithm.h>
 #include <Storages/MergeTree/MergeType.h>
 #include <Storages/MergeTree/MergeTask.h>
diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp
index 3ef36ce364c..ec7aed53f1b 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp
@@ -65,13 +65,13 @@ void MergeTreeDataPartChecksum::checkSize(const IDataPartStorage & storage, cons
     if (isGinFile(name))
         return;
 
-    if (!storage.exists(name))
-        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "{} doesn't exist", fs::path(storage.getRelativePath()) / name);
-
     // This is a projection, no need to check its size.
-    if (storage.isDirectory(name))
+    if (storage.existsDirectory(name))
         return;
 
+    if (!storage.existsFile(name))
+        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "{} doesn't exist", fs::path(storage.getRelativePath()) / name);
+
     UInt64 size = storage.getFileSize(name);
     if (size != file_size)
         throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,
diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp
index d628fd6b529..fd46b3b9540 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp
@@ -10,7 +10,6 @@ namespace DB
 
 namespace ErrorCodes
 {
-    extern const int CANNOT_READ_ALL_DATA;
     extern const int NOT_IMPLEMENTED;
     extern const int NO_FILE_IN_DATA_PART;
     extern const int BAD_SIZE_OF_FILE_IN_DATA_PART;
@@ -100,20 +99,17 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl(
     size_t columns_count, const IDataPartStorage & data_part_storage_)
 {
     if (!index_granularity_info_.mark_type.adaptive)
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeDataPartCompact cannot be created with non-adaptive granulary.");
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeDataPartCompact cannot be created with non-adaptive granularity.");
 
     auto marks_file_path = index_granularity_info_.getMarksFilePath("data");
-    if (!data_part_storage_.exists(marks_file_path))
+
+    std::unique_ptr<ReadBufferFromFileBase> buffer = data_part_storage_.readFileIfExists(marks_file_path, {}, {}, {});
+    if (!buffer)
         throw Exception(
             ErrorCodes::NO_FILE_IN_DATA_PART,
             "Marks file '{}' doesn't exist",
             std::string(fs::path(data_part_storage_.getFullPath()) / marks_file_path));
 
-    size_t marks_file_size = data_part_storage_.getFileSize(marks_file_path);
-
-    std::unique_ptr<ReadBufferFromFileBase> buffer = data_part_storage_.readFile(
-        marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt);
-
     std::unique_ptr<ReadBuffer> marks_reader;
     bool marks_compressed = index_granularity_info_.mark_type.compressed;
     if (marks_compressed)
@@ -129,9 +125,6 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl(
         index_granularity_.appendMark(granularity);
     }
 
-    if (!marks_compressed && index_granularity_.getMarksCount() * index_granularity_info_.getMarkSizeInBytes(columns_count) != marks_file_size)
-        throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all marks from file {}", marks_file_path);
-
     index_granularity_.setInitialized();
 }
 
@@ -188,7 +181,7 @@ void MergeTreeDataPartCompact::doCheckConsistency(bool require_part_metadata) co
         {
             /// count.txt should be present even in non custom-partitioned parts
             std::string file_path = "count.txt";
-            if (!getDataPartStorage().exists(file_path) || getDataPartStorage().getFileSize(file_path) == 0)
+            if (!getDataPartStorage().existsFile(file_path) || getDataPartStorage().getFileSize(file_path) == 0)
                 throw Exception(
                     ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART,
                     "Part {} is broken: {} is empty",
@@ -198,7 +191,7 @@ void MergeTreeDataPartCompact::doCheckConsistency(bool require_part_metadata) co
 
         /// Check that marks are nonempty and have the consistent size with columns number.
 
-        if (getDataPartStorage().exists(mrk_file_name))
+        if (getDataPartStorage().existsFile(mrk_file_name))
         {
             UInt64 file_size = getDataPartStorage().getFileSize(mrk_file_name);
              if (!file_size)
@@ -225,6 +218,11 @@ bool MergeTreeDataPartCompact::isStoredOnRemoteDisk() const
     return getDataPartStorage().isStoredOnRemoteDisk();
 }
 
+bool MergeTreeDataPartCompact::isStoredOnReadonlyDisk() const
+{
+    return getDataPartStorage().isReadonly(); /// Forwards the answer of the part's storage; no state of its own is consulted.
+}
+
 bool MergeTreeDataPartCompact::isStoredOnRemoteDiskWithZeroCopySupport() const
 {
     return getDataPartStorage().supportZeroCopyReplication();
diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h
index 1fb84424774..9512485c54e 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h
@@ -42,6 +42,8 @@ public:
 
     bool isStoredOnDisk() const override { return true; }
 
+    bool isStoredOnReadonlyDisk() const override;
+
     bool isStoredOnRemoteDisk() const override;
 
     bool isStoredOnRemoteDiskWithZeroCopySupport() const override;
@@ -65,7 +67,7 @@ private:
      /// Loads marks index granularity into memory
      void loadIndexGranularity() override;
 
-     /// Compact parts doesn't support per column size, only total size
+     /// Compact parts don't support per column size, only total size
      void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override;
 };
 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp
index d9d4b6e4b8f..9bbf0ad9739 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp
@@ -121,7 +121,7 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl(
 
     /// We can use any column, it doesn't matter
     std::string marks_file_path = index_granularity_info_.getMarksFilePath(any_column_file_name);
-    if (!data_part_storage_.exists(marks_file_path))
+    if (!data_part_storage_.existsFile(marks_file_path))
         throw Exception(
             ErrorCodes::NO_FILE_IN_DATA_PART, "Marks file '{}' doesn't exist",
             std::string(fs::path(data_part_storage_.getFullPath()) / marks_file_path));
@@ -188,6 +188,11 @@ bool MergeTreeDataPartWide::isStoredOnRemoteDisk() const
     return getDataPartStorage().isStoredOnRemoteDisk();
 }
 
+bool MergeTreeDataPartWide::isStoredOnReadonlyDisk() const
+{
+    return getDataPartStorage().isReadonly(); /// Forwards the answer of the part's storage; no state of its own is consulted.
+}
+
 bool MergeTreeDataPartWide::isStoredOnRemoteDiskWithZeroCopySupport() const
 {
     return getDataPartStorage().supportZeroCopyReplication();
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h
index 7465e08b7c4..42893f47573 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWide.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h
@@ -37,6 +37,8 @@ public:
 
     bool isStoredOnDisk() const override { return true; }
 
+    bool isStoredOnReadonlyDisk() const override;
+
     bool isStoredOnRemoteDisk() const override;
 
     bool isStoredOnRemoteDiskWithZeroCopySupport() const override;
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
index 3e1643152b8..459ddc1ca79 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
@@ -528,7 +528,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai
     String bin_path = escaped_name + DATA_FILE_EXTENSION;
 
     /// Some columns may be removed because of ttl. Skip them.
-    if (!getDataPartStorage().exists(mrk_path))
+    if (!getDataPartStorage().existsFile(mrk_path))
         return;
 
     auto mrk_file_in = getDataPartStorage().readFile(mrk_path, {}, std::nullopt, std::nullopt);
diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 2c487e0ce5a..13918ae8e91 100644
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -1503,7 +1503,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
 {
     for (const auto & index_helper : indices)
     {
-        if (!part->getDataPartStorage().exists(index_helper->getFileName() + ".idx"))
+        if (!part->getDataPartStorage().existsFile(index_helper->getFileName() + ".idx"))
         {
             LOG_DEBUG(log, "File for index {} does not exist. Skipping it.", backQuote(index_helper->index.name));
             return ranges;
diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp
index 5d1bf0294bb..66477f1ea75 100644
--- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp
+++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp
@@ -93,13 +93,13 @@ MergeTreeDeduplicationLog::MergeTreeDeduplicationLog(
     , deduplication_map(deduplication_window)
     , disk(disk_)
 {
-    if (deduplication_window != 0 && !disk->exists(logs_dir))
+    if (deduplication_window != 0 && !disk->existsDirectory(logs_dir))
         disk->createDirectories(logs_dir);
 }
 
 void MergeTreeDeduplicationLog::load()
 {
-    if (!disk->exists(logs_dir))
+    if (!disk->existsDirectory(logs_dir))
         return;
 
     for (auto it = disk->iterateDirectory(logs_dir); it->isValid(); it->next())
@@ -320,7 +320,7 @@ void MergeTreeDeduplicationLog::setDeduplicationWindowSize(size_t deduplication_
     rotate_interval = deduplication_window * 2;
 
     /// If settings was set for the first time with ALTER MODIFY SETTING query
-    if (deduplication_window != 0 && !disk->exists(logs_dir))
+    if (deduplication_window != 0 && !disk->existsDirectory(logs_dir))
         disk->createDirectories(logs_dir);
 
     deduplication_map.setMaxSize(deduplication_window);
diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp
index 6aaa5aee14c..168721b3f66 100644
--- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp
@@ -102,9 +102,8 @@ std::optional<MarkType> MergeTreeIndexGranularityInfo::getMarksTypeFromFilesyste
 {
     if (data_part_storage.exists())
         for (auto it = data_part_storage.iterate(); it->isValid(); it->next())
-            if (it->isFile())
-                if (std::string ext = fs::path(it->name()).extension(); MarkType::isMarkFileExtension(ext))
-                    return MarkType(ext);
+            if (std::string ext = fs::path(it->name()).extension(); MarkType::isMarkFileExtension(ext))
+                return MarkType(ext);
     return {};
 }
 
diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp
index db0481f4434..07fd873a000 100644
--- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp
@@ -205,9 +205,9 @@ MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition(
 
 MergeTreeIndexFormat MergeTreeIndexMinMax::getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & relative_path_prefix) const
 {
-    if (data_part_storage.exists(relative_path_prefix + ".idx2"))
+    if (data_part_storage.existsFile(relative_path_prefix + ".idx2")) /// Checked first, so ".idx2" wins when both files are present.
         return {2, ".idx2"};
-    if (data_part_storage.exists(relative_path_prefix + ".idx"))
+    if (data_part_storage.existsFile(relative_path_prefix + ".idx")) /// Otherwise fall back to ".idx", reported as version 1.
         return {1, ".idx"};
     return {0 /* unknown */, ""};
 }
diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h
index b36425c0405..9a358cb4b58 100644
--- a/src/Storages/MergeTree/MergeTreeIndices.h
+++ b/src/Storages/MergeTree/MergeTreeIndices.h
@@ -163,7 +163,7 @@ struct IMergeTreeIndex
     /// Return pair<extension, version>.
     virtual MergeTreeIndexFormat getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & relative_path_prefix) const
     {
-        if (data_part_storage.exists(relative_path_prefix + ".idx"))
+        if (data_part_storage.existsFile(relative_path_prefix + ".idx"))
             return {1, ".idx"};
         return {0 /*unknown*/, ""};
     }
diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp
index 9f79b282a1c..3c3a6cd84a9 100644
--- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp
+++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp
@@ -99,10 +99,10 @@ void MergeTreeMutationEntry::removeFile()
 {
     if (!file_name.empty())
     {
-        if (!disk->exists(path_prefix + file_name))
+        if (!disk->existsFile(path_prefix + file_name))
             return;
 
-        disk->removeFile(path_prefix + file_name);
+        disk->removeFileIfExists(path_prefix + file_name);
         file_name.clear();
     }
 }
diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp
index a826cd3b794..48a4a37f444 100644
--- a/src/Storages/MergeTree/MergeTreePartsMover.cpp
+++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp
@@ -243,7 +243,7 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me
         moving_part.part->assertOnDisk();
         String path_to_clone = fs::path(data->getRelativeDataPath()) / MergeTreeData::MOVING_DIR_NAME / "";
         String relative_path = part->getDataPartStorage().getPartDirectory();
-        if (disk->exists(path_to_clone + relative_path))
+        if (disk->existsDirectory(path_to_clone + relative_path)) /// relative_path is the part directory, so this must be a directory check.
         {
             // If setting is on, we should've already cleaned moving/ dir on startup
             if (data->allowRemoveStaleMovingParts())
diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
index 92dfcdce16c..653973e9db7 100644
--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@@ -366,7 +366,7 @@ public:
         bool prefetch_,
         ContextPtr context_,
         LoggerPtr log_)
-        : ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)})
+        : ISourceStep(storage_snapshot_->getSampleBlockForColumns(columns_to_read_))
         , type(type_)
         , storage(storage_)
         , storage_snapshot(storage_snapshot_)
@@ -409,7 +409,7 @@ public:
 
             if (mark_ranges && mark_ranges->empty())
             {
-                pipeline.init(Pipe(std::make_unique<NullSource>(output_stream->header)));
+                pipeline.init(Pipe(std::make_unique<NullSource>(*output_header)));
                 return;
             }
         }
diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp
index 5bc84a73334..b1e1a431ff9 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.cpp
+++ b/src/Storages/MergeTree/MergeTreeSettings.cpp
@@ -1,5 +1,6 @@
 #include <Core/BaseSettings.h>
 #include <Core/BaseSettingsProgramOptions.h>
+#include <Core/MergeSelectorAlgorithm.h>
 #include <Core/SettingsChangesHistory.h>
 #include <Disks/DiskFomAST.h>
 #include <Parsers/ASTCreateQuery.h>
@@ -52,6 +53,10 @@ namespace ErrorCodes
     M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \
     M(Bool, use_compact_variant_discriminators_serialization, true, "Use compact version of Variant discriminators serialization.", 0) \
     \
+    /** Merge selector settings. */ \
+    M(UInt64, merge_selector_blurry_base_scale_factor, 0, "Controls when the logic kicks in relatively to the number of parts in partition. The bigger the factor the more belated reaction will be.", 0) \
+    M(UInt64, merge_selector_window_size, 1000, "How many parts to look at once.", 0) \
+    \
     /** Merge settings. */ \
     M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \
     M(UInt64, merge_max_block_size_bytes, 10 * 1024 * 1024, "How many bytes in blocks should be formed for merge operations. By default has the same value as `index_granularity_bytes`.", 0) \
@@ -93,6 +98,8 @@ namespace ErrorCodes
     M(String, mutation_workload, "", "Name of workload to be used to access resources for mutations", 0) \
     M(Milliseconds, background_task_preferred_step_execution_time_ms, 50, "Target time to execution of one step of merge or mutation. Can be exceeded if one step takes longer time", 0) \
     M(Bool, check_table_structure_completely, false, "Whether to check table structure completely when manipulate partitions. If true, the source and target tables must have identical definitions including projections and secondary indices. Otherwise, the source table's projections and secondary indices must be a subset of those in the target table.", 0) \
+    M(MergeSelectorAlgorithm, merge_selector_algorithm, MergeSelectorAlgorithm::SIMPLE, "The algorithm to select parts for merges assignment", 0) \
+    \
     /** Inserts settings. */ \
     M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \
     M(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index 794a79f89cf..f0712dcf27a 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -42,6 +42,7 @@ struct MutableColumnsAndConstraints;
     M(CLASS_NAME, Int64) \
     M(CLASS_NAME, LightweightMutationProjectionMode) \
     M(CLASS_NAME, MaxThreads) \
+    M(CLASS_NAME, MergeSelectorAlgorithm) \
     M(CLASS_NAME, Milliseconds) \
     M(CLASS_NAME, Seconds) \
     M(CLASS_NAME, String) \
diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
index cc33628e84d..91df86caa03 100644
--- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
+++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
@@ -24,6 +24,7 @@ namespace Setting
     extern const SettingsUInt64 log_queries_cut_to_length;
     extern const SettingsBool move_all_conditions_to_prewhere;
     extern const SettingsBool move_primary_key_columns_to_end_of_prewhere;
+    extern const SettingsBool allow_reorder_prewhere_conditions;
 }
 
 /// Conditions like "x = N" are considered good if abs(N) > threshold.
@@ -100,6 +101,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons
     where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef()[Setting::move_all_conditions_to_prewhere];
     where_optimizer_context.move_primary_key_columns_to_end_of_prewhere
         = context->getSettingsRef()[Setting::move_primary_key_columns_to_end_of_prewhere];
+    where_optimizer_context.allow_reorder_prewhere_conditions = context->getSettingsRef()[Setting::allow_reorder_prewhere_conditions];
     where_optimizer_context.is_final = select.final();
     where_optimizer_context.use_statistics = context->getSettingsRef()[Setting::allow_statistics_optimize];
 
@@ -134,6 +136,7 @@ MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::op
     where_optimizer_context.move_all_conditions_to_prewhere = context->getSettingsRef()[Setting::move_all_conditions_to_prewhere];
     where_optimizer_context.move_primary_key_columns_to_end_of_prewhere
         = context->getSettingsRef()[Setting::move_primary_key_columns_to_end_of_prewhere];
+    where_optimizer_context.allow_reorder_prewhere_conditions = context->getSettingsRef()[Setting::allow_reorder_prewhere_conditions];
     where_optimizer_context.is_final = is_final;
     where_optimizer_context.use_statistics = context->getSettingsRef()[Setting::allow_statistics_optimize];
 
@@ -379,16 +382,28 @@ std::optional<MergeTreeWhereOptimizer::OptimizeResult> MergeTreeWhereOptimizer::
     for (const auto & condition : where_conditions)
         condition_positions[&condition] = position++;
 
+    /// Moves a single condition from where_conditions to prewhere_conditions.
+    auto move_to_prewhere_conditions = [&](Conditions::iterator cond_it)
+    {
+        LOG_TRACE(log, "Condition {} moved to PREWHERE", cond_it->node.getColumnName());
+        if (where_optimizer_context.allow_reorder_prewhere_conditions)
+        {
+            prewhere_conditions.splice(prewhere_conditions.end(), where_conditions, cond_it);
+            return;
+        }
+        /// Keep the original order of conditions in prewhere_conditions.
+        position = condition_positions[&(*cond_it)];
+        auto prewhere_it = prewhere_conditions.begin();
+        /// The end() check must come first: the end iterator must never be dereferenced.
+        while (prewhere_it != prewhere_conditions.end() && condition_positions[&(*prewhere_it)] < position)
+            ++prewhere_it;
+        prewhere_conditions.splice(prewhere_it, where_conditions, cond_it);
+    };
+
     /// Move condition and all other conditions depend on the same set of columns.
     auto move_condition = [&](Conditions::iterator cond_it)
     {
-        LOG_TRACE(log, "Condition {} moved to PREWHERE", cond_it->node.getColumnName());
-        /// Keep the original order of conditions in prewhere_conditions.
-        position = condition_positions[&(*cond_it)];
-        auto prewhere_it = prewhere_conditions.begin();
-        while (condition_positions[&(*prewhere_it)] < position && prewhere_it != prewhere_conditions.end())
-            ++prewhere_it;
-        prewhere_conditions.splice(prewhere_it, where_conditions, cond_it);
+        move_to_prewhere_conditions(cond_it);
         total_size_of_moved_conditions += cond_it->columns_size;
         total_number_of_moved_columns += cond_it->table_columns.size();
 
@@ -397,13 +412,7 @@ std::optional<MergeTreeWhereOptimizer::OptimizeResult> MergeTreeWhereOptimizer::
         {
             if (jt->viable && jt->columns_size == cond_it->columns_size && jt->table_columns == cond_it->table_columns)
             {
-                LOG_TRACE(log, "Condition {} moved to PREWHERE", jt->node.getColumnName());
-                /// Keep the original order of conditions in prewhere_conditions.
-                position = condition_positions[&(*jt)];
-                prewhere_it = prewhere_conditions.begin();
-                while (condition_positions[&(*prewhere_it)] < position && prewhere_it != prewhere_conditions.end())
-                    ++prewhere_it;
-                prewhere_conditions.splice(prewhere_it, where_conditions, jt++);
+                move_to_prewhere_conditions(jt++);
             }
             else
             {
diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
index a3d035675c6..47575accdca 100644
--- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
+++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
@@ -103,6 +103,7 @@ private:
         NameSet array_joined_names;
         bool move_all_conditions_to_prewhere = false;
         bool move_primary_key_columns_to_end_of_prewhere = false;
+        bool allow_reorder_prewhere_conditions = false;
         bool is_final = false;
         bool use_statistics = false;
     };
diff --git a/src/Storages/MergeTree/PartMetadataManagerOrdinary.cpp b/src/Storages/MergeTree/PartMetadataManagerOrdinary.cpp
index 6eac71eeaf7..2f5b71afe97 100644
--- a/src/Storages/MergeTree/PartMetadataManagerOrdinary.cpp
+++ b/src/Storages/MergeTree/PartMetadataManagerOrdinary.cpp
@@ -10,12 +10,11 @@ namespace DB
 
 std::unique_ptr<ReadBuffer> PartMetadataManagerOrdinary::read(const String & file_name) const
 {
-    size_t file_size = part->getDataPartStorage().getFileSize(file_name);
-    auto read_settings = getReadSettings().adjustBufferSize(file_size);
+    constexpr size_t size_hint = 4096; /// These files are small.
+    auto read_settings = getReadSettings().adjustBufferSize(size_hint);
     /// Default read method is pread_threadpool, but there is not much point in it here.
     read_settings.local_fs_method = LocalFSReadMethod::pread;
-
-    auto res = part->getDataPartStorage().readFile(file_name, read_settings, file_size, std::nullopt);
+    auto res = part->getDataPartStorage().readFile(file_name, read_settings, size_hint, std::nullopt);
 
     if (isCompressedFromFileName(file_name))
         return std::make_unique<CompressedReadBufferFromFile>(std::move(res));
@@ -23,10 +22,22 @@ std::unique_ptr<ReadBuffer> PartMetadataManagerOrdinary::read(const String & fil
     return res;
 }
 
+std::unique_ptr<ReadBuffer> PartMetadataManagerOrdinary::readIfExists(const String & file_name) const
+{
+    /// These files are small, so a modest fixed buffer size hint is enough.
+    constexpr size_t size_hint = 4096;
+    auto res = part->getDataPartStorage().readFileIfExists(file_name, ReadSettings().adjustBufferSize(size_hint), size_hint, std::nullopt);
+    if (!res)
+        return {};
+    /// Files recognized as compressed by their name are wrapped in CompressedReadBufferFromFile.
+    if (isCompressedFromFileName(file_name))
+        return std::make_unique<CompressedReadBufferFromFile>(std::move(res));
+    return res;
+}
+
 bool PartMetadataManagerOrdinary::exists(const String & file_name) const
 {
-    return part->getDataPartStorage().exists(file_name);
+    return part->getDataPartStorage().existsFile(file_name);
 }
 
-
 }
diff --git a/src/Storages/MergeTree/PartMetadataManagerOrdinary.h b/src/Storages/MergeTree/PartMetadataManagerOrdinary.h
index 428b6d4710a..13a1860c8ac 100644
--- a/src/Storages/MergeTree/PartMetadataManagerOrdinary.h
+++ b/src/Storages/MergeTree/PartMetadataManagerOrdinary.h
@@ -13,6 +13,7 @@ public:
     ~PartMetadataManagerOrdinary() override = default;
 
     std::unique_ptr<ReadBuffer> read(const String & file_name) const override;
+    std::unique_ptr<ReadBuffer> readIfExists(const String & file_name) const override;
 
     bool exists(const String & file_name) const override;
 
diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp
index 4b9ff276bfd..2a1ddf32431 100644
--- a/src/Storages/MergeTree/checkDataPart.cpp
+++ b/src/Storages/MergeTree/checkDataPart.cpp
@@ -191,7 +191,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
     auto ratio_of_defaults = (*data_part->storage.getSettings())[MergeTreeSetting::ratio_of_defaults_for_sparse_serialization];
     SerializationInfoByName serialization_infos;
 
-    if (data_part_storage.exists(IMergeTreeDataPart::SERIALIZATION_FILE_NAME))
+    if (data_part_storage.existsFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME))
     {
         try
         {
@@ -263,7 +263,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
     /// Checksums from the rest files listed in checksums.txt. May be absent. If present, they are subsequently compared with the actual data checksums.
     IMergeTreeDataPart::Checksums checksums_txt;
 
-    if (require_checksums || data_part_storage.exists("checksums.txt"))
+    if (require_checksums || data_part_storage.existsFile("checksums.txt"))
     {
         auto buf = data_part_storage.readFile("checksums.txt", read_settings, std::nullopt, std::nullopt);
         checksums_txt.read(*buf);
@@ -277,7 +277,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
         auto file_name = it->name();
 
         /// We will check projections later.
-        if (data_part_storage.isDirectory(file_name) && file_name.ends_with(".proj"))
+        if (data_part_storage.existsDirectory(file_name) && file_name.ends_with(".proj"))
         {
             projections_on_disk.insert(file_name);
             continue;
@@ -413,7 +413,7 @@ IMergeTreeDataPart::Checksums checkDataPart(
         for (auto it = data_part_storage.iterate(); it->isValid(); it->next())
         {
             auto file_name = it->name();
-            if (!data_part_storage.isDirectory(file_name))
+            if (!data_part_storage.existsDirectory(file_name))
             {
                 auto remote_paths = data_part_storage.getRemotePaths(file_name);
                 for (const auto & remote_path : remote_paths)
diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp
index b16fa7b88a4..bbf7813351d 100644
--- a/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -3,6 +3,7 @@
 #include <Storages/MergeTree/MergeTreeIndices.h>
 #include <Storages/MergeTree/MergeTreeSettings.h>
 #include <Storages/MergeTree/extractZooKeeperPathFromReplicatedTableDef.h>
+#include <Storages/MergeTree/MergeSelectors/registerMergeSelectors.h>
 #include <Storages/StorageFactory.h>
 #include <Storages/StorageMergeTree.h>
 #include <Storages/StorageReplicatedMergeTree.h>
@@ -850,6 +851,9 @@ static StoragePtr create(const StorageFactory::Arguments & args)
 
 void registerStorageMergeTree(StorageFactory & factory)
 {
+    /// Part of MergeTree
+    registerMergeSelectors();
+
     StorageFactory::StorageFeatures features{
         .supports_settings = true,
         .supports_skipping_indices = true,
diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
index 98b181d04fa..8f9bd5b19b8 100644
--- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp
@@ -14,6 +14,7 @@
 #include <IO/ReadBufferFromFileBase.h>
 #include <IO/ReadHelpers.h>
 #include <Storages/ObjectStorage/DataLakes/Common.h>
+#include <Storages/ObjectStorage/StorageObjectStorageSource.h>
 
 #include <Processors/Formats/Impl/ArrowBufferedStreams.h>
 #include <Processors/Formats/Impl/ParquetBlockInputFormat.h>
@@ -185,7 +186,8 @@ struct DeltaLakeMetadataImpl
         std::set<String> & result)
     {
         auto read_settings = context->getReadSettings();
-        auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings);
+        StorageObjectStorageSource::ObjectInfo object_info(metadata_file_path);
+        auto buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, context, log);
 
         char c;
         while (!buf->eof())
@@ -492,7 +494,8 @@ struct DeltaLakeMetadataImpl
 
         String json_str;
         auto read_settings = context->getReadSettings();
-        auto buf = object_storage->readObject(StoredObject(last_checkpoint_file), read_settings);
+        StorageObjectStorageSource::ObjectInfo object_info(last_checkpoint_file);
+        auto buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, context, log);
         readJSONObjectPossiblyInvalid(json_str, *buf);
 
         const JSON json(json_str);
@@ -557,7 +560,8 @@ struct DeltaLakeMetadataImpl
         LOG_TRACE(log, "Using checkpoint file: {}", checkpoint_path.string());
 
         auto read_settings = context->getReadSettings();
-        auto buf = object_storage->readObject(StoredObject(checkpoint_path), read_settings);
+        StorageObjectStorageSource::ObjectInfo object_info(checkpoint_path);
+        auto buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, context, log);
         auto format_settings = getFormatSettings(context);
 
         /// Force nullable, because this parquet file for some reason does not have nullable
diff --git a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp
index a7740db4e0f..e27612ca4de 100644
--- a/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp
+++ b/src/Storages/ObjectStorage/DataLakes/IcebergMetadata.cpp
@@ -26,6 +26,7 @@
 #include <Processors/Formats/Impl/AvroRowInputFormat.h>
 #include <Storages/ObjectStorage/DataLakes/IcebergMetadata.h>
 #include <Storages/ObjectStorage/DataLakes/Common.h>
+#include <Storages/ObjectStorage/StorageObjectStorageSource.h>
 
 #include <Poco/JSON/Array.h>
 #include <Poco/JSON/Object.h>
@@ -387,9 +388,13 @@ DataLakeMetadataPtr IcebergMetadata::create(
     ContextPtr local_context)
 {
     const auto [metadata_version, metadata_file_path] = getMetadataFileAndVersion(object_storage, *configuration);
-    LOG_DEBUG(getLogger("IcebergMetadata"), "Parse metadata {}", metadata_file_path);
-    auto read_settings = local_context->getReadSettings();
-    auto buf = object_storage->readObject(StoredObject(metadata_file_path), read_settings);
+
+    auto log = getLogger("IcebergMetadata");
+    LOG_DEBUG(log, "Parse metadata {}", metadata_file_path);
+
+    StorageObjectStorageSource::ObjectInfo object_info(metadata_file_path);
+    auto buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, local_context, log);
+
     String json_str;
     readJSONObjectPossiblyInvalid(json_str, *buf);
 
@@ -456,8 +461,8 @@ Strings IcebergMetadata::getDataFiles() const
     LOG_TEST(log, "Collect manifest files from manifest list {}", manifest_list_file);
 
     auto context = getContext();
-    auto read_settings = context->getReadSettings();
-    auto manifest_list_buf = object_storage->readObject(StoredObject(manifest_list_file), read_settings);
+    StorageObjectStorageSource::ObjectInfo object_info(manifest_list_file);
+    auto manifest_list_buf = StorageObjectStorageSource::createReadBuffer(object_info, object_storage, context, log);
     auto manifest_list_file_reader = std::make_unique<avro::DataFileReaderBase>(std::make_unique<AvroInputStreamReadBufferAdapter>(*manifest_list_buf));
 
     auto data_type = AvroSchemaReader::avroNodeToDataType(manifest_list_file_reader->dataSchema().root()->leafAt(0));
@@ -487,7 +492,8 @@ Strings IcebergMetadata::getDataFiles() const
     {
         LOG_TEST(log, "Process manifest file {}", manifest_file);
 
-        auto buffer = object_storage->readObject(StoredObject(manifest_file), read_settings);
+        StorageObjectStorageSource::ObjectInfo manifest_object_info(manifest_file);
+        auto buffer = StorageObjectStorageSource::createReadBuffer(manifest_object_info, object_storage, context, log);
         auto manifest_file_reader = std::make_unique<avro::DataFileReaderBase>(std::make_unique<AvroInputStreamReadBufferAdapter>(*buffer));
 
         /// Manifest file should always have table schema in avro file metadata. By now we don't support tables with evolved schema,
diff --git a/src/Storages/ObjectStorage/ReadBufferIterator.cpp b/src/Storages/ObjectStorage/ReadBufferIterator.cpp
index d1ef90518e7..7dd3ad0d79f 100644
--- a/src/Storages/ObjectStorage/ReadBufferIterator.cpp
+++ b/src/Storages/ObjectStorage/ReadBufferIterator.cpp
@@ -150,7 +150,7 @@ std::unique_ptr<ReadBuffer> ReadBufferIterator::recreateLastReadBuffer()
     auto context = getContext();
 
     const auto & path = current_object_info->isArchive() ? current_object_info->getPathToArchive() : current_object_info->getPath();
-    auto impl = object_storage->readObject(StoredObject(path), context->getReadSettings());
+    auto impl = StorageObjectStorageSource::createReadBuffer(*current_object_info, object_storage, context, getLogger("ReadBufferIterator"));
 
     const auto compression_method = chooseCompressionMethod(current_object_info->getFileName(), configuration->compression_method);
     const auto zstd_window = static_cast<int>(context->getSettingsRef()[Setting::zstd_window_log_max]);
@@ -276,11 +276,7 @@ ReadBufferIterator::Data ReadBufferIterator::next()
         else
         {
             compression_method = chooseCompressionMethod(filename, configuration->compression_method);
-            read_buf = object_storage->readObject(
-                StoredObject(current_object_info->getPath()),
-                getContext()->getReadSettings(),
-                {},
-                current_object_info->metadata->size_bytes);
+            read_buf = StorageObjectStorageSource::createReadBuffer(*current_object_info, object_storage, getContext(), getLogger("ReadBufferIterator"));
         }
 
         if (!query_settings.skip_empty_files || !read_buf->eof())
diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
index 7714fb6e86b..579d8e95059 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp
@@ -152,7 +152,7 @@ public:
         ContextPtr context_,
         size_t max_block_size_,
         size_t num_streams_)
-        : SourceStepWithFilter(DataStream{.header = info_.source_header}, columns_to_read, query_info_, storage_snapshot_, context_)
+        : SourceStepWithFilter(info_.source_header, columns_to_read, query_info_, storage_snapshot_, context_)
         , object_storage(object_storage_)
         , configuration(configuration_)
         , info(std::move(info_))
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
index baf6970517d..52b0f00f71a 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp
@@ -7,6 +7,9 @@
 #include <Processors/Executors/PullingPipelineExecutor.h>
 #include <Processors/Transforms/ExtractColumnsTransform.h>
 #include <IO/ReadBufferFromFileBase.h>
+#include <Interpreters/Cache/FileCacheFactory.h>
+#include <Interpreters/Cache/FileCache.h>
+#include <Disks/IO/CachedOnDiskReadBufferFromFile.h>
 #include <IO/Archives/createArchiveReader.h>
 #include <Formats/FormatFactory.h>
 #include <Disks/IO/AsynchronousBoundedReadBuffer.h>
@@ -37,6 +40,7 @@ namespace Setting
     extern const SettingsUInt64 max_download_buffer_size;
     extern const SettingsMaxThreads max_threads;
     extern const SettingsBool use_cache_for_count_from_files;
+    extern const SettingsString filesystem_cache_name;
 }
 
 namespace ErrorCodes
@@ -420,44 +424,110 @@ std::future<StorageObjectStorageSource::ReaderHolder> StorageObjectStorageSource
     return create_reader_scheduler([=, this] { return createReader(); }, Priority{});
 }
 
-std::unique_ptr<ReadBuffer> StorageObjectStorageSource::createReadBuffer(
-    const ObjectInfo & object_info, const ObjectStoragePtr & object_storage, const ContextPtr & context_, const LoggerPtr & log)
+/// Creates a buffer for reading the object; may wrap it in the filesystem cache and/or an asynchronous buffer.
+/// Fills in object_info.metadata (hence the non-const reference) if it is absent and the cache is going to be used.
+std::unique_ptr<ReadBufferFromFileBase> StorageObjectStorageSource::createReadBuffer(
+    ObjectInfo & object_info, const ObjectStoragePtr & object_storage, const ContextPtr & context_, const LoggerPtr & log)
 {
+    const auto & settings = context_->getSettingsRef();
+    const auto & read_settings = context_->getReadSettings();
+
+    const auto filesystem_cache_name = settings[Setting::filesystem_cache_name].value;
+    bool use_cache = read_settings.enable_filesystem_cache
+        && !filesystem_cache_name.empty()
+        && (object_storage->getType() == ObjectStorageType::Azure
+            || object_storage->getType() == ObjectStorageType::S3);
+
+    if (!object_info.metadata)
+    {
+        if (!use_cache)
+            return object_storage->readObject(StoredObject(object_info.getPath()), read_settings);
+        object_info.metadata = object_storage->getObjectMetadata(object_info.getPath());
+    }
+
     const auto & object_size = object_info.metadata->size_bytes;
 
-    auto read_settings = context_->getReadSettings().adjustBufferSize(object_size);
+    auto modified_read_settings = read_settings.adjustBufferSize(object_size);
     /// FIXME: Changing this setting to default value breaks something around parquet reading
-    read_settings.remote_read_min_bytes_for_seek = read_settings.remote_fs_buffer_size;
+    modified_read_settings.remote_read_min_bytes_for_seek = modified_read_settings.remote_fs_buffer_size;
     /// User's object may change, don't cache it.
-    read_settings.enable_filesystem_cache = false;
-    read_settings.use_page_cache_for_disks_without_file_cache = false;
-
-    const bool object_too_small = object_size <= 2 * context_->getSettingsRef()[Setting::max_download_buffer_size];
-    const bool use_prefetch = object_too_small
-        && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool
-        && read_settings.remote_fs_prefetch;
-
-    if (use_prefetch)
-        read_settings.remote_read_buffer_use_external_buffer = true;
-
-    auto impl = object_storage->readObject(StoredObject(object_info.getPath(), "", object_size), read_settings);
+    modified_read_settings.use_page_cache_for_disks_without_file_cache = false;
 
     // Create a read buffer that will prefetch the first ~1 MB of the file.
     // When reading lots of tiny files, this prefetching almost doubles the throughput.
     // For bigger files, parallel reading is more useful.
-    if (!use_prefetch)
+    const bool object_too_small = object_size <= 2 * settings[Setting::max_download_buffer_size];
+    const bool use_prefetch = object_too_small
+        && modified_read_settings.remote_fs_method == RemoteFSReadMethod::threadpool
+        && modified_read_settings.remote_fs_prefetch;
+
+    /// FIXME: Use async buffer if use_cache,
+    /// because CachedOnDiskReadBufferFromFile does not work as an independent buffer currently.
+    const bool use_async_buffer = use_prefetch || use_cache;
+
+    if (use_async_buffer)
+        modified_read_settings.remote_read_buffer_use_external_buffer = true;
+
+    std::unique_ptr<ReadBufferFromFileBase> impl;
+    if (use_cache)
+    {
+        if (object_info.metadata->etag.empty())
+        {
+            LOG_WARNING(log, "Cannot use filesystem cache, no etag specified");
+        }
+        else
+        {
+            SipHash hash;
+            hash.update(object_info.getPath());
+            hash.update(object_info.metadata->etag);
+            const auto hash_value = hash.get128(); /// Compute the digest once; reused for the cache key and the log line.
+            const auto cache_key = FileCacheKey::fromKey(hash_value);
+            auto cache = FileCacheFactory::instance().get(filesystem_cache_name);
+
+            auto read_buffer_creator = [path = object_info.getPath(), object_size, modified_read_settings, object_storage]()
+            {
+                return object_storage->readObject(StoredObject(path, "", object_size), modified_read_settings);
+            };
+
+            impl = std::make_unique<CachedOnDiskReadBufferFromFile>(
+                object_info.getPath(),
+                cache_key,
+                cache,
+                FileCache::getCommonUser(),
+                read_buffer_creator,
+                modified_read_settings,
+                std::string(CurrentThread::getQueryId()),
+                object_size,
+                /* allow_seeks */true,
+                /* use_external_buffer */true,
+                /* read_until_position */std::nullopt,
+                context_->getFilesystemCacheLog());
+
+            LOG_TEST(log, "Using filesystem cache `{}` (path: {}, etag: {}, hash: {})",
+                     filesystem_cache_name, object_info.getPath(),
+                     object_info.metadata->etag, toString(hash_value));
+        }
+    }
+
+    if (!impl)
+        impl = object_storage->readObject(StoredObject(object_info.getPath(), "", object_size), modified_read_settings);
+
+    if (!use_async_buffer)
         return impl;
 
     LOG_TRACE(log, "Downloading object of size {} with initial prefetch", object_size);
 
     auto & reader = context_->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER);
     impl = std::make_unique<AsynchronousBoundedReadBuffer>(
-        std::move(impl), reader, read_settings,
+        std::move(impl), reader, modified_read_settings,
         context_->getAsyncReadCounters(),
         context_->getFilesystemReadPrefetchesLog());
 
-    impl->setReadUntilEnd();
-    impl->prefetch(DEFAULT_PREFETCH_PRIORITY);
+    if (use_prefetch)
+    {
+        impl->setReadUntilEnd();
+        impl->prefetch(DEFAULT_PREFETCH_PRIORITY);
+    }
     return impl;
 }
 
@@ -787,8 +857,7 @@ StorageObjectStorageSource::ArchiveIterator::createArchiveReader(ObjectInfoPtr o
         /* path_to_archive */object_info->getPath(),
         /* archive_read_function */[=, this]()
         {
-            StoredObject stored_object(object_info->getPath(), "", size);
-            return object_storage->readObject(stored_object, getContext()->getReadSettings());
+            return StorageObjectStorageSource::createReadBuffer(*object_info, object_storage, getContext(), logger);
         },
         /* archive_size */size);
 }
diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.h b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
index 8ee3b023638..f313b1bf3c3 100644
--- a/src/Storages/ObjectStorage/StorageObjectStorageSource.h
+++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.h
@@ -66,6 +66,11 @@ public:
         const ObjectInfo & object_info,
         bool include_connection_info = true);
 
+    static std::unique_ptr<ReadBufferFromFileBase> createReadBuffer(
+        ObjectInfo & object_info,
+        const ObjectStoragePtr & object_storage,
+        const ContextPtr & context_,
+        const LoggerPtr & log);
 protected:
     const String name;
     ObjectStoragePtr object_storage;
@@ -135,11 +140,6 @@ protected:
     ReaderHolder createReader();
 
     std::future<ReaderHolder> createReaderAsync();
-    static std::unique_ptr<ReadBuffer> createReadBuffer(
-        const ObjectInfo & object_info,
-        const ObjectStoragePtr & object_storage,
-        const ContextPtr & context_,
-        const LoggerPtr & log);
 
     void addNumRowsToCache(const ObjectInfo & object_info, size_t num_rows);
     void lazyInitialize();
diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp
index 17cd1b5ac1f..6c8daea2e93 100644
--- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp
+++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp
@@ -234,7 +234,7 @@ public:
         std::shared_ptr<StorageObjectStorageQueue> storage_,
         size_t max_block_size_)
         : SourceStepWithFilter(
-            DataStream{.header = std::move(sample_block)},
+            std::move(sample_block),
             column_names_,
             query_info_,
             storage_snapshot_,
diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp
index 6674202fe81..69c7a1191c8 100644
--- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp
+++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp
@@ -78,12 +78,12 @@ void readFinalFromNestedStorage(
 
     if (!expressions->children.empty())
     {
-        const auto & header = query_plan.getCurrentDataStream().header;
+        const auto & header = query_plan.getCurrentHeader();
         auto syntax = TreeRewriter(context).analyze(expressions, header.getNamesAndTypesList());
         auto actions = ExpressionAnalyzer(expressions, syntax, context).getActionsDAG(true /* add_aliases */, false /* project_result */);
 
         auto step = std::make_unique<FilterStep>(
-            query_plan.getCurrentDataStream(),
+            query_plan.getCurrentHeader(),
             std::move(actions),
             filter_column_name,
             false);
diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp
index 617e9331795..86346f25d08 100644
--- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp
+++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp
@@ -570,7 +570,7 @@ public:
         const StorageEmbeddedRocksDB & storage_,
         size_t max_block_size_,
         size_t num_streams_)
-        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_)
+        : SourceStepWithFilter(std::move(sample_block), column_names_, query_info_, storage_snapshot_, context_)
         , storage(storage_)
         , max_block_size(max_block_size_)
         , num_streams(num_streams_)
@@ -608,7 +608,7 @@ void StorageEmbeddedRocksDB::read(
 
 void ReadFromEmbeddedRocksDB::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    const auto & sample_block = getOutputStream().header;
+    const auto & sample_block = getOutputHeader();
 
     if (all_scan)
     {
@@ -655,7 +655,7 @@ void ReadFromEmbeddedRocksDB::applyFilters(ActionDAGNodes added_filter_nodes)
 {
     SourceStepWithFilter::applyFilters(std::move(added_filter_nodes));
 
-    const auto & sample_block = getOutputStream().header;
+    const auto & sample_block = getOutputHeader();
     auto primary_key_data_type = sample_block.getByName(storage.primary_key).type;
     std::tie(keys, all_scan) = getFilterKeys(storage.primary_key, primary_key_data_type, filter_actions_dag, context);
 }
diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp
index f56a5dbf704..ea64c8a05e1 100644
--- a/src/Storages/StorageBuffer.cpp
+++ b/src/Storages/StorageBuffer.cpp
@@ -380,24 +380,24 @@ void StorageBuffer::read(
                       * Instead, we rely on the converting actions at the end of this function.
                       */
                     auto actions = addMissingDefaults(
-                            query_plan.getCurrentDataStream().header,
+                            query_plan.getCurrentHeader(),
                             header_after_adding_defaults.getNamesAndTypesList(),
                             metadata_snapshot->getColumns(),
                             local_context);
 
                     auto adding_missed = std::make_unique<ExpressionStep>(
-                            query_plan.getCurrentDataStream(),
+                            query_plan.getCurrentHeader(),
                             std::move(actions));
 
                     adding_missed->setStepDescription("Add columns missing in destination table");
                     query_plan.addStep(std::move(adding_missed));
 
                     auto actions_dag = ActionsDAG::makeConvertingActions(
-                            query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
+                            query_plan.getCurrentHeader().getColumnsWithTypeAndName(),
                             header.getColumnsWithTypeAndName(),
                             ActionsDAG::MatchColumnsMode::Name);
 
-                    auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(actions_dag));
+                    auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(actions_dag));
 
                     converting->setStepDescription("Convert destination table columns to Buffer table structure");
                     query_plan.addStep(std::move(converting));
@@ -508,30 +508,30 @@ void StorageBuffer::read(
         return;
     }
 
-    auto result_header = buffers_plan.getCurrentDataStream().header;
+    auto result_header = buffers_plan.getCurrentHeader();
 
     /// Convert structure from table to structure from buffer.
-    if (!blocksHaveEqualStructure(query_plan.getCurrentDataStream().header, result_header))
+    if (!blocksHaveEqualStructure(query_plan.getCurrentHeader(), result_header))
     {
         auto convert_actions_dag = ActionsDAG::makeConvertingActions(
-                query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
+                query_plan.getCurrentHeader().getColumnsWithTypeAndName(),
                 result_header.getColumnsWithTypeAndName(),
                 ActionsDAG::MatchColumnsMode::Name);
 
-        auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(convert_actions_dag));
+        auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(convert_actions_dag));
         query_plan.addStep(std::move(converting));
     }
 
-    DataStreams input_streams;
-    input_streams.emplace_back(query_plan.getCurrentDataStream());
-    input_streams.emplace_back(buffers_plan.getCurrentDataStream());
+    Headers input_headers;
+    input_headers.emplace_back(query_plan.getCurrentHeader());
+    input_headers.emplace_back(buffers_plan.getCurrentHeader());
 
     std::vector<std::unique_ptr<QueryPlan>> plans;
     plans.emplace_back(std::make_unique<QueryPlan>(std::move(query_plan)));
     plans.emplace_back(std::make_unique<QueryPlan>(std::move(buffers_plan)));
     query_plan = QueryPlan();
 
-    auto union_step = std::make_unique<UnionStep>(std::move(input_streams));
+    auto union_step = std::make_unique<UnionStep>(std::move(input_headers));
     union_step->setStepDescription("Unite sources from Buffer table");
     query_plan.unitePlans(std::move(union_step), std::move(plans));
 }
diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index b26abda21f9..b961b856672 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -1328,7 +1328,7 @@ void StorageDistributed::drop()
     auto disks = data_volume->getDisks();
     for (const auto & disk : disks)
     {
-        if (!disk->exists(relative_data_path))
+        if (!disk->existsDirectory(relative_data_path))
         {
             LOG_INFO(log, "Path {} is already removed from disk {}", relative_data_path, disk->getName());
             continue;
diff --git a/src/Storages/StorageDummy.cpp b/src/Storages/StorageDummy.cpp
index 0525a004099..7ff5ca645c5 100644
--- a/src/Storages/StorageDummy.cpp
+++ b/src/Storages/StorageDummy.cpp
@@ -51,10 +51,8 @@ ReadFromDummy::ReadFromDummy(
     const StorageSnapshotPtr & storage_snapshot_,
     const ContextPtr & context_,
     const StorageDummy & storage_)
-    : SourceStepWithFilter(
-        DataStream{
-            .header = SourceStepWithFilter::applyPrewhereActions(
-                storage_snapshot_->getSampleBlockForColumns(column_names_), query_info_.prewhere_info)},
+    : SourceStepWithFilter(SourceStepWithFilter::applyPrewhereActions(
+                storage_snapshot_->getSampleBlockForColumns(column_names_), query_info_.prewhere_info),
         column_names_,
         query_info_,
         storage_snapshot_,
@@ -66,7 +64,7 @@ ReadFromDummy::ReadFromDummy(
 
 void ReadFromDummy::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    Pipe pipe(std::make_shared<SourceFromSingleChunk>(getOutputStream().header));
+    Pipe pipe(std::make_shared<SourceFromSingleChunk>(getOutputHeader())); /// The pipeline is initialized with this single source, constructed from the step's output header alone.
     pipeline.init(std::move(pipe));
 }
 
diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp
index 450f4384af4..ac560b58962 100644
--- a/src/Storages/StorageExternalDistributed.cpp
+++ b/src/Storages/StorageExternalDistributed.cpp
@@ -93,12 +93,12 @@ void StorageExternalDistributed::read(
         return;
     }
 
-    DataStreams input_streams;
-    input_streams.reserve(plans.size());
+    Headers input_headers;
+    input_headers.reserve(plans.size());
     for (auto & plan : plans)
-        input_streams.emplace_back(plan->getCurrentDataStream());
+        input_headers.emplace_back(plan->getCurrentHeader());
 
-    auto union_step = std::make_unique<UnionStep>(std::move(input_streams));
+    auto union_step = std::make_unique<UnionStep>(std::move(input_headers));
     query_plan.unitePlans(std::move(union_step), std::move(plans));
 }
 
diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 7485a19e67f..eefd60128a6 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -1562,7 +1562,7 @@ public:
         const bool need_only_count_,
         size_t max_block_size_,
         size_t num_streams_)
-        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_)
+        : SourceStepWithFilter(std::move(sample_block), column_names_, query_info_, storage_snapshot_, context_)
         , storage(std::move(storage_))
         , info(std::move(info_))
         , need_only_count(need_only_count_)
diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp
index 4eb8ca88223..fcd6006a6f9 100644
--- a/src/Storages/StorageFuzzJSON.cpp
+++ b/src/Storages/StorageFuzzJSON.cpp
@@ -149,7 +149,6 @@ void traverse(const ParserImpl::Element & e, std::shared_ptr<JSONNode> node)
 
 std::shared_ptr<JSONNode> parseJSON(const String & json)
 {
-    std::string_view view{json.begin(), json.end()};
     ParserImpl::Element document;
     ParserImpl p;
 
diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp
index 4c319ed9414..41e335e4dc3 100644
--- a/src/Storages/StorageInput.cpp
+++ b/src/Storages/StorageInput.cpp
@@ -64,7 +64,7 @@ public:
         Block sample_block,
         Pipe pipe_,
         StorageInput & storage_)
-        : ISourceStep(DataStream{.header = std::move(sample_block)})
+        : ISourceStep(std::move(sample_block))
         , pipe(std::move(pipe_))
         , storage(storage_)
     {
diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp
index 9f8a8cc1a3c..f1f183ed18b 100644
--- a/src/Storages/StorageJoin.cpp
+++ b/src/Storages/StorageJoin.cpp
@@ -156,7 +156,7 @@ void StorageJoin::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPt
     std::lock_guard mutate_lock(mutate_mutex);
     TableLockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Write, context);
 
-    if (disk->exists(path))
+    if (disk->existsDirectory(path))
         disk->removeRecursive(path);
     else
         LOG_INFO(getLogger("StorageJoin"), "Path {} is already removed from disk {}", path, disk->getName());
diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp
index 43a8ccf969c..08b4fd74899 100644
--- a/src/Storages/StorageLog.cpp
+++ b/src/Storages/StorageLog.cpp
@@ -683,7 +683,7 @@ void StorageLog::loadMarks(const WriteLock & lock /* already locked exclusively
         return;
 
     size_t num_marks = 0;
-    if (disk->exists(marks_file_path))
+    if (disk->existsFile(marks_file_path))
     {
         size_t file_size = disk->getFileSize(marks_file_path);
         if (file_size % (num_data_files * sizeof(Mark)) != 0)
diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp
index 117e3e8e7de..e3946648290 100644
--- a/src/Storages/StorageMaterializedView.cpp
+++ b/src/Storages/StorageMaterializedView.cpp
@@ -344,7 +344,7 @@ void StorageMaterializedView::read(
     if (query_plan.isInitialized())
     {
         auto mv_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, context, processed_stage);
-        auto target_header = query_plan.getCurrentDataStream().header;
+        auto target_header = query_plan.getCurrentHeader();
 
         /// No need to convert columns that does not exist in MV
         removeNonCommonColumns(mv_header, target_header);
@@ -366,7 +366,7 @@ void StorageMaterializedView::read(
              * In that case underlying table returns joined columns as well.
              */
             converting_actions.removeUnusedActions();
-            auto converting_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(converting_actions));
+            auto converting_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(converting_actions));
             converting_step->setStepDescription("Convert target table structure to MaterializedView structure");
             query_plan.addStep(std::move(converting_step));
         }
diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index f954859f151..981f133791a 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -408,7 +408,7 @@ ReadFromMerge::ReadFromMerge(
     size_t num_streams,
     StoragePtr storage,
     QueryProcessingStage::Enum processed_stage)
-    : SourceStepWithFilter(DataStream{.header = common_header_}, column_names_, query_info_, storage_snapshot_, context_)
+    : SourceStepWithFilter(common_header_, column_names_, query_info_, storage_snapshot_, context_)
     , required_max_block_size(max_block_size)
     , requested_num_streams(num_streams)
     , common_header(std::move(common_header_))
@@ -421,8 +421,8 @@ ReadFromMerge::ReadFromMerge(
 
 void ReadFromMerge::addFilter(FilterDAGInfo filter)
 {
-    output_stream->header = FilterTransform::transformHeader(
-            output_stream->header,
+    output_header = FilterTransform::transformHeader(
+            *output_header,
             &filter.actions,
             filter.column_name,
             filter.do_remove_column);
@@ -435,7 +435,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu
 
     if (selected_tables.empty())
     {
-        pipeline.init(Pipe(std::make_shared<NullSource>(output_stream->header)));
+        pipeline.init(Pipe(std::make_shared<NullSource>(*output_header)));
         return;
     }
 
@@ -469,7 +469,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu
 
     if (pipelines.empty())
     {
-        pipeline.init(Pipe(std::make_shared<NullSource>(output_stream->header)));
+        pipeline.init(Pipe(std::make_shared<NullSource>(*output_header)));
         return;
     }
 
@@ -673,7 +673,7 @@ std::vector<ReadFromMerge::ChildPlan> ReadFromMerge::createChildrenPlans(SelectQ
             for (const auto & filter_info : pushed_down_filters)
             {
                 auto filter_step = std::make_unique<FilterStep>(
-                    child.plan.getCurrentDataStream(),
+                    child.plan.getCurrentHeader(),
                     filter_info.actions.clone(),
                     filter_info.column_name,
                     filter_info.do_remove_column);
@@ -1056,7 +1056,7 @@ void ReadFromMerge::addVirtualColumns(
 
     /// Add virtual columns if we don't already have them.
 
-    Block plan_header = child.plan.getCurrentDataStream().header;
+    Block plan_header = child.plan.getCurrentHeader();
 
     if (context->getSettingsRef()[Setting::allow_experimental_analyzer])
     {
@@ -1074,9 +1074,9 @@ void ReadFromMerge::addVirtualColumns(
             column.column = column.type->createColumnConst(0, Field(database_name));
 
             auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
-            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentDataStream(), std::move(adding_column_dag));
+            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentHeader(), std::move(adding_column_dag));
             child.plan.addStep(std::move(expression_step));
-            plan_header = child.plan.getCurrentDataStream().header;
+            plan_header = child.plan.getCurrentHeader();
         }
 
         if (has_table_virtual_column && common_header.has(table_column)
@@ -1088,9 +1088,9 @@ void ReadFromMerge::addVirtualColumns(
             column.column = column.type->createColumnConst(0, Field(table_name));
 
             auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
-            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentDataStream(), std::move(adding_column_dag));
+            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentHeader(), std::move(adding_column_dag));
             child.plan.addStep(std::move(expression_step));
-            plan_header = child.plan.getCurrentDataStream().header;
+            plan_header = child.plan.getCurrentHeader();
         }
     }
     else
@@ -1103,9 +1103,9 @@ void ReadFromMerge::addVirtualColumns(
             column.column = column.type->createColumnConst(0, Field(database_name));
 
             auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
-            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentDataStream(), std::move(adding_column_dag));
+            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentHeader(), std::move(adding_column_dag));
             child.plan.addStep(std::move(expression_step));
-            plan_header = child.plan.getCurrentDataStream().header;
+            plan_header = child.plan.getCurrentHeader();
         }
 
         if (has_table_virtual_column && common_header.has("_table") && !plan_header.has("_table"))
@@ -1116,9 +1116,9 @@ void ReadFromMerge::addVirtualColumns(
             column.column = column.type->createColumnConst(0, Field(table_name));
 
             auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column));
-            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentDataStream(), std::move(adding_column_dag));
+            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentHeader(), std::move(adding_column_dag));
             child.plan.addStep(std::move(expression_step));
-            plan_header = child.plan.getCurrentDataStream().header;
+            plan_header = child.plan.getCurrentHeader();
         }
     }
 }
@@ -1297,7 +1297,7 @@ void ReadFromMerge::RowPolicyData::addStorageFilter(SourceStepWithFilter * step)
 
 void ReadFromMerge::RowPolicyData::addFilterTransform(QueryPlan & plan) const
 {
-    auto filter_step = std::make_unique<FilterStep>(plan.getCurrentDataStream(), actions_dag.clone(), filter_column_name, true /* remove filter column */);
+    auto filter_step = std::make_unique<FilterStep>(plan.getCurrentHeader(), actions_dag.clone(), filter_column_name, true /* remove filter column */); /// The DAG is cloned, so this const method hands the step its own copy of actions_dag.
     plan.addStep(std::move(filter_step));
 }
 
@@ -1477,7 +1477,7 @@ void ReadFromMerge::convertAndFilterSourceStream(
     ContextPtr local_context,
     ChildPlan & child)
 {
-    Block before_block_header = child.plan.getCurrentDataStream().header;
+    Block before_block_header = child.plan.getCurrentHeader();
 
     auto storage_sample_block = snapshot->metadata->getSampleBlock();
     auto pipe_columns = before_block_header.getNamesAndTypesList();
@@ -1503,7 +1503,7 @@ void ReadFromMerge::convertAndFilterSourceStream(
                 throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected to have 1 output but got {}", nodes.size());
 
             actions_dag.addOrReplaceInOutputs(actions_dag.addAlias(*nodes.front(), alias.name));
-            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentDataStream(), std::move(actions_dag));
+            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentHeader(), std::move(actions_dag));
             child.plan.addStep(std::move(expression_step));
         }
     }
@@ -1518,7 +1518,7 @@ void ReadFromMerge::convertAndFilterSourceStream(
 
             auto dag = std::make_shared<ActionsDAG>(pipe_columns);
             auto actions_dag = expression_analyzer.getActionsDAG(true, false);
-            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentDataStream(), std::move(actions_dag));
+            auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentHeader(), std::move(actions_dag));
             child.plan.addStep(std::move(expression_step));
         }
     }
@@ -1532,11 +1532,11 @@ void ReadFromMerge::convertAndFilterSourceStream(
     if (row_policy_data_opt)
         row_policy_data_opt->addFilterTransform(child.plan);
 
-    auto convert_actions_dag = ActionsDAG::makeConvertingActions(child.plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
+    auto convert_actions_dag = ActionsDAG::makeConvertingActions(child.plan.getCurrentHeader().getColumnsWithTypeAndName(),
                                                                 header.getColumnsWithTypeAndName(),
                                                                 convert_actions_match_columns_mode);
 
-    auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentDataStream(), std::move(convert_actions_dag));
+    auto expression_step = std::make_unique<ExpressionStep>(child.plan.getCurrentHeader(), std::move(convert_actions_dag));
     child.plan.addStep(std::move(expression_step));
 }
 
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index e33f38bd231..abc66df0d8b 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -141,7 +141,6 @@ StorageMergeTree::StorageMergeTree(
 {
     initializeDirectoriesAndFormatVersion(relative_data_path_, LoadingStrictnessLevel::ATTACH <= mode, date_column_name);
 
-
     loadDataParts(LoadingStrictnessLevel::FORCE_RESTORE <= mode, std::nullopt);
 
     if (mode < LoadingStrictnessLevel::ATTACH && !getDataPartsForInternalUsage().empty() && !isStaticStorage())
@@ -925,8 +924,8 @@ void StorageMergeTree::loadDeduplicationLog()
     auto disk = getDisks()[0];
     std::string path = fs::path(relative_data_path) / "deduplication_logs";
 
-    /// If either there is already a deduplication log, or we will be able to use it.
-    if (!disk->isReadOnly() || disk->exists(path))
+    /// Deduplication log only matters on INSERTs, so it is created and loaded only when the disk is not read-only.
+    if (!disk->isReadOnly())
     {
         deduplication_log = std::make_unique<MergeTreeDeduplicationLog>(path, (*settings)[MergeTreeSetting::non_replicated_deduplication_window], format_version, disk);
         deduplication_log->load();
@@ -2499,7 +2498,7 @@ std::optional<CheckResult> StorageMergeTree::checkDataNext(DataValidationTasksPt
         /// If the checksums file is not present, calculate the checksums and write them to disk.
         static constexpr auto checksums_path = "checksums.txt";
         bool noop;
-        if (part->isStoredOnDisk() && !part->getDataPartStorage().exists(checksums_path))
+        if (part->isStoredOnDisk() && !part->getDataPartStorage().existsFile(checksums_path))
         {
             try
             {
diff --git a/src/Storages/StorageMergeTreeIndex.cpp b/src/Storages/StorageMergeTreeIndex.cpp
index 15728290f19..1d641add275 100644
--- a/src/Storages/StorageMergeTreeIndex.cpp
+++ b/src/Storages/StorageMergeTreeIndex.cpp
@@ -260,7 +260,7 @@ public:
         Block sample_block,
         std::shared_ptr<StorageMergeTreeIndex> storage_)
         : SourceStepWithFilter(
-            DataStream{.header = std::move(sample_block)},
+            std::move(sample_block),
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -351,7 +351,7 @@ void ReadFromMergeTreeIndex::initializePipeline(QueryPipelineBuilder & pipeline,
         filtered_parts.size(),
         storage->source_table->getStorageID().getNameForLogs());
 
-    pipeline.init(Pipe(std::make_shared<MergeTreeIndexSource>(getOutputStream().header, storage->key_sample_block, std::move(filtered_parts), context, storage->with_marks)));
+    pipeline.init(Pipe(std::make_shared<MergeTreeIndexSource>(getOutputHeader(), storage->key_sample_block, std::move(filtered_parts), context, storage->with_marks)));
 }
 
 MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const ExpressionActionsPtr & virtual_columns_filter) const
diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp
index 0784154853e..085b60ae9b0 100644
--- a/src/Storages/StoragePostgreSQL.cpp
+++ b/src/Storages/StoragePostgreSQL.cpp
@@ -138,7 +138,7 @@ public:
         String remote_table_schema_,
         String remote_table_name_,
         postgres::ConnectionHolderPtr connection_)
-        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_)
+        : SourceStepWithFilter(std::move(sample_block), column_names_, query_info_, storage_snapshot_, context_)
         , logger(getLogger("ReadFromPostgreSQL"))
         , max_block_size(max_block_size_)
         , remote_table_schema(remote_table_schema_)
@@ -169,7 +169,7 @@ public:
             transform_query_limit);
         LOG_TRACE(logger, "Query: {}", query);
 
-        pipeline.init(Pipe(std::make_shared<PostgreSQLSource<>>(std::move(connection), query, getOutputStream().header, max_block_size)));
+        pipeline.init(Pipe(std::make_shared<PostgreSQLSource<>>(std::move(connection), query, getOutputHeader(), max_block_size)));
     }
 
     LoggerPtr logger;
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index de1ecb43bb1..b5b07a129bd 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1623,7 +1623,7 @@ void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(
 
         bool found = false;
         for (const DiskPtr & disk : disks)
-            if (disk->exists(fs::path(path) / part_name))
+            if (disk->existsDirectory(fs::path(path) / part_name))
                 found = true;
 
         if (!found)
@@ -9624,7 +9624,7 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co
     }
 
     /// If part is temporary refcount file may be absent
-    if (part.getDataPartStorage().exists(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK))
+    if (part.getDataPartStorage().existsFile(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK))
     {
         auto ref_count = part.getDataPartStorage().getRefCount(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK);
         if (ref_count > 0) /// Keep part shard info for frozen backups
@@ -10552,7 +10552,7 @@ bool StorageReplicatedMergeTree::removeSharedDetachedPart(DiskPtr disk, const St
         throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Invalid detached part name {} on disk {}", path, disk->getName());
 
     fs::path checksums = fs::path(path) / IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK;
-    if (disk->exists(checksums))
+    if (disk->existsFile(checksums))
     {
         if (disk->getRefCount(checksums) == 0)
         {
diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp
index 990a2019cc9..2562378e10b 100644
--- a/src/Storages/StorageSet.cpp
+++ b/src/Storages/StorageSet.cpp
@@ -214,7 +214,7 @@ std::optional<UInt64> StorageSet::totalBytes(const Settings &) const
 
 void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &)
 {
-    if (disk->exists(path))
+    if (disk->existsDirectory(path))
         disk->removeRecursive(path);
     else
         LOG_INFO(getLogger("StorageSet"), "Path {} is already removed from disk {}", path, disk->getName());
@@ -237,9 +237,9 @@ void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_sn
 
 void StorageSetOrJoinBase::restore()
 {
-    if (!disk->exists(fs::path(path) / "tmp/"))
+    if (!disk->existsDirectory(fs::path(path) / "tmp"))
     {
-        disk->createDirectories(fs::path(path) / "tmp/");
+        disk->createDirectories(fs::path(path) / "tmp");
         return;
     }
 
@@ -253,7 +253,7 @@ void StorageSetOrJoinBase::restore()
         const auto & name = dir_it->name();
         const auto & file_path = dir_it->path();
 
-        if (disk->isFile(file_path)
+        if (disk->existsFile(file_path)
             && endsWith(name, file_suffix)
             && disk->getFileSize(file_path) > 0)
         {
diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp
index 6fd8bfe5b6b..b16e5c0cc4c 100644
--- a/src/Storages/StorageStripeLog.cpp
+++ b/src/Storages/StorageStripeLog.cpp
@@ -476,7 +476,7 @@ void StorageStripeLog::loadIndices(const WriteLock & lock /* already locked excl
     if (indices_loaded)
         return;
 
-    if (disk->exists(index_file_path))
+    if (disk->existsFile(index_file_path))
     {
         CompressedReadBufferFromFile index_in(disk->readFile(index_file_path, getContext()->getReadSettings().adjustBufferSize(4096)));
         indices.read(index_in);
diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h
index 345dd62c687..f7848ea83b6 100644
--- a/src/Storages/StorageTableFunction.h
+++ b/src/Storages/StorageTableFunction.h
@@ -62,6 +62,7 @@ public:
     /// Avoid loading nested table by returning nullptr/false for all table functions.
     StoragePolicyPtr getStoragePolicy() const override { return nullptr; }
     bool storesDataOnDisk() const override { return false; }
+    bool supportsReplication() const override { return false; } /// Constant answer; the nested storage is not consulted.
 
     void startup() override { }
     void shutdown(bool is_drop) override
@@ -101,7 +102,7 @@ public:
                                   processed_stage, max_block_size, num_streams);
         if (add_conversion)
         {
-            auto from_header = query_plan.getCurrentDataStream().header;
+            auto from_header = query_plan.getCurrentHeader();
             auto to_header = getHeaderForProcessingStage(column_names, storage_snapshot,
                                                          query_info, context, processed_stage);
 
@@ -111,7 +112,7 @@ public:
                     ActionsDAG::MatchColumnsMode::Name);
 
             auto step = std::make_unique<ExpressionStep>(
-                query_plan.getCurrentDataStream(),
+                query_plan.getCurrentHeader(),
                 std::move(convert_actions_dag));
 
             step->setStepDescription("Converting columns");
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 6dea6436a5a..75136ce9a8a 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -1074,7 +1074,7 @@ public:
         std::function<void(std::ostream &)> read_post_data_callback_,
         size_t max_block_size_,
         size_t num_streams_)
-        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_)
+        : SourceStepWithFilter(std::move(sample_block), column_names_, query_info_, storage_snapshot_, context_)
         , storage(std::move(storage_))
         , uri_options(uri_options_)
         , info(std::move(info_))
diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp
index bcbcd4f66c8..6794c442f66 100644
--- a/src/Storages/StorageView.cpp
+++ b/src/Storages/StorageView.cpp
@@ -186,16 +186,16 @@ void StorageView::read(
 
     /// It's expected that the columns read from storage are not constant.
     /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery.
-    ActionsDAG materializing_actions(query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
+    ActionsDAG materializing_actions(query_plan.getCurrentHeader().getColumnsWithTypeAndName());
     materializing_actions.addMaterializingOutputActions(/*materialize_sparse=*/ true);
 
-    auto materializing = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(materializing_actions));
+    auto materializing = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(materializing_actions));
     materializing->setStepDescription("Materialize constants after VIEW subquery");
     query_plan.addStep(std::move(materializing));
 
     /// And also convert to expected structure.
     const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names);
-    const auto & header = query_plan.getCurrentDataStream().header;
+    const auto & header = query_plan.getCurrentHeader();
 
     const auto * select_with_union = current_inner_query->as<ASTSelectWithUnionQuery>();
     if (select_with_union && hasJoin(*select_with_union) && changedNullabilityOneWay(header, expected_header))
@@ -212,7 +212,7 @@ void StorageView::read(
             expected_header.getColumnsWithTypeAndName(),
             ActionsDAG::MatchColumnsMode::Name);
 
-    auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(convert_actions_dag));
+    auto converting = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(convert_actions_dag));
     converting->setStepDescription("Convert VIEW subquery result to VIEW table structure");
     query_plan.addStep(std::move(converting));
 }
diff --git a/src/Storages/System/IStorageSystemOneBlock.cpp b/src/Storages/System/IStorageSystemOneBlock.cpp
index b8f32fcdb83..92f07f7ab27 100644
--- a/src/Storages/System/IStorageSystemOneBlock.cpp
+++ b/src/Storages/System/IStorageSystemOneBlock.cpp
@@ -30,7 +30,7 @@ public:
         std::shared_ptr<IStorageSystemOneBlock> storage_,
         std::vector<UInt8> columns_mask_)
         : SourceStepWithFilter(
-            DataStream{.header = std::move(sample_block)},
+            std::move(sample_block),
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -80,7 +80,7 @@ void IStorageSystemOneBlock::read(
 
 void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    const Block & sample_block = getOutputStream().header;
+    const Block & sample_block = getOutputHeader();
     MutableColumns res_columns = sample_block.cloneEmptyColumns();
     const ActionsDAG::Node * predicate = filter ? filter->getOutputs().at(0) : nullptr;
     storage->fillData(res_columns, context, predicate, std::move(columns_mask));
diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp
index 47ca3ad89a8..359d353a78f 100644
--- a/src/Storages/System/StorageSystemColumns.cpp
+++ b/src/Storages/System/StorageSystemColumns.cpp
@@ -337,7 +337,7 @@ public:
         std::vector<UInt8> columns_mask_,
         size_t max_block_size_)
         : SourceStepWithFilter(
-            DataStream{.header = std::move(sample_block)},
+            std::move(sample_block),
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -404,7 +404,7 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline,
     Block block_to_filter;
     Storages storages;
     Pipes pipes;
-    auto header = getOutputStream().header;
+    auto header = getOutputHeader();
 
     {
         /// Add `database` column.
diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp
index a41771df406..9c0f3c3dd59 100644
--- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp
+++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp
@@ -197,7 +197,7 @@ public:
         std::vector<UInt8> columns_mask_,
         size_t max_block_size_)
         : SourceStepWithFilter(
-            DataStream{.header = std::move(sample_block)},
+            std::move(sample_block),
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -282,7 +282,7 @@ void ReadFromSystemDataSkippingIndices::initializePipeline(QueryPipelineBuilder
 
     ColumnPtr & filtered_databases = block.getByPosition(0).column;
     pipeline.init(Pipe(std::make_shared<DataSkippingIndicesSource>(
-        std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases), context)));
+        std::move(columns_mask), getOutputHeader(), max_block_size, std::move(filtered_databases), context)));
 }
 
 }
diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp
index 0d0ae666c10..ee7336b155b 100644
--- a/src/Storages/System/StorageSystemDetachedParts.cpp
+++ b/src/Storages/System/StorageSystemDetachedParts.cpp
@@ -29,7 +29,7 @@ namespace
 void calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from, UInt64 & total_size)
 {
     /// Files or directories of detached part may not exist. Only count the size of existing files.
-    if (disk->isFile(from))
+    if (disk->existsFile(from))
     {
         total_size += disk->getFileSize(from);
     }
@@ -288,7 +288,7 @@ public:
         size_t max_block_size_,
         size_t num_streams_)
         : SourceStepWithFilter(
-            DataStream{.header = std::move(sample_block)},
+            std::move(sample_block),
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -365,7 +365,7 @@ void ReadFromSystemDetachedParts::initializePipeline(QueryPipelineBuilder & pipe
 
     for (size_t i = 0; i < num_streams; ++i)
     {
-        auto source = std::make_shared<DetachedPartsSource>(getOutputStream().header, state, columns_mask, max_block_size);
+        auto source = std::make_shared<DetachedPartsSource>(getOutputHeader(), state, columns_mask, max_block_size);
         pipe.addSource(std::move(source));
     }
 
diff --git a/src/Storages/System/StorageSystemDetachedTables.cpp b/src/Storages/System/StorageSystemDetachedTables.cpp
index 56c5e49b467..f3124a4acb0 100644
--- a/src/Storages/System/StorageSystemDetachedTables.cpp
+++ b/src/Storages/System/StorageSystemDetachedTables.cpp
@@ -209,7 +209,7 @@ ReadFromSystemDetachedTables::ReadFromSystemDetachedTables(
     Block sample_block,
     std::vector<UInt8> columns_mask_,
     size_t max_block_size_)
-    : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_)
+    : SourceStepWithFilter(std::move(sample_block), column_names_, query_info_, storage_snapshot_, context_)
     , columns_mask(std::move(columns_mask_))
     , max_block_size(max_block_size_)
 {
@@ -231,7 +231,7 @@ void ReadFromSystemDetachedTables::initializePipeline(QueryPipelineBuilder & pip
 {
     auto pipe = Pipe(std::make_shared<DetachedTablesBlockSource>(
         std::move(columns_mask),
-        getOutputStream().header,
+        getOutputHeader(),
         max_block_size,
         std::move(filtered_databases_column),
         std::move(filtered_tables_column),
diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp
index 1b800fd64a9..56a45d7b51d 100644
--- a/src/Storages/System/StorageSystemParts.cpp
+++ b/src/Storages/System/StorageSystemParts.cpp
@@ -141,7 +141,7 @@ void StorageSystemParts::processNextStorage(
         auto part_state = all_parts_state[part_number];
 
         ColumnSize columns_size = part->getTotalColumnsSize();
-        ColumnSize secondary_indexes_size = part->getTotalSeconaryIndicesSize();
+        ColumnSize secondary_indexes_size = part->getTotalSecondaryIndicesSize();
 
         size_t src_index = 0, res_index = 0;
         if (columns_mask[src_index++])
diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp
index 5ba2089ac14..61175e33a23 100644
--- a/src/Storages/System/StorageSystemPartsBase.cpp
+++ b/src/Storages/System/StorageSystemPartsBase.cpp
@@ -259,7 +259,7 @@ ReadFromSystemPartsBase::ReadFromSystemPartsBase(
     std::vector<UInt8> columns_mask_,
     bool has_state_column_)
     : SourceStepWithFilter(
-        DataStream{.header = std::move(sample_block)},
+        std::move(sample_block),
         column_names_,
         query_info_,
         storage_snapshot_,
@@ -328,7 +328,7 @@ void StorageSystemPartsBase::read(
 void ReadFromSystemPartsBase::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
     auto stream = storage->getStoragesInfoStream(std::move(filter_by_database), std::move(filter_by_other_columns), context);
-    auto header = getOutputStream().header;
+    auto header = getOutputHeader();
 
     MutableColumns res_columns = header.cloneEmptyColumns();
 
diff --git a/src/Storages/System/StorageSystemProjections.cpp b/src/Storages/System/StorageSystemProjections.cpp
index 7082853e4f9..ae76f11f7cf 100644
--- a/src/Storages/System/StorageSystemProjections.cpp
+++ b/src/Storages/System/StorageSystemProjections.cpp
@@ -185,7 +185,7 @@ public:
         std::vector<UInt8> columns_mask_,
         size_t max_block_size_)
         : SourceStepWithFilter(
-            DataStream{.header = std::move(sample_block)},
+            std::move(sample_block),
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -269,7 +269,7 @@ void ReadFromSystemProjections::initializePipeline(QueryPipelineBuilder & pipeli
 
     ColumnPtr & filtered_databases = block.getByPosition(0).column;
     pipeline.init(Pipe(std::make_shared<ProjectionsSource>(
-        std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases), context)));
+        std::move(columns_mask), getOutputHeader(), max_block_size, std::move(filtered_databases), context)));
 }
 
 }
diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp
index 783efee4cc4..975f92fb7d8 100644
--- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp
+++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp
@@ -90,10 +90,12 @@ private:
     static bool skipPredicateForShadowDir(const String & local_path)
     {
         // `shadow/{backup_name}/revision.txt` is not an object metadata file
+        // `frozen_metadata.txt` (e.g. `shadow/../{part_name}/frozen_metadata.txt`) is not an object metadata file either; it is matched by file name only, in any directory
         const auto path = fs::path(local_path);
-        return path.filename() == "revision.txt" &&
+        return (path.filename() == "revision.txt" &&
                 path.parent_path().has_parent_path() &&
-                path.parent_path().parent_path().filename() == "shadow";
+                path.parent_path().parent_path().filename() == "shadow") ||
+                path.filename() == "frozen_metadata.txt";
     }
 
     const UInt64 max_block_size;
@@ -130,7 +132,7 @@ public:
         const Block & header,
         UInt64 max_block_size_)
         : SourceStepWithFilter(
-            {.header = header},
+            header,
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -197,7 +199,7 @@ void StorageSystemRemoteDataPaths::read(
 
 void ReadFromSystemRemoteDataPaths::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & /*settings*/)
 {
-    const auto & header = getOutputStream().header;
+    const auto & header = getOutputHeader();
     auto source = std::make_shared<SystemRemoteDataPathsSource>(std::move(disks), header, max_block_size, context);
     source->setStorageLimits(storage_limits);
     processors.emplace_back(source);
@@ -269,14 +271,14 @@ bool SystemRemoteDataPathsSource::nextFile()
         {
             const auto & disk = disks[current_disk].second;
 
-            /// Files or directories can disappear due to concurrent operations
-            if (!disk->exists(current_path))
-                continue;
-
             /// Stop if current path is a file
-            if (disk->isFile(current_path))
+            if (disk->existsFile(current_path))
                 return true;
 
+            /// Files or directories can disappear due to concurrent operations
+            if (!disk->existsFileOrDirectory(current_path))
+                continue;
+
             /// If current path is a directory list its contents and step into it
             std::vector<std::string> children;
             disk->listFiles(current_path, children);
@@ -401,7 +403,7 @@ Chunk SystemRemoteDataPathsSource::generate()
 
             if (cache)
             {
-                auto cache_paths = cache->tryGetCachePaths(cache->createKeyForPath(object.remote_path));
+                auto cache_paths = cache->tryGetCachePaths(FileCacheKey::fromPath(object.remote_path));
                 col_cache_paths->insert(Array(cache_paths.begin(), cache_paths.end()));
             }
             else
diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp
index e86fcac4f3e..70c1f5ef6ff 100644
--- a/src/Storages/System/StorageSystemReplicas.cpp
+++ b/src/Storages/System/StorageSystemReplicas.cpp
@@ -275,7 +275,7 @@ public:
         size_t max_block_size_,
         std::shared_ptr<StorageSystemReplicasImpl> impl_)
         : SourceStepWithFilter(
-            DataStream{.header = std::move(sample_block)},
+            std::move(sample_block),
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -421,7 +421,7 @@ private:
 
 void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    auto header = getOutputStream().header;
+    auto header = getOutputHeader();
 
     MutableColumnPtr col_database_mut = ColumnString::create();
     MutableColumnPtr col_table_mut = ColumnString::create();
diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp
index e79c42f00cc..fd0bcccc6b0 100644
--- a/src/Storages/System/StorageSystemStackTrace.cpp
+++ b/src/Storages/System/StorageSystemStackTrace.cpp
@@ -473,7 +473,7 @@ public:
     {
         Pipe pipe(std::make_shared<StackTraceSource>(
             column_names,
-            getOutputStream().header,
+            getOutputHeader(),
             std::move(filter_actions_dag),
             context,
             max_block_size,
@@ -489,7 +489,7 @@ public:
         Block sample_block,
         size_t max_block_size_,
         LoggerPtr log_)
-        : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_)
+        : SourceStepWithFilter(std::move(sample_block), column_names_, query_info_, storage_snapshot_, context_)
         , column_names(column_names_)
         , max_block_size(max_block_size_)
         , log(log_)
diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp
index a828b1e189f..9d5c68c261f 100644
--- a/src/Storages/System/StorageSystemTables.cpp
+++ b/src/Storages/System/StorageSystemTables.cpp
@@ -728,7 +728,7 @@ public:
         std::vector<UInt8> columns_mask_,
         size_t max_block_size_)
         : SourceStepWithFilter(
-            DataStream{.header = std::move(sample_block)},
+            std::move(sample_block),
             column_names_,
             query_info_,
             storage_snapshot_,
@@ -784,7 +784,7 @@ void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes)
 void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
     Pipe pipe(std::make_shared<TablesBlockSource>(
-        std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases_column), std::move(filtered_tables_column), context));
+        std::move(columns_mask), getOutputHeader(), max_block_size, std::move(filtered_databases_column), std::move(filtered_tables_column), context));
     pipeline.init(std::move(pipe));
 }
 
diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp
index 8dab26c7eb3..468fa3c58fa 100644
--- a/src/Storages/System/StorageSystemZooKeeper.cpp
+++ b/src/Storages/System/StorageSystemZooKeeper.cpp
@@ -704,7 +704,7 @@ ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(
     const Block & header,
     UInt64 max_block_size_)
     : SourceStepWithFilter(
-        {.header = header},
+        header,
         column_names_,
         query_info_,
         storage_snapshot_,
@@ -716,7 +716,7 @@ ReadFromSystemZooKeeper::ReadFromSystemZooKeeper(
 
 void ReadFromSystemZooKeeper::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
 {
-    const auto & header = getOutputStream().header;
+    const auto & header = getOutputHeader();
     auto source = std::make_shared<SystemZooKeeperSource>(std::move(paths), header, max_block_size, context);
     source->setStorageLimits(storage_limits);
     processors.emplace_back(source);
diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp
index 22eb7aaee9c..e75d6185064 100644
--- a/src/Storages/WindowView/StorageWindowView.cpp
+++ b/src/Storages/WindowView/StorageWindowView.cpp
@@ -1164,13 +1164,13 @@ void StorageWindowView::read(
     if (query_plan.isInitialized())
     {
         auto wv_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage);
-        auto target_header = query_plan.getCurrentDataStream().header;
+        auto target_header = query_plan.getCurrentHeader();
 
         if (!blocksHaveEqualStructure(wv_header, target_header))
         {
             auto converting_actions = ActionsDAG::makeConvertingActions(
                 target_header.getColumnsWithTypeAndName(), wv_header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name);
-            auto converting_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(converting_actions));
+            auto converting_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(converting_actions));
             converting_step->setStepDescription("Convert Target table structure to WindowView structure");
             query_plan.addStep(std::move(converting_step));
         }
diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp
index d7387c51878..bbf32c68d19 100644
--- a/src/Storages/buildQueryTreeForShard.cpp
+++ b/src/Storages/buildQueryTreeForShard.cpp
@@ -264,16 +264,16 @@ TableNodePtr executeSubqueryNode(const QueryTreeNodePtr & subquery_node,
     InterpreterSelectQueryAnalyzer interpreter(subquery_node, context_copy, subquery_options);
     auto & query_plan = interpreter.getQueryPlan();
 
-    auto sample_block_with_unique_names = query_plan.getCurrentDataStream().header;
+    auto sample_block_with_unique_names = query_plan.getCurrentHeader();
     makeUniqueColumnNamesInBlock(sample_block_with_unique_names);
 
-    if (!blocksHaveEqualStructure(sample_block_with_unique_names, query_plan.getCurrentDataStream().header))
+    if (!blocksHaveEqualStructure(sample_block_with_unique_names, query_plan.getCurrentHeader()))
     {
         auto actions_dag = ActionsDAG::makeConvertingActions(
-            query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
+            query_plan.getCurrentHeader().getColumnsWithTypeAndName(),
             sample_block_with_unique_names.getColumnsWithTypeAndName(),
             ActionsDAG::MatchColumnsMode::Position);
-        auto converting_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), std::move(actions_dag));
+        auto converting_step = std::make_unique<ExpressionStep>(query_plan.getCurrentHeader(), std::move(actions_dag));
         query_plan.addStep(std::move(converting_step));
     }
 
diff --git a/src/Storages/examples/merge_selector.cpp b/src/Storages/examples/merge_selector.cpp
index a60d1ce402c..b029a51a074 100644
--- a/src/Storages/examples/merge_selector.cpp
+++ b/src/Storages/examples/merge_selector.cpp
@@ -1,7 +1,7 @@
 #include <iostream>
 #include <IO/ReadBufferFromFileDescriptor.h>
 #include <IO/ReadHelpers.h>
-#include <Storages/MergeTree/SimpleMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h>
 
 
 /** This program tests merge-selecting algorithm.
diff --git a/src/Storages/examples/merge_selector2.cpp b/src/Storages/examples/merge_selector2.cpp
index 49cbb892dda..291ba19c868 100644
--- a/src/Storages/examples/merge_selector2.cpp
+++ b/src/Storages/examples/merge_selector2.cpp
@@ -2,7 +2,7 @@
 #include <iostream>
 #include <IO/ReadBufferFromFileDescriptor.h>
 #include <IO/Operators.h>
-#include <Storages/MergeTree/SimpleMergeSelector.h>
+#include <Storages/MergeTree/MergeSelectors/SimpleMergeSelector.h>
 #include <Common/formatReadable.h>
 
 
diff --git a/tests/ci/ci_buddy.py b/tests/ci/ci_buddy.py
index d3c2ec60b96..164af72f4be 100644
--- a/tests/ci/ci_buddy.py
+++ b/tests/ci/ci_buddy.py
@@ -9,6 +9,9 @@ from botocore.exceptions import ClientError
 
 from ci_config import CI
 from ci_utils import WithIter
+from commit_status_helper import get_commit_filtered_statuses, get_repo
+from get_robot_token import get_best_robot_token
+from github_helper import GitHub
 from pr_info import PRInfo
 
 
@@ -52,7 +55,8 @@ class CIBuddy:
         self.pr_number = pr_info.number
         self.head_ref = pr_info.head_ref
         self.commit_url = pr_info.commit_html_url
-        self.sha = pr_info.sha[:10]
+        self.sha_full = pr_info.sha
+        self.sha = self.sha_full[:10]
 
     def check_workflow(self):
         CI.GH.print_workflow_results()
@@ -61,13 +65,25 @@ class CIBuddy:
                 self.post_job_error(
                     f"{CI.Envs.GITHUB_WORKFLOW} Workflow Failed", critical=True
                 )
-        else:
-            res = CI.GH.get_workflow_job_result(CI.GH.ActionsNames.RunConfig)
-            if res != CI.GH.ActionStatuses.SUCCESS:
-                print(f"ERROR: RunConfig status is [{res}] - post report to slack")
-                self.post_job_error(
-                    f"{CI.Envs.GITHUB_WORKFLOW} Workflow Failed", critical=True
-                )
+            return
+
+        res = CI.GH.get_workflow_job_result(CI.GH.ActionsNames.RunConfig)
+        if res == CI.GH.ActionStatuses.SUCCESS:
+            # the normal case: RunConfig succeeded, nothing to report
+            return
+
+        gh = GitHub(get_best_robot_token())
+        commit = get_repo(gh).get_commit(self.sha_full)
+        statuses = get_commit_filtered_statuses(commit)
+        if any(st.context == CI.StatusNames.PR_CHECK for st in statuses):
+            print(
+                f"INFO: RunConfig status is [{res}], but it "
+                f'contains "{CI.StatusNames.PR_CHECK}" status, do not report error'
+            )
+            return
+
+        print(f"ERROR: RunConfig status is [{res}] - post report to slack")
+        self.post_job_error(f"{CI.Envs.GITHUB_WORKFLOW} Workflow Failed", critical=True)
 
     @staticmethod
     def _get_webhooks():
diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py
index f7b2e12bb63..28323b6786c 100644
--- a/tests/ci/commit_status_helper.py
+++ b/tests/ci/commit_status_helper.py
@@ -552,7 +552,7 @@ CHECK_DESCRIPTIONS = [
     CheckDescription(
         CI.StatusNames.PR_CHECK,
         "Checks correctness of the PR's body",
-        lambda x: x == "PR Check",
+        lambda x: x == CI.StatusNames.PR_CHECK,
     ),
     CheckDescription(
         CI.StatusNames.SYNC,
diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py
index 2d3ac4f1485..7f665165c59 100644
--- a/tests/ci/run_check.py
+++ b/tests/ci/run_check.py
@@ -4,8 +4,7 @@ import re
 import sys
 from typing import Tuple
 
-from github import Github
-
+from build_download_helper import APIException
 from ci_config import CI
 from commit_status_helper import (
     create_ci_report,
@@ -17,6 +16,7 @@ from commit_status_helper import (
 )
 from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL
 from get_robot_token import get_best_robot_token
+from github_helper import GitHub
 from pr_info import PRInfo
 from report import FAILURE, PENDING, SUCCESS, StatusType
 
@@ -46,7 +46,6 @@ TRUSTED_CONTRIBUTORS = {
 }
 
 OK_SKIP_LABELS = {CI.Labels.RELEASE, CI.Labels.PR_BACKPORT, CI.Labels.PR_CHERRYPICK}
-PR_CHECK = "PR Check"
 
 
 LABEL_CATEGORIES = {
@@ -206,11 +205,33 @@ def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str]:
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
+    fail_early = False
+    try:
+        pr_info = PRInfo(
+            need_orgs=True, pr_event_from_api=True, need_changed_files=True
+        )
+    except APIException as e:
+        logging.exception(
+            "Failed to receive the PRInfo, fall back to a simple case and exit with error",
+            exc_info=e,
+        )
+        pr_info = PRInfo()
+        fail_early = True
+
     # The case for special branches like backports and releases without created
     # PRs, like merged backport branches that are reset immediately after merge
-    if pr_info.number == 0:
+    if pr_info.number == 0 or fail_early:
         print("::notice ::Cannot run, no PR exists for the commit")
+        gh = GitHub(get_best_robot_token(), per_page=100)
+        commit = get_commit(gh, pr_info.sha)
+        post_commit_status(
+            commit,
+            FAILURE,
+            "",
+            "No PRs found for the commit, finished early",
+            CI.StatusNames.PR_CHECK,
+            pr_info,
+        )
         sys.exit(1)
 
     can_run, description = should_run_ci_for_pr(pr_info)
@@ -219,7 +240,7 @@ def main():
         sys.exit(0)
 
     description = format_description(description)
-    gh = Github(get_best_robot_token(), per_page=100)
+    gh = GitHub(get_best_robot_token(), per_page=100)
     commit = get_commit(gh, pr_info.sha)
     status = SUCCESS  # type: StatusType
 
@@ -284,7 +305,7 @@ def main():
             status,
             url,
             format_description(description_error),
-            PR_CHECK,
+            CI.StatusNames.PR_CHECK,
             pr_info,
         )
         sys.exit(1)
@@ -309,7 +330,7 @@ def main():
             status,
             "",
             description,
-            PR_CHECK,
+            CI.StatusNames.PR_CHECK,
             pr_info,
         )
         print("::error ::Cannot run")
@@ -321,7 +342,7 @@ def main():
         status,
         "",
         description,
-        PR_CHECK,
+        CI.StatusNames.PR_CHECK,
         pr_info,
     )
 
diff --git a/tests/ci/sqltest.py b/tests/ci/sqltest.py
index 34a204746ae..b3e4233f654 100644
--- a/tests/ci/sqltest.py
+++ b/tests/ci/sqltest.py
@@ -86,17 +86,18 @@ def main():
                 logging.info("Run failed")
 
     subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
+    report_file_path = workspace_path / "report.html"
 
     paths = {
         "run.log": run_log_path,
         "server.log.zst": workspace_path / "server.log.zst",
         "server.err.log.zst": workspace_path / "server.err.log.zst",
-        "report.html": workspace_path / "report.html",
+        "report.html": report_file_path,
         "test.log": workspace_path / "test.log",
     }
     status = SUCCESS
     description = "See the report"
-    test_results = [TestResult(description, "OK")]
+    test_results = [TestResult(description, "OK", log_files=[report_file_path])]
 
     JobReport(
         description=description,
diff --git a/tests/clickhouse-test b/tests/clickhouse-test
index 8fddebb8217..1ba88025277 100755
--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@@ -292,6 +292,7 @@ def clickhouse_execute_http(
         "http_receive_timeout": timeout,
         "http_send_timeout": timeout,
         "output_format_parallel_formatting": 0,
+        "max_rows_to_read": 0,  # Some queries read from system.text_log which might get too big
     }
     if settings is not None:
         params.update(settings)
diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml
index 091071f0637..74bad7528c8 100644
--- a/tests/config/config.d/storage_conf.xml
+++ b/tests/config/config.d/storage_conf.xml
@@ -19,7 +19,7 @@
                 <type>cache</type>
                 <disk>s3_disk</disk>
                 <path>s3_cache/</path>
-                <max_size>104857600</max_size>
+                <max_size>209715200</max_size>
                 <max_file_segment_size>5Mi</max_file_segment_size>
                 <cache_on_write_operations>1</cache_on_write_operations>
                 <delayed_cleanup_interval_ms>100</delayed_cleanup_interval_ms>
diff --git a/tests/integration/compose/docker_compose_ldap.yml b/tests/integration/compose/docker_compose_ldap.yml
index 440a271272b..f4067575395 100644
--- a/tests/integration/compose/docker_compose_ldap.yml
+++ b/tests/integration/compose/docker_compose_ldap.yml
@@ -1,6 +1,6 @@
 services:
     openldap:
-        image: bitnami/openldap:2.6.6
+        image: bitnami/openldap:2.6.8
         restart: always
         environment:
             LDAP_ROOT: dc=example,dc=org
@@ -13,12 +13,14 @@ services:
             LDAP_PORT_NUMBER: ${LDAP_INTERNAL_PORT:-1389}
         ports:
             - ${LDAP_EXTERNAL_PORT:-1389}:${LDAP_INTERNAL_PORT:-1389}
+        volumes:
+            - /misc/openldap/initialized.sh:/docker-entrypoint-initdb.d/initialized.sh
         healthcheck:
             test: >
-                ldapsearch -x -H ldap://localhost:$$LDAP_PORT_NUMBER -D $$LDAP_ADMIN_DN -w $$LDAP_ADMIN_PASSWORD -b $$LDAP_ROOT
+                test -f /tmp/.openldap-initialized
+                && ldapsearch -x -H ldap://localhost:$$LDAP_PORT_NUMBER -D $$LDAP_ADMIN_DN -w $$LDAP_ADMIN_PASSWORD -b $$LDAP_ROOT
                 | grep -c -E "member: cn=j(ohn|ane)doe"
                 | grep 2 >> /dev/null
-                && cat /run/slapd/slapd.pid
             interval: 10s
             retries: 10
             timeout: 2s
diff --git a/tests/integration/compose/docker_compose_postgres.yml b/tests/integration/compose/docker_compose_postgres.yml
index 20738ec7c6d..e072589ac4a 100644
--- a/tests/integration/compose/docker_compose_postgres.yml
+++ b/tests/integration/compose/docker_compose_postgres.yml
@@ -1,7 +1,7 @@
 services:
     postgres1:
         image: postgres
-        command: ["postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=4", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_statement=all", "-c", "max_connections=200"]
+        command: ["postgres", "-c", "wal_level=logical", "-c", "max_replication_slots=4", "-c", "logging_collector=on", "-c", "log_directory=/postgres/logs", "-c", "log_statement=all", "-c", "max_connections=200", "-c", "statement_timeout=180000"] # statement_timeout (180000 ms = 3 min): abort stuck statements so tests do not time out when postgres becomes unresponsive
         restart: always
         expose:
             - ${POSTGRES_PORT:-5432}
diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py
index 9b7fc9dc5d7..dc50a29362a 100644
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@@ -2691,7 +2691,8 @@ class ClickHouseCluster:
                     [
                         "bash",
                         "-c",
-                        f"/opt/bitnami/openldap/bin/ldapsearch -x -H ldap://{self.ldap_host}:{self.ldap_port} -D cn=admin,dc=example,dc=org -w clickhouse -b dc=example,dc=org"
+                        "test -f /tmp/.openldap-initialized"
+                        f"&& /opt/bitnami/openldap/bin/ldapsearch -x -H ldap://{self.ldap_host}:{self.ldap_port} -D cn=admin,dc=example,dc=org -w clickhouse -b dc=example,dc=org"
                         f'| grep -c -E "member: cn=j(ohn|ane)doe"'
                         f"| grep 2 >> /dev/null",
                     ],
diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py
index f35f303b65f..220ee13cb25 100644
--- a/tests/integration/test_storage_azure_blob_storage/test.py
+++ b/tests/integration/test_storage_azure_blob_storage/test.py
@@ -1412,7 +1412,7 @@ def test_parallel_read(cluster):
 
     res = azure_query(
         node,
-        f"select count() from azureBlobStorage('{connection_string}', 'cont', 'test_parallel_read.parquet')",
+        f"select count() from azureBlobStorage('{connection_string}', 'cont', 'test_parallel_read.parquet') settings remote_filesystem_read_method='read'",
     )
     assert int(res) == 10000
     assert_logs_contain_with_retry(node, "AzureBlobStorage readBigAt read bytes")
diff --git a/tests/integration/test_storage_delta/configs/config.d/filesystem_caches.xml b/tests/integration/test_storage_delta/configs/config.d/filesystem_caches.xml
new file mode 100644
index 00000000000..e91362640fe
--- /dev/null
+++ b/tests/integration/test_storage_delta/configs/config.d/filesystem_caches.xml
@@ -0,0 +1,8 @@
+<clickhouse>
+  <filesystem_caches>
+    <cache1>
+      <max_size>1Gi</max_size>
+      <path>cache1</path>
+    </cache1>
+  </filesystem_caches>
+</clickhouse>
diff --git a/tests/integration/test_storage_delta/configs/config.d/remote_servers.xml b/tests/integration/test_storage_delta/configs/config.d/remote_servers.xml
new file mode 100644
index 00000000000..84d16206080
--- /dev/null
+++ b/tests/integration/test_storage_delta/configs/config.d/remote_servers.xml
@@ -0,0 +1,16 @@
+<clickhouse>
+    <remote_servers>
+        <cluster>
+            <shard>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </cluster>
+    </remote_servers>
+</clickhouse>
diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py
index c95193cc765..ec1cfc374d7 100644
--- a/tests/integration/test_storage_delta/test.py
+++ b/tests/integration/test_storage_delta/test.py
@@ -5,6 +5,7 @@ import os
 import random
 import string
 import time
+import uuid
 from datetime import datetime
 
 import delta
@@ -34,6 +35,7 @@ from pyspark.sql.window import Window
 
 import helpers.client
 from helpers.cluster import ClickHouseCluster
+from helpers.network import PartitionManager
 from helpers.s3_tools import (
     get_file_contents,
     list_s3_objects,
@@ -70,10 +72,26 @@ def started_cluster():
         cluster = ClickHouseCluster(__file__, with_spark=True)
         cluster.add_instance(
             "node1",
-            main_configs=["configs/config.d/named_collections.xml"],
+            main_configs=[
+                "configs/config.d/named_collections.xml",
+                "configs/config.d/filesystem_caches.xml",
+                "configs/config.d/remote_servers.xml",
+            ],
             user_configs=["configs/users.d/users.xml"],
             with_minio=True,
             stay_alive=True,
+            with_zookeeper=True,
+        )
+        cluster.add_instance(
+            "node2",
+            main_configs=[
+                "configs/config.d/named_collections.xml",
+                "configs/config.d/remote_servers.xml",
+            ],
+            user_configs=["configs/users.d/users.xml"],
+            with_minio=True,
+            stay_alive=True,
+            with_zookeeper=True,
         )
 
         logging.info("Starting cluster...")
@@ -826,3 +844,161 @@ def test_complex_types(started_cluster):
             f"SELECT metadata FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}' , 'minio', 'minio123')"
         )
     )
+
+
+@pytest.mark.parametrize("storage_type", ["s3"])
+def test_filesystem_cache(started_cluster, storage_type):
+    """
+    Read the same Delta table twice through filesystem cache `cache1`: the
+    first read must hit S3 and fill the cache, the second must be served
+    from the cache alone.
+
+    `storage_type` only labels the parametrized case; it is not read here.
+    """
+    node = started_cluster.instances["node1"]
+    spark = started_cluster.spark_session
+    minio_client = started_cluster.minio_client
+    TABLE_NAME = randomize_table_name("test_filesystem_cache")
+    bucket = started_cluster.minio_bucket
+
+    def run_select():
+        # Run the cached SELECT under a fresh query_id and flush the logs so
+        # that its system.query_log row can be looked up right away.
+        query_id = f"{TABLE_NAME}-{uuid.uuid4()}"
+        node.query(
+            f"SELECT * FROM {TABLE_NAME} SETTINGS filesystem_cache_name = 'cache1'",
+            query_id=query_id,
+        )
+        node.query("SYSTEM FLUSH LOGS")
+        return query_id
+
+    def profile_event(event, query_id):
+        # Value of one ProfileEvents counter of the finished query.
+        return int(
+            node.query(
+                f"SELECT ProfileEvents['{event}'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
+            )
+        )
+
+    if not minio_client.bucket_exists(bucket):
+        minio_client.make_bucket(bucket)
+
+    parquet_data_path = create_initial_data_file(
+        started_cluster,
+        node,
+        "SELECT number, toString(number) FROM numbers(100)",
+        TABLE_NAME,
+    )
+
+    write_delta_from_file(spark, parquet_data_path, f"/{TABLE_NAME}")
+    upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "")
+    create_delta_table(node, TABLE_NAME, bucket=bucket)
+
+    # Cold read: data comes from S3 and is written into the cache.
+    first_query_id = run_select()
+    cached_bytes = profile_event("CachedReadBufferCacheWriteBytes", first_query_id)
+    assert profile_event("S3GetObject", first_query_id) > 0
+
+    # Warm read: exactly the bytes cached above are read back, with no S3 GETs.
+    second_query_id = run_select()
+    assert cached_bytes == profile_event(
+        "CachedReadBufferReadFromCacheBytes", second_query_id
+    )
+    assert profile_event("S3GetObject", second_query_id) == 0
+
+
+def test_replicated_database_and_unavailable_s3(started_cluster):
+    """
+    Create a deltaLake table in a Replicated database while packets from the
+    MinIO port to node2 are rejected: the table must still appear on node2, and
+    a digest overwritten in ZooKeeper must be gone after node2 is restarted.
+    """
+    node1 = started_cluster.instances["node1"]
+    node2 = started_cluster.instances["node2"]
+
+    DB_NAME = randomize_table_name("db")
+    TABLE_NAME = randomize_table_name("test_replicated_database_and_unavailable_s3")
+    minio_client = started_cluster.minio_client
+    bucket = started_cluster.minio_restricted_bucket
+
+    if not minio_client.bucket_exists(bucket):
+        minio_client.make_bucket(bucket)
+
+    node1.query(
+        f"CREATE DATABASE {DB_NAME} ENGINE=Replicated('/clickhouse/databases/{DB_NAME}', 'shard1', 'node1')"
+    )
+    node2.query(
+        f"CREATE DATABASE {DB_NAME} ENGINE=Replicated('/clickhouse/databases/{DB_NAME}', 'shard1', 'node2')"
+    )
+
+    parquet_data_path = create_initial_data_file(
+        started_cluster,
+        node1,
+        "SELECT number, toString(number) FROM numbers(100)",
+        TABLE_NAME,
+    )
+
+    endpoint_url = f"http://{started_cluster.minio_ip}:{started_cluster.minio_port}"
+    aws_access_key_id = "minio"
+    aws_secret_access_key = "minio123"
+
+    schema = pa.schema(
+        [
+            ("id", pa.int32()),
+            ("name", pa.string()),
+        ]
+    )
+
+    data = [
+        pa.array([1, 2, 3], type=pa.int32()),
+        pa.array(["John Doe", "Jane Smith", "Jake Johnson"], type=pa.string()),
+    ]
+    storage_options = {
+        "AWS_ENDPOINT_URL": endpoint_url,
+        "AWS_ACCESS_KEY_ID": aws_access_key_id,
+        "AWS_SECRET_ACCESS_KEY": aws_secret_access_key,
+        "AWS_ALLOW_HTTP": "true",
+        "AWS_S3_ALLOW_UNSAFE_RENAME": "true",
+    }
+    path = f"s3://root/{TABLE_NAME}"
+    table = pa.Table.from_arrays(data, schema=schema)
+
+    write_deltalake(path, table, storage_options=storage_options)
+
+    with PartitionManager() as pm:
+        pm_rule_reject = {
+            "probability": 1,
+            "destination": node2.ip_address,
+            "source_port": started_cluster.minio_port,
+            "action": "REJECT --reject-with tcp-reset",
+        }
+        pm._add_rule(pm_rule_reject)
+
+        node1.query(
+            f"""
+            DROP TABLE IF EXISTS {DB_NAME}.{TABLE_NAME};
+            CREATE TABLE {DB_NAME}.{TABLE_NAME}
+            AS deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{TABLE_NAME}' , 'minio', 'minio123')
+            """
+        )
+
+        assert TABLE_NAME in node1.query(
+            f"select name from system.tables where database = '{DB_NAME}'"
+        )
+        assert TABLE_NAME in node2.query(
+            f"select name from system.tables where database = '{DB_NAME}'"
+        )
+
+        replica_path = f"/clickhouse/databases/{DB_NAME}/replicas/shard1|node2"
+        zk = started_cluster.get_kazoo_client("zoo1")
+        zk.set(replica_path + "/digest", "123456".encode())
+
+        assert "123456" in node2.query(
+            f"SELECT * FROM system.zookeeper WHERE path = '{replica_path}'"
+        )
+
+        node2.restart_clickhouse()
+
+        assert "123456" not in node2.query(
+            f"SELECT * FROM system.zookeeper WHERE path = '{replica_path}'"
+        )
diff --git a/tests/integration/test_storage_iceberg/configs/config.d/filesystem_caches.xml b/tests/integration/test_storage_iceberg/configs/config.d/filesystem_caches.xml
new file mode 100644
index 00000000000..e91362640fe
--- /dev/null
+++ b/tests/integration/test_storage_iceberg/configs/config.d/filesystem_caches.xml
@@ -0,0 +1,8 @@
+<clickhouse>
+  <filesystem_caches> <!-- the test selects an entry below via SETTINGS filesystem_cache_name = 'cache1' -->
+    <cache1>
+      <max_size>1Gi</max_size>
+      <path>cache1</path>
+    </cache1>
+  </filesystem_caches>
+</clickhouse>
diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py
index a2cbd8c4351..36aba550dbd 100644
--- a/tests/integration/test_storage_iceberg/test.py
+++ b/tests/integration/test_storage_iceberg/test.py
@@ -72,7 +72,10 @@ def started_cluster():
             with_hdfs = False
         cluster.add_instance(
             "node1",
-            main_configs=["configs/config.d/named_collections.xml"],
+            main_configs=[
+                "configs/config.d/named_collections.xml",
+                "configs/config.d/filesystem_caches.xml",
+            ],
             user_configs=["configs/users.d/users.xml"],
             with_minio=True,
             with_azurite=True,
@@ -870,3 +873,66 @@ def test_restart_broken_s3(started_cluster):
     )
 
     assert int(instance.query(f"SELECT count() FROM {TABLE_NAME}")) == 100
+
+
+@pytest.mark.parametrize("storage_type", ["s3"])
+def test_filesystem_cache(started_cluster, storage_type):
+    """
+    Query one Iceberg table twice with `filesystem_cache_name = 'cache1'` and
+    compare the ProfileEvents of both runs: the first run downloads from S3
+    and populates the cache, the second run reads the same number of bytes
+    from the cache and issues no S3 GET requests.
+    """
+    instance = started_cluster.instances["node1"]
+    spark = started_cluster.spark_session
+    TABLE_NAME = "test_filesystem_cache_" + storage_type + "_" + get_uuid_str()
+    # The same path is used as the local source and as the upload destination.
+    table_dir = f"/iceberg_data/default/{TABLE_NAME}/"
+
+    def event_value(name, query_id):
+        # Look up a single ProfileEvents counter of a finished query.
+        return int(
+            instance.query(
+                f"SELECT ProfileEvents['{name}'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
+            )
+        )
+
+    write_iceberg_from_df(
+        spark,
+        generate_data(spark, 0, 10),
+        TABLE_NAME,
+        mode="overwrite",
+        format_version="1",
+        partition_by="a",
+    )
+
+    default_upload_directory(
+        started_cluster,
+        storage_type,
+        table_dir,
+        table_dir,
+    )
+
+    create_iceberg_table(storage_type, instance, TABLE_NAME, started_cluster)
+
+    # First run: nothing is cached yet.
+    cold_id = f"{TABLE_NAME}-{uuid.uuid4()}"
+    instance.query(
+        f"SELECT * FROM {TABLE_NAME} SETTINGS filesystem_cache_name = 'cache1'",
+        query_id=cold_id,
+    )
+    instance.query("SYSTEM FLUSH LOGS")
+
+    written = event_value("CachedReadBufferCacheWriteBytes", cold_id)
+    assert event_value("S3GetObject", cold_id) > 0
+
+    # Second run: everything written above must come back from the cache.
+    warm_id = f"{TABLE_NAME}-{uuid.uuid4()}"
+    instance.query(
+        f"SELECT * FROM {TABLE_NAME} SETTINGS filesystem_cache_name = 'cache1'",
+        query_id=warm_id,
+    )
+    instance.query("SYSTEM FLUSH LOGS")
+
+    assert written == event_value("CachedReadBufferReadFromCacheBytes", warm_id)
+    assert event_value("S3GetObject", warm_id) == 0
diff --git a/tests/integration/test_storage_s3/configs/filesystem_caches.xml b/tests/integration/test_storage_s3/configs/filesystem_caches.xml
new file mode 100644
index 00000000000..e91362640fe
--- /dev/null
+++ b/tests/integration/test_storage_s3/configs/filesystem_caches.xml
@@ -0,0 +1,8 @@
+<clickhouse>
+  <filesystem_caches> <!-- the test selects an entry below via SETTINGS filesystem_cache_name = 'cache1' -->
+    <cache1>
+      <max_size>1Gi</max_size>
+      <path>cache1</path>
+    </cache1>
+  </filesystem_caches>
+</clickhouse>
diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py
index 7032c14d543..ad1842f4509 100644
--- a/tests/integration/test_storage_s3/test.py
+++ b/tests/integration/test_storage_s3/test.py
@@ -56,6 +56,7 @@ def started_cluster():
                 "configs/named_collections.xml",
                 "configs/schema_cache.xml",
                 "configs/blob_log.xml",
+                "configs/filesystem_caches.xml",
             ],
             user_configs=[
                 "configs/access.xml",
@@ -2394,3 +2395,61 @@ def test_respect_object_existence_on_partitioned_write(started_cluster):
     )
 
     assert int(result) == 44
+
+
+def test_filesystem_cache(started_cluster):
+    """
+    Check that the s3() table function honours `filesystem_cache_name`.
+
+    A 100-row file is written to the bucket and read twice through `cache1`:
+    the first read must download it and fill the cache, the second one must be
+    served from the cache without any S3 GET request or new cache write.
+    """
+    bucket = started_cluster.minio_bucket
+    instance = started_cluster.instances["dummy"]
+    table_name = f"test_filesystem_cache-{uuid.uuid4()}"
+    url = (
+        f"http://{started_cluster.minio_host}:{started_cluster.minio_port}"
+        f"/{bucket}/{table_name}.tsv"
+    )
+
+    def read_with_cache():
+        # Read the file through `cache1` under a fresh query_id, then flush
+        # the logs so that its system.query_log row can be queried.
+        query_id = f"{table_name}-{uuid.uuid4()}"
+        instance.query(
+            f"select * from s3('{url}') SETTINGS filesystem_cache_name = 'cache1', enable_filesystem_cache=1",
+            query_id=query_id,
+        )
+        instance.query("SYSTEM FLUSH LOGS")
+        return query_id
+
+    def profile_event(name, query_id):
+        # Value of one ProfileEvents counter of the finished query.
+        return int(
+            instance.query(
+                f"SELECT ProfileEvents['{name}'] FROM system.query_log WHERE query_id = '{query_id}' AND type = 'QueryFinish'"
+            )
+        )
+
+    instance.query(
+        f"insert into function s3('{url}', auto, 'x UInt64') select number from numbers(100) SETTINGS s3_truncate_on_insert=1"
+    )
+
+    first_id = read_with_cache()
+    count = profile_event("CachedReadBufferCacheWriteBytes", first_id)
+
+    # 290 matches 0..99 written one per line: 190 digits + 100 newlines.
+    assert count == 290
+    assert profile_event("S3GetObject", first_id) > 0
+
+    # Drop the schema cache before the second read.
+    instance.query("SYSTEM DROP SCHEMA CACHE")
+
+    second_id = read_with_cache()
+
+    # NOTE(review): twice the cached size is presumably one pass for schema
+    # inference plus one for the data itself -- confirm.
+    assert count * 2 == profile_event("CachedReadBufferReadFromCacheBytes", second_id)
+    assert profile_event("CachedReadBufferCacheWriteBytes", second_id) == 0
+    assert profile_event("S3GetObject", second_id) == 0
diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql
index 0f13217c236..917789aec10 100644
--- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql
+++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql
@@ -9,7 +9,7 @@ system flush logs;
 drop table if exists logs;
 create view logs as select * from system.text_log where now() - toIntervalMinute(120) < event_time;
 
-SET max_rows_to_read = 0;
+SET max_rows_to_read = 0; -- system.text_log can be really big
 
 -- Check that we don't have too many messages formatted with fmt::runtime or strings concatenation.
 -- 0.001 threshold should be always enough, the value was about 0.00025
diff --git a/tests/queries/0_stateless/00357_to_string_complex_types.reference b/tests/queries/0_stateless/00357_to_string_complex_types.reference
index 7fb7cd5132f..dd8039103b4 100644
--- a/tests/queries/0_stateless/00357_to_string_complex_types.reference
+++ b/tests/queries/0_stateless/00357_to_string_complex_types.reference
@@ -22,3 +22,4 @@
 (8,'8',[0,1,2,3,4,5,6,7])	(8,\'8\',[0,1,2,3,4,5,6,7])
 (9,'9',[0,1,2,3,4,5,6,7,8])	(9,\'9\',[0,1,2,3,4,5,6,7,8])
 0A
+2024-01-01 00:00:00	2024-01-01 00:00:00.100	(1,\'2024-01-01 00:00:00.120\')	[\'2024-01-01 00:00:00.123\',\'2024-01-01 00:00:00.123400\']	2024-01-01 00:00:00
diff --git a/tests/queries/0_stateless/00357_to_string_complex_types.sql b/tests/queries/0_stateless/00357_to_string_complex_types.sql
index 6ac2c6ee019..dcfb8db9f66 100644
--- a/tests/queries/0_stateless/00357_to_string_complex_types.sql
+++ b/tests/queries/0_stateless/00357_to_string_complex_types.sql
@@ -5,3 +5,10 @@ SELECT hex(toString(countState())) FROM (SELECT * FROM system.numbers LIMIT 10);
 SELECT CAST((1, 'Hello', toDate('2016-01-01')) AS String), CAST([1, 2, 3] AS String);
 SELECT (number, toString(number), range(number)) AS x, CAST(x AS String) FROM system.numbers LIMIT 10;
 SELECT hex(CAST(countState() AS String)) FROM (SELECT * FROM system.numbers LIMIT 10);
+
+SELECT toDateTime64('2024-01-01 00:00:00.00', 6), -- expected: 2024-01-01 00:00:00 (zero fraction is not printed)
+       cast(toDateTime64('2024-01-01 00:00:00.100', 6) as String), -- expected: ...00.100
+       toString((1, toDateTime64('2024-01-01 00:00:00.12000', 6))), -- inside a tuple, expected: ...00.120
+       toString([toDateTime64('2024-01-01 00:00:00.123000', 6), toDateTime64('2024-01-01 00:00:00.123400', 6)]), -- inside an array, expected: ...00.123 and ...00.123400
+       JSONExtractString('{"a" : "2024-01-01 00:00:00"}', 'a')::DateTime64(6) -- String cast to DateTime64(6), expected: 2024-01-01 00:00:00
+       SETTINGS date_time_64_output_format_cut_trailing_zeros_align_to_groups_of_thousands = true;
diff --git a/tests/queries/0_stateless/00632_aggregation_window_funnel.reference b/tests/queries/0_stateless/00632_aggregation_window_funnel.reference
index d586e5a4b67..ae087de3842 100644
--- a/tests/queries/0_stateless/00632_aggregation_window_funnel.reference
+++ b/tests/queries/0_stateless/00632_aggregation_window_funnel.reference
@@ -1,64 +1,179 @@
+-- { echoOn }
+drop table if exists funnel_test;
+create table funnel_test (timestamp UInt32, event UInt32) engine=Memory;
+insert into funnel_test values (0,1000),(1,1001),(2,1002),(3,1003),(4,1004),(5,1005),(6,1006),(7,1007),(8,1008);
+select 1 = windowFunnel(10000)(timestamp, event = 1000) from funnel_test;
 1
+select 2 = windowFunnel(10000)(timestamp, event = 1000, event = 1001) from funnel_test;
 1
+select 3 = windowFunnel(10000)(timestamp, event = 1000, event = 1001, event = 1002) from funnel_test;
 1
+select 4 = windowFunnel(10000)(timestamp, event = 1000, event = 1001, event = 1002, event = 1008) from funnel_test;
 1
+select 1 = windowFunnel(1)(timestamp, event = 1000) from funnel_test;
 1
+select 3 = windowFunnel(2)(timestamp, event = 1003, event = 1004, event = 1005, event = 1006, event = 1007) from funnel_test;
 1
+select 4 = windowFunnel(3)(timestamp, event = 1003, event = 1004, event = 1005, event = 1006, event = 1007) from funnel_test;
 1
+select 5 = windowFunnel(4)(timestamp, event = 1003, event = 1004, event = 1005, event = 1006, event = 1007) from funnel_test;
 1
+drop table if exists funnel_test2;
+create table funnel_test2 (uid UInt32 default 1,timestamp DateTime, event UInt32) engine=Memory;
+insert into funnel_test2(timestamp, event) values  ('2018-01-01 01:01:01',1001),('2018-01-01 01:01:02',1002),('2018-01-01 01:01:03',1003),('2018-01-01 01:01:04',1004),('2018-01-01 01:01:05',1005),('2018-01-01 01:01:06',1006),('2018-01-01 01:01:07',1007),('2018-01-01 01:01:08',1008);
+select 5 = windowFunnel(4)(timestamp, event = 1003, event = 1004, event = 1005, event = 1006, event = 1007) from funnel_test2;
 1
+select 2 = windowFunnel(10000)(timestamp, event = 1001, event = 1008) from funnel_test2;
 1
+select 1 = windowFunnel(10000)(timestamp, event = 1008, event = 1001) from funnel_test2;
 1
+select 5 = windowFunnel(4)(timestamp, event = 1003, event = 1004, event = 1005, event = 1006, event = 1007) from funnel_test2;
 1
+select 4 = windowFunnel(4)(timestamp, event <= 1007, event >= 1002, event <= 1006, event >= 1004) from funnel_test2;
 1
+drop table if exists funnel_test_u64;
+create table funnel_test_u64 (uid UInt32 default 1,timestamp UInt64, event UInt32) engine=Memory;
+insert into funnel_test_u64(timestamp, event) values  ( 1e14 + 1 ,1001),(1e14 + 2,1002),(1e14 + 3,1003),(1e14 + 4,1004),(1e14 + 5,1005),(1e14 + 6,1006),(1e14 + 7,1007),(1e14 + 8,1008);
+select 5 = windowFunnel(4)(timestamp, event = 1003, event = 1004, event = 1005, event = 1006, event = 1007) from funnel_test_u64;
 1
+select 2 = windowFunnel(10000)(timestamp, event = 1001, event = 1008) from funnel_test_u64;
 1
+select 1 = windowFunnel(10000)(timestamp, event = 1008, event = 1001) from funnel_test_u64;
 1
+select 5 = windowFunnel(4)(timestamp, event = 1003, event = 1004, event = 1005, event = 1006, event = 1007) from funnel_test_u64;
 1
+select 4 = windowFunnel(4)(timestamp, event <= 1007, event >= 1002, event <= 1006, event >= 1004) from funnel_test_u64;
 1
+drop table if exists funnel_test_strict;
+create table funnel_test_strict (timestamp UInt32, event UInt32) engine=Memory;
+insert into funnel_test_strict values (00,1000),(10,1001),(20,1002),(30,1003),(40,1004),(50,1005),(51,1005),(60,1006),(70,1007),(80,1008);
+select 6 = windowFunnel(10000, 'strict_deduplication')(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004, event = 1005, event = 1006) from funnel_test_strict;
 1
+select 7 = windowFunnel(10000)(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004, event = 1005, event = 1006) from funnel_test_strict;
 1
+drop table funnel_test;
+drop table funnel_test2;
+drop table funnel_test_u64;
+drop table funnel_test_strict;
+drop table if exists funnel_test_strict_order;
+create table funnel_test_strict_order (dt DateTime, user int, event String) engine = MergeTree() partition by dt order by user;
+insert into funnel_test_strict_order values (1, 1, 'a') (2, 1, 'b') (3, 1, 'c');
+insert into funnel_test_strict_order values (1, 2, 'a') (2, 2, 'd') (3, 2, 'b') (4, 2, 'c');
+insert into funnel_test_strict_order values (1, 3, 'a') (2, 3, 'a') (3, 3, 'b') (4, 3, 'b') (5, 3, 'c') (6, 3, 'c');
+insert into funnel_test_strict_order values (1, 4, 'a') (2, 4, 'a') (3, 4, 'a') (4, 4, 'a') (5, 4, 'b') (6, 4, 'b') (7, 4, 'c') (8, 4, 'c');
+insert into funnel_test_strict_order values (1, 5, 'a') (2, 5, 'a') (3, 5, 'b') (4, 5, 'b') (5, 5, 'd') (6, 5, 'c') (7, 5, 'c');
+insert into funnel_test_strict_order values (1, 6, 'c') (2, 6, 'c') (3, 6, 'b') (4, 6, 'b') (5, 6, 'a') (6, 6, 'a');
+select user, windowFunnel(86400)(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow;
 [1, 3]
 [2, 3]
 [3, 3]
 [4, 3]
 [5, 3]
 [6, 1]
+select user, windowFunnel(86400, 'strict_order')(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow;
 [1, 3]
 [2, 1]
 [3, 3]
 [4, 3]
 [5, 2]
 [6, 1]
+select user, windowFunnel(86400, 'strict_deduplication', 'strict_order')(dt, event='a', event='b', event='c') as s from funnel_test_strict_order group by user order by user format JSONCompactEachRow;
 [1, 3]
 [2, 1]
 [3, 2]
 [4, 2]
 [5, 2]
 [6, 1]
+insert into funnel_test_strict_order values (1, 7, 'a') (2, 7, 'c') (3, 7, 'b');
+select user, windowFunnel(10, 'strict_order')(dt, event = 'a', event = 'b', event = 'c') as s from funnel_test_strict_order where user = 7 group by user format JSONCompactEachRow;
 [7, 1]
+drop table funnel_test_strict_order;
+--https://github.com/ClickHouse/ClickHouse/issues/27469
+drop table if exists strict_BiteTheDDDD;
+create table strict_BiteTheDDDD (ts UInt64, event String) engine = Log();
+insert into strict_BiteTheDDDD values (1,'a') (2,'b') (3,'c') (4,'b') (5,'d');
+select 3 = windowFunnel(86400, 'strict_deduplication')(ts, event='a', event='b', event='c', event='d') from strict_BiteTheDDDD format JSONCompactEachRow;
 [1]
+drop table strict_BiteTheDDDD;
+drop table if exists funnel_test_non_null;
+create table funnel_test_non_null (`dt` DateTime, `u` int, `a` Nullable(String), `b` Nullable(String)) engine = MergeTree() partition by dt order by u;
+insert into funnel_test_non_null values (1, 1, 'a1', 'b1') (2, 1, 'a2', 'b2');
+insert into funnel_test_non_null values (1, 2, 'a1', null) (2, 2, 'a2', null);
+insert into funnel_test_non_null values (1, 3, null, null);
+insert into funnel_test_non_null values (1, 4, null, 'b1') (2, 4, 'a2', null) (3, 4, null, 'b3');
+select u, windowFunnel(86400)(dt, COALESCE(a, '') = 'a1', COALESCE(a, '') = 'a2') as s from funnel_test_non_null group by u order by u format JSONCompactEachRow;
 [1, 2]
 [2, 2]
 [3, 0]
 [4, 0]
+select u, windowFunnel(86400)(dt, COALESCE(a, '') = 'a1', COALESCE(b, '') = 'b2') as s from funnel_test_non_null group by u order by u format JSONCompactEachRow;
 [1, 2]
 [2, 1]
 [3, 0]
 [4, 0]
+select u, windowFunnel(86400)(dt, a is null and b is null) as s from funnel_test_non_null group by u order by u format JSONCompactEachRow;
 [1, 0]
 [2, 0]
 [3, 1]
 [4, 0]
+select u, windowFunnel(86400)(dt, a is null, COALESCE(b, '') = 'b3') as s from funnel_test_non_null group by u order by u format JSONCompactEachRow;
 [1, 0]
 [2, 0]
 [3, 1]
 [4, 2]
+select u, windowFunnel(86400, 'strict_order')(dt, a is null, COALESCE(b, '') = 'b3') as s from funnel_test_non_null group by u order by u format JSONCompactEachRow;
 [1, 0]
 [2, 0]
 [3, 1]
 [4, 1]
+drop table funnel_test_non_null;
+create table funnel_test_strict_increase (timestamp UInt32, event UInt32) engine=Memory;
+insert into funnel_test_strict_increase values (0,1000),(1,1001),(1,1002),(1,1003),(2,1004);
+select 5 = windowFunnel(10000)(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004) from funnel_test_strict_increase;
 1
+select 2 = windowFunnel(10000, 'strict_increase')(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004) from funnel_test_strict_increase;
 1
+select 3 = windowFunnel(10000)(timestamp, event = 1004, event = 1004, event = 1004) from funnel_test_strict_increase;
 1
+select 1 = windowFunnel(10000, 'strict_once')(timestamp, event = 1004, event = 1004, event = 1004) from funnel_test_strict_increase;
 1
+select 1 = windowFunnel(10000, 'strict_increase')(timestamp, event = 1004, event = 1004, event = 1004) from funnel_test_strict_increase;
+1
+DROP TABLE IF EXISTS funnel_test2;
+create table funnel_test2 (event_ts UInt32, result String, uid UInt32) engine=Memory;
+insert into funnel_test2 SELECT data.1, data.2, data.3  FROM (
+        SELECT arrayJoin([
+            (100, 'failure', 234),
+            (200, 'success', 345),
+            (210, 'failure', 345),
+            (230, 'success', 345),
+            (250, 'failure', 234),
+            (180, 'failure', 123),
+            (220, 'failure', 123),
+            (250, 'success', 123)
+        ]) data);
+SELECT '-';
+-
+SELECT uid, windowFunnel(200, 'strict_once', 'strict_increase')( toUInt32(event_ts), result='failure', result='failure', result='success' )
+FROM funnel_test2 WHERE event_ts >= 0 AND event_ts <= 300 GROUP BY uid ORDER BY uid;
+123	3
+234	2
+345	1
+SELECT '-';
+-
+SELECT uid, windowFunnel(200, 'strict_once')( toUInt32(event_ts), result='failure', result='failure', result='success' )
+FROM funnel_test2 WHERE event_ts >= 0 AND event_ts <= 300 GROUP BY uid ORDER BY uid;
+123	3
+234	2
+345	1
+SELECT '-';
+-
+SELECT uid, windowFunnel(200, 'strict_once', 'strict_deduplication')( toUInt32(event_ts), result='failure', result='failure', result='success' )
+FROM funnel_test2 WHERE event_ts >= 0 AND event_ts <= 300 GROUP BY uid ORDER BY uid;
+123	3
+234	2
+345	1
+SELECT '-';
+-
+DROP TABLE IF EXISTS funnel_test2;
+drop table funnel_test_strict_increase;
diff --git a/tests/queries/0_stateless/00632_aggregation_window_funnel.sql b/tests/queries/0_stateless/00632_aggregation_window_funnel.sql
index d93f7127e4f..4c0d2140924 100644
--- a/tests/queries/0_stateless/00632_aggregation_window_funnel.sql
+++ b/tests/queries/0_stateless/00632_aggregation_window_funnel.sql
@@ -1,3 +1,4 @@
+-- { echoOn }
 drop table if exists funnel_test;
 
 create table funnel_test (timestamp UInt32, event UInt32) engine=Memory;
@@ -93,6 +94,37 @@ insert into funnel_test_strict_increase values (0,1000),(1,1001),(1,1002),(1,100
 select 5 = windowFunnel(10000)(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004) from funnel_test_strict_increase;
 select 2 = windowFunnel(10000, 'strict_increase')(timestamp, event = 1000, event = 1001, event = 1002, event = 1003, event = 1004) from funnel_test_strict_increase;
 select 3 = windowFunnel(10000)(timestamp, event = 1004, event = 1004, event = 1004) from funnel_test_strict_increase;
+select 1 = windowFunnel(10000, 'strict_once')(timestamp, event = 1004, event = 1004, event = 1004) from funnel_test_strict_increase;
 select 1 = windowFunnel(10000, 'strict_increase')(timestamp, event = 1004, event = 1004, event = 1004) from funnel_test_strict_increase;
 
+
+
+DROP TABLE IF EXISTS funnel_test2;
+create table funnel_test2 (event_ts UInt32, result String, uid UInt32) engine=Memory;
+insert into funnel_test2 SELECT data.1, data.2, data.3  FROM (
+        SELECT arrayJoin([
+            (100, 'failure', 234),
+            (200, 'success', 345),
+            (210, 'failure', 345),
+            (230, 'success', 345),
+            (250, 'failure', 234),
+            (180, 'failure', 123),
+            (220, 'failure', 123),
+            (250, 'success', 123)
+        ]) data);
+
+SELECT '-';
+SELECT uid, windowFunnel(200, 'strict_once', 'strict_increase')( toUInt32(event_ts), result='failure', result='failure', result='success' )
+FROM funnel_test2 WHERE event_ts >= 0 AND event_ts <= 300 GROUP BY uid ORDER BY uid;
+SELECT '-';
+SELECT uid, windowFunnel(200, 'strict_once')( toUInt32(event_ts), result='failure', result='failure', result='success' )
+FROM funnel_test2 WHERE event_ts >= 0 AND event_ts <= 300 GROUP BY uid ORDER BY uid;
+SELECT '-';
+SELECT uid, windowFunnel(200, 'strict_once', 'strict_deduplication')( toUInt32(event_ts), result='failure', result='failure', result='success' )
+FROM funnel_test2 WHERE event_ts >= 0 AND event_ts <= 300 GROUP BY uid ORDER BY uid;
+SELECT '-';
+
+DROP TABLE IF EXISTS funnel_test2;
+
+
 drop table funnel_test_strict_increase;
diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.sh b/tests/queries/0_stateless/00956_sensitive_data_masking.sh
index bd65b937648..55f24b7e888 100755
--- a/tests/queries/0_stateless/00956_sensitive_data_masking.sh
+++ b/tests/queries/0_stateless/00956_sensitive_data_masking.sh
@@ -144,8 +144,7 @@ echo 9
 $CLICKHOUSE_CLIENT \
    --server_logs_file=/dev/null \
    --query="SELECT if( count() > 0, 'text_log non empty', 'text_log empty') FROM system.text_log WHERE event_date >= yesterday() and message like '%find_me%';
-   select * from system.text_log where event_date >= yesterday() and message like '%TOPSECRET=TOPSECRET%';"  --ignore-error 
-
+   select * from system.text_log where event_date >= yesterday() and message like '%TOPSECRET=TOPSECRET%' SETTINGS max_rows_to_read = 0"  --ignore-error
 echo 'finish'
 rm -f "$tmp_file" >/dev/null 2>&1
 rm -f "$tmp_file2" >/dev/null 2>&1
diff --git a/tests/queries/0_stateless/00974_text_log_table_not_empty.sh b/tests/queries/0_stateless/00974_text_log_table_not_empty.sh
index ab1b32ad90e..7fdf144a068 100755
--- a/tests/queries/0_stateless/00974_text_log_table_not_empty.sh
+++ b/tests/queries/0_stateless/00974_text_log_table_not_empty.sh
@@ -6,12 +6,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 
 ${CLICKHOUSE_CLIENT} --query="SELECT 6103"
 
-for (( i=1; i <= 50; i++ )) 
+for (( i=1; i <= 50; i++ ))
 do
 
 ${CLICKHOUSE_CLIENT} --query="SYSTEM FLUSH LOGS"
 sleep 0.1;
-if [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() > 0 FROM system.text_log WHERE position(system.text_log.message, 'SELECT 6103') > 0 AND event_date >= yesterday()") == 1 ]]; then echo 1; exit; fi;
+if [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() > 0 FROM system.text_log WHERE position(system.text_log.message, 'SELECT 6103') > 0 AND event_date >= yesterday() SETTINGS max_rows_to_read = 0") == 1 ]]; then echo 1; exit; fi;
 
 done;
 
diff --git a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql
index b8998adbc52..787d4567218 100644
--- a/tests/queries/0_stateless/01165_lost_part_empty_partition.sql
+++ b/tests/queries/0_stateless/01165_lost_part_empty_partition.sql
@@ -1,5 +1,7 @@
 -- Tags: zookeeper
 
+SET max_rows_to_read = 0; -- system.text_log can be really big
+
 create table rmt1 (d DateTime, n int) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '1') order by n partition by toYYYYMMDD(d);
 create table rmt2 (d DateTime, n int) engine=ReplicatedMergeTree('/test/01165/{database}/rmt', '2') order by n partition by toYYYYMMDD(d);
 
diff --git a/tests/queries/0_stateless/01319_query_formatting_in_server_log.sql b/tests/queries/0_stateless/01319_query_formatting_in_server_log.sql
index dc88d3d48f7..4428d4fbf9c 100644
--- a/tests/queries/0_stateless/01319_query_formatting_in_server_log.sql
+++ b/tests/queries/0_stateless/01319_query_formatting_in_server_log.sql
@@ -2,5 +2,6 @@ SeLeCt 'ab
 cd' /* hello */ -- world
 , 1;
 
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SYSTEM FLUSH LOGS;
 SELECT extract(message, 'SeL.+?;') FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%SeLeCt \'ab\n%' ORDER BY event_time DESC LIMIT 1 FORMAT TSVRaw;
diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh
index eca2db359bb..66cebf16662 100755
--- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh
+++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh
@@ -30,7 +30,7 @@ $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_siz
 # Now wait for cleanup thread
 for _ in {1..60}; do
     $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS"
-    [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break;
+    [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%' SETTINGS max_rows_to_read = 0") -gt $((SCALE - 10)) ]] && break;
     sleep 1
 done
 
diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
index ec318db98bf..b38d59cf6a6 100755
--- a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
+++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh
@@ -72,6 +72,6 @@ ${CLICKHOUSE_CLIENT} --query_id "$query_id" --query "select i from simple where
 # We have to search the server's error log because the following warning message
 # is generated during pipeline destruction and thus is not sent to the client.
 ${CLICKHOUSE_CLIENT} --query "system flush logs"
-if [[ $(${CLICKHOUSE_CLIENT} --query "select count() > 0 from system.text_log where query_id = '$query_id' and level = 'Warning' and message like '%We have query_id removed but it\'s not recorded. This is a bug%' format TSVRaw") == 1 ]]; then echo "We have query_id removed but it's not recorded. This is a bug." >&2; exit 1; fi
+if [[ $(${CLICKHOUSE_CLIENT} --query "select count() > 0 from system.text_log where query_id = '$query_id' and level = 'Warning' and message like '%We have query_id removed but it\'s not recorded. This is a bug%' format TSVRaw SETTINGS max_rows_to_read = 0") == 1 ]]; then echo "We have query_id removed but it's not recorded. This is a bug." >&2; exit 1; fi
 
 ${CLICKHOUSE_CLIENT} --query "drop table simple"
diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference
index 8aa1f0b59d3..876cee60baa 100644
--- a/tests/queries/0_stateless/02156_storage_merge_prewhere.reference
+++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.reference
@@ -1,25 +1,25 @@
           Prewhere info
             Prewhere filter
-            Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
+            Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
           Prewhere info
             Prewhere filter
-            Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
+            Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
           Prewhere info
             Prewhere filter
-            Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
+            Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
           Prewhere info
             Prewhere filter
-            Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
+            Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
 2
         Filter column: and(equals(k, 3), notEmpty(v)) (removed)
           Prewhere info
             Prewhere filter
-            Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
+            Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
 2
               Prewhere info
                 Prewhere filter
-                Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
+                Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
               Prewhere info
                 Prewhere filter
-                Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
+                Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
 2
diff --git a/tests/queries/0_stateless/02262_column_ttl.sh b/tests/queries/0_stateless/02262_column_ttl.sh
index c620d3b6d9c..30730c2a074 100755
--- a/tests/queries/0_stateless/02262_column_ttl.sh
+++ b/tests/queries/0_stateless/02262_column_ttl.sh
@@ -35,7 +35,7 @@ $CLICKHOUSE_CLIENT -m -q "
     -- OPTIMIZE TABLE x FINAL will be done in background
     -- attach to it's log, via table UUID in query_id (see merger/mutator code).
     create materialized view this_text_log engine=Memory() as
-    select * from system.text_log where query_id like '%${ttl_02262_uuid}%';
+    select * from system.text_log where query_id like '%${ttl_02262_uuid}%' SETTINGS max_rows_to_read = 0;
 
     optimize table ttl_02262 final;
     system flush logs;
diff --git a/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.reference b/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.reference
index d167d905636..9567f89b091 100644
--- a/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.reference
+++ b/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.reference
@@ -2,3 +2,5 @@
 {"query":"show databases format Null\n  "}
 {"query":"insert into opentelemetry_test values","read_rows":"3","written_rows":"3"}
 {"query":"select * from opentelemetry_test format Null\n  ","read_rows":"3","written_rows":""}
+{"query":"SELECT * FROM opentelemetry_test FORMAT Null\n  ","read_rows":"3","written_rows":""}
+{"min_compress_block_size":"present","max_block_size":"present","max_execution_time":"present"}
diff --git a/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh b/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh
index 91e85eabcb8..59310b970d1 100755
--- a/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh
+++ b/tests/queries/0_stateless/02421_simple_queries_for_opentelemetry.sh
@@ -44,6 +44,37 @@ ${CLICKHOUSE_CLIENT} -q "
     ;"
 }
 
+# Prints one JSON line telling whether the span-log output for query id $1 mentions each of three settings.
+# Only $1 is read; any further arguments are ignored.
+function check_query_settings()
+{
+    local result min_present="not found" max_present="not found" execution_time_present="not found"
+    result=$(${CLICKHOUSE_CLIENT} -q "
+    SYSTEM FLUSH LOGS;
+    SELECT attribute['clickhouse.setting.min_compress_block_size'],
+           attribute['clickhouse.setting.max_block_size'],
+           attribute['clickhouse.setting.max_execution_time']
+    FROM system.opentelemetry_span_log
+    WHERE finish_date                      >= yesterday()
+    AND   operation_name                   = 'query'
+    AND   attribute['clickhouse.query_id'] = '${1}'
+    FORMAT JSONEachRow;
+  ")
+
+    # NOTE(review): JSONEachRow keys presumably embed the attribute names, so these substring matches may only prove that a span row exists - confirm.
+    if [[ $result == *"min_compress_block_size"* ]]; then
+       min_present="present"
+    fi
+    if [[ $result == *"max_block_size"* ]]; then
+       max_present="present"
+    fi
+    if [[ $result == *"max_execution_time"* ]]; then
+       execution_time_present="present"
+    fi
+
+    echo "{\"min_compress_block_size\":\"$min_present\",\"max_block_size\":\"$max_present\",\"max_execution_time\":\"$execution_time_present\"}"
+}
+
 #
 # Set up
 #
@@ -73,6 +104,11 @@ query_id=$(${CLICKHOUSE_CLIENT} -q "select generateUUIDv4()");
 execute_query $query_id 'select * from opentelemetry_test format Null'
 check_query_span $query_id
 
+# Test 5: A normal select query with a setting. NOTE(review): the SETTINGS clause below is attached to the generateUUIDv4() query, not to the SELECT passed to execute_query - confirm this is intended.
+query_id=$(${CLICKHOUSE_CLIENT} -q "SELECT generateUUIDv4() SETTINGS max_execution_time=3600")
+execute_query "$query_id" 'SELECT * FROM opentelemetry_test FORMAT Null'
+check_query_span "$query_id"
+check_query_settings "$query_id" "max_execution_time"  # check_query_settings reads only $1; this second argument is not used by it
 
 #
 # Tear down
diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql
index 948ec9e9e8a..0e7a14ddf99 100644
--- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql
+++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql
@@ -64,5 +64,6 @@ drop table rmt;
 drop table rmt2;
 
 system flush logs;
+SET max_rows_to_read = 0; -- system.text_log can be really big
 select count() > 0 from system.text_log where yesterday() <= event_date and logger_name like '%' || currentDatabase() || '%' and message like '%Removing % parts from filesystem (concurrently): Parts:%';
 select count() > 1, countDistinct(thread_id) > 1 from system.text_log where yesterday() <= event_date and logger_name like '%' || currentDatabase() || '%' and message like '%Removing % parts in blocks range%';
diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
index 1548bef857f..f3f8886f691 100755
--- a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
+++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh
@@ -99,4 +99,4 @@ $CLICKHOUSE_CLIENT -q 'system flush logs'
 $CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and (
   message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes',
   'Query was cancelled or a client has unexpectedly dropped the connection') or
-  message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')"
+  message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%') SETTINGS max_rows_to_read = 0"
diff --git a/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh b/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh
index 44076aeba18..af6d15b60e6 100755
--- a/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh
+++ b/tests/queries/0_stateless/02437_drop_mv_restart_replicas.sh
@@ -53,7 +53,7 @@ export -f thread_restart;
 
 TIMEOUT=15
 
-timeout $TIMEOUT bash -c thread_ddl 2>&1| grep -Fa "Exception: " | grep -Fv -e "TABLE_IS_DROPPED" -e "UNKNOWN_TABLE" -e "DATABASE_NOT_EMPTY" &
+timeout $TIMEOUT bash -c thread_ddl 2>&1| grep -Fa "Exception: " | grep -Fv -e "TABLE_IS_DROPPED" -e "UNKNOWN_TABLE" -e "DATABASE_NOT_EMPTY" -e "TABLE_IS_BEING_RESTARTED" &
 timeout $TIMEOUT bash -c thread_insert 2> /dev/null &
 timeout $TIMEOUT bash -c thread_restart 2>&1| grep -Fa "Exception: " | grep -Fv -e "is currently dropped or renamed" -e "is being dropped or detached" &
 
diff --git a/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql b/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql
index 0597ac10cd7..c89f92ac081 100644
--- a/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql
+++ b/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql
@@ -15,6 +15,7 @@ alter table t materialize projection p_norm settings mutations_sync = 1;
 
 SYSTEM FLUSH LOGS;
 
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SELECT * FROM system.text_log WHERE event_time >= now() - 30 and level == 'Error' and message like '%BAD_DATA_PART_NAME%'and message like '%p_norm%';
 
 DROP TABLE IF EXISTS t;
diff --git a/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.reference b/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.reference
deleted file mode 100644
index d00491fd7e5..00000000000
--- a/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.reference
+++ /dev/null
@@ -1 +0,0 @@
-1
diff --git a/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.sh b/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.sh
deleted file mode 100755
index 570851af450..00000000000
--- a/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-
-CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-# shellcheck source=../shell_config.sh
-. "$CUR_DIR"/../shell_config.sh
-
-# This is the regression test for clickhouse-local, that may use already freed
-# context from the suggestion thread on error.
-
-grep_options=(
-    -e ^$
-    -e 'Cannot create file: /no/such/directory'
-    -e 'Cannot load data for command line suggestions:'
-    -e 'ClickHouse local version'
-    -e 'Empty query'
-)
-
-ASAN_OPTIONS=$ASAN_OPTIONS:exitcode=3 $CLICKHOUSE_LOCAL --history_file /no/such/directory |& grep -v "${grep_options[@]}"
-# on sanitizer error the code will be not 1, but 3
-echo $?
diff --git a/tests/queries/0_stateless/02531_two_level_aggregation_bug.sh b/tests/queries/0_stateless/02531_two_level_aggregation_bug.sh
index d93fe59134f..e1bd64e19ae 100755
--- a/tests/queries/0_stateless/02531_two_level_aggregation_bug.sh
+++ b/tests/queries/0_stateless/02531_two_level_aggregation_bug.sh
@@ -12,5 +12,5 @@ do
     query_id=$(echo "select queryID() from (select sum(s), k from remote('127.0.0.{1,2}', view(select sum(number) s, bitAnd(number, 3) k from numbers_mt(1000000) group by k)) group by k) limit 1 settings group_by_two_level_threshold=1, max_threads=3, prefer_localhost_replica=1" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- 2>&1)
 
     ${CLICKHOUSE_CLIENT} --query="system flush logs"
-    ${CLICKHOUSE_CLIENT} --query="select count() from system.text_log where event_date >= yesterday() and query_id = '${query_id}' and message like '%Converting aggregation data to two-level%'" | grep -P '^6$' && break;
+    ${CLICKHOUSE_CLIENT} --query="select count() from system.text_log where event_date >= yesterday() and query_id = '${query_id}' and message like '%Converting aggregation data to two-level%' SETTINGS max_rows_to_read = 0" | grep -P '^6$' && break;
 done
diff --git a/tests/queries/0_stateless/02570_fallback_from_async_insert.sh b/tests/queries/0_stateless/02570_fallback_from_async_insert.sh
index d7c8944b89d..23417e5366b 100755
--- a/tests/queries/0_stateless/02570_fallback_from_async_insert.sh
+++ b/tests/queries/0_stateless/02570_fallback_from_async_insert.sh
@@ -48,6 +48,7 @@ $CLICKHOUSE_CLIENT --query "
     SELECT 'id_' || splitByChar('_', query_id)[1] AS id FROM system.text_log
     WHERE query_id LIKE '%$query_id_suffix' AND message LIKE '%$message%'
     ORDER BY id
+    SETTINGS max_rows_to_read = 0
 "
 
 $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t_async_insert_fallback"
diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql
index 6b0677a80ae..8b6574562bf 100644
--- a/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql
+++ b/tests/queries/0_stateless/02581_share_big_sets_between_multiple_mutations_tasks_long.sql
@@ -30,6 +30,7 @@ SELECT count() FROM 02581_trips SETTINGS select_sequential_consistency = 1;
 DELETE FROM 02581_trips                        WHERE id IN (SELECT (number*10 + 9)::UInt32 FROM numbers(10000000)) SETTINGS lightweight_deletes_sync = 2;
 SELECT count(), _part from 02581_trips WHERE description = '' GROUP BY _part ORDER BY _part SETTINGS select_sequential_consistency=1;
 
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SYSTEM FLUSH LOGS;
 -- Check that in every mutation there were parts that built sets (log messages like 'Created Set with 10000000 entries from 10000000 rows in 0.388989187 sec.' )
 -- and parts that shared sets (log messages like 'Got set from cache in 0.388930505 sec.' )
diff --git a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql
index 091a9c8171d..b1facadc790 100644
--- a/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql
+++ b/tests/queries/0_stateless/02581_share_big_sets_between_mutation_tasks_long.sql
@@ -58,6 +58,7 @@ WHERE
 SETTINGS mutations_sync=2;
 SELECT count() from 02581_trips WHERE description = '';
 
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SYSTEM FLUSH LOGS;
 -- Check that in every mutation there were parts that built sets (log messages like 'Created Set with 10000000 entries from 10000000 rows in 0.388989187 sec.' )
 -- and parts that shared sets (log messages like 'Got set from cache in 0.388930505 sec.' )
diff --git a/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql b/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql
index 781030ef7b4..1dac8eb23d0 100644
--- a/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql
+++ b/tests/queries/0_stateless/02675_sparse_columns_clear_column.sql
@@ -30,5 +30,5 @@ ORDER BY column;
 DROP TABLE t_sparse_columns_clear SYNC;
 
 SYSTEM FLUSH LOGS;
-
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SELECT count(), groupArray(message) FROM system.text_log WHERE logger_name LIKE '%' || currentDatabase() || '.t_sparse_columns_clear' || '%' AND level = 'Error';
diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql
index 52d55bdbe11..dccac8fceb4 100644
--- a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql
+++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql
@@ -1,5 +1,6 @@
 -- Tags: no-parallel
 
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SELECT 'Hello', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
 SYSTEM FLUSH LOGS;
 
diff --git a/tests/queries/0_stateless/02813_starting_in_text_log.sql b/tests/queries/0_stateless/02813_starting_in_text_log.sql
index e007f58189e..a320ab7eead 100644
--- a/tests/queries/0_stateless/02813_starting_in_text_log.sql
+++ b/tests/queries/0_stateless/02813_starting_in_text_log.sql
@@ -1,2 +1,3 @@
 SYSTEM FLUSH LOGS;
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SELECT count() > 0 FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%Starting ClickHouse%';
diff --git a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference
index 254e59d479a..b91a4dd2f68 100644
--- a/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference
+++ b/tests/queries/0_stateless/02842_move_pk_to_end_of_prewhere.reference
@@ -1,15 +1,15 @@
           Prewhere filter
-          Prewhere filter column: and(equals(k, 3), notEmpty(v)) (removed)
+          Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
 1
           Prewhere filter
-          Prewhere filter column: and(equals(a, 3), equals(b, \'3\'), less(c, 20), like(d, \'%es%\')) (removed)
+          Prewhere filter column: and(like(d, \'%es%\'), less(c, 20), equals(b, \'3\'), equals(a, 3)) (removed)
 1
           Prewhere filter
-          Prewhere filter column: and(equals(a, 3), less(c, 20), greater(c, 0), like(d, \'%es%\')) (removed)
+          Prewhere filter column: and(like(d, \'%es%\'), less(c, 20), greater(c, 0), equals(a, 3)) (removed)
 1
           Prewhere filter
-          Prewhere filter column: and(equals(b, \'3\'), less(c, 20), like(d, \'%es%\')) (removed)
+          Prewhere filter column: and(like(d, \'%es%\'), equals(b, \'3\'), less(c, 20)) (removed)
 1
           Prewhere filter
-          Prewhere filter column: and(equals(a, 3), equals(b, \'3\'), like(d, \'%es%\')) (removed)
+          Prewhere filter column: and(like(d, \'%es%\'), equals(b, \'3\'), equals(a, 3)) (removed)
 1
diff --git a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference
index c4ef127ebc0..eb5e685597c 100644
--- a/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference
+++ b/tests/queries/0_stateless/02864_statistics_delayed_materialization_in_merge.reference
@@ -5,8 +5,8 @@ After insert
 After merge
         Prewhere info
           Prewhere filter
-          Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
+          Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
 After truncate, insert, and materialize
         Prewhere info
           Prewhere filter
-          Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
+          Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
diff --git a/tests/queries/0_stateless/02864_statistics_usage.reference b/tests/queries/0_stateless/02864_statistics_usage.reference
index fd4181a59c3..a9f669b88c1 100644
--- a/tests/queries/0_stateless/02864_statistics_usage.reference
+++ b/tests/queries/0_stateless/02864_statistics_usage.reference
@@ -1,7 +1,7 @@
 After insert
         Prewhere info
           Prewhere filter
-          Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
+          Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
 After drop statistic
         Prewhere info
           Prewhere filter
@@ -9,12 +9,12 @@ After drop statistic
 After add and materialize statistic
         Prewhere info
           Prewhere filter
-          Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
+          Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
 After merge
         Prewhere info
           Prewhere filter
-          Prewhere filter column: and(less(b, 10_UInt8), less(a, 10_UInt8)) (removed)
+          Prewhere filter column: and(less(a, 10_UInt8), less(b, 10_UInt8)) (removed)
 After rename
         Prewhere info
           Prewhere filter
-          Prewhere filter column: and(less(c, 10_UInt8), less(a, 10_UInt8)) (removed)
+          Prewhere filter column: and(less(a, 10_UInt8), less(c, 10_UInt8)) (removed)
diff --git a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
index 51301fe4ea6..5b154686d88 100644
--- a/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
+++ b/tests/queries/0_stateless/02875_parallel_replicas_cluster_all_replicas.sql
@@ -8,6 +8,7 @@ SELECT count() FROM clusterAllReplicas('test_cluster_two_shard_three_replicas_lo
 SYSTEM FLUSH LOGS;
 
 SET enable_parallel_replicas=0;
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SELECT count() > 0 FROM system.text_log
 WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment = '02875_190aed82-2423-413b-ad4c-24dcca50f65b')
     AND message LIKE '%Parallel reading from replicas is disabled for cluster%';
diff --git a/tests/queries/0_stateless/02875_parallel_replicas_remote.sql b/tests/queries/0_stateless/02875_parallel_replicas_remote.sql
index 259ed02b2a3..08afeb97708 100644
--- a/tests/queries/0_stateless/02875_parallel_replicas_remote.sql
+++ b/tests/queries/0_stateless/02875_parallel_replicas_remote.sql
@@ -8,6 +8,7 @@ SELECT count() FROM remote('127.0.0.{1..6}', currentDatabase(), tt) settings log
 SYSTEM FLUSH LOGS;
 
 SET enable_parallel_replicas=0;
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SELECT count() > 0 FROM system.text_log
 WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment = '02875_89f3c39b-1919-48cb-b66e-ef9904e73146')
     AND message LIKE '%Parallel reading from replicas is disabled for cluster%';
diff --git a/tests/queries/0_stateless/02882_replicated_fetch_checksums_doesnt_match.sql b/tests/queries/0_stateless/02882_replicated_fetch_checksums_doesnt_match.sql
index dc500aaff3b..a745625f17a 100644
--- a/tests/queries/0_stateless/02882_replicated_fetch_checksums_doesnt_match.sql
+++ b/tests/queries/0_stateless/02882_replicated_fetch_checksums_doesnt_match.sql
@@ -34,6 +34,7 @@ SELECT count() FROM checksums_r3;
 
 SYSTEM FLUSH LOGS;
 
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SELECT * FROM system.text_log WHERE event_time >= now() - INTERVAL 120 SECOND and level == 'Error' and message like '%CHECKSUM_DOESNT_MATCH%' and logger_name like ('%' || currentDatabase() || '%checksums_r%');
 
 DROP TABLE IF EXISTS checksums_r3;
diff --git a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql
index cbb4ff5334c..07c5a8329be 100644
--- a/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql
+++ b/tests/queries/0_stateless/02898_parallel_replicas_progress_bar.sql
@@ -21,6 +21,7 @@ SELECT count(), min(k), max(k), avg(k) FROM t1 SETTINGS log_comment='02898_defau
 
 -- check logs
 SYSTEM FLUSH LOGS;
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SELECT count() > 0 FROM system.text_log
 WHERE query_id in (select query_id from system.query_log where current_database = currentDatabase() AND log_comment='02898_default_190aed82-2423-413b-ad4c-24dcca50f65b')
     AND message LIKE '%Total rows to read: 3000%' SETTINGS enable_parallel_replicas=0;
diff --git a/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh
index b77e5b0b402..aaecc7301e4 100755
--- a/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh
+++ b/tests/queries/0_stateless/02903_rmt_retriable_merge_exception.sh
@@ -56,7 +56,8 @@ for _ in {0..50}; do
             (
                 (logger_name = 'MergeTreeBackgroundExecutor' and message like '%{$table_uuid::$part_name}%No active replica has part $part_name or covering part%') or
                 (logger_name like '$table_uuid::$part_name (MergeFromLogEntryTask)' and message like '%No active replica has part $part_name or covering part%')
-            );
+            )
+        SETTINGS max_rows_to_read = 0;
     ")
     if [[ $no_active_repilica_messages -gt 0 ]]; then
         break
@@ -78,5 +79,6 @@ $CLICKHOUSE_CLIENT -m -q "
             (logger_name = 'MergeTreeBackgroundExecutor' and message like '%{$table_uuid::$part_name}%No active replica has part $part_name or covering part%') or
             (logger_name like '$table_uuid::$part_name (MergeFromLogEntryTask)' and message like '%No active replica has part $part_name or covering part%')
         )
-    group by level;
+    group by level
+    SETTINGS max_rows_to_read = 0;
 "
diff --git a/tests/queries/0_stateless/02935_parallel_replicas_settings.sql b/tests/queries/0_stateless/02935_parallel_replicas_settings.sql
index 5e3b1a13f1a..ccf1caddbb9 100644
--- a/tests/queries/0_stateless/02935_parallel_replicas_settings.sql
+++ b/tests/queries/0_stateless/02935_parallel_replicas_settings.sql
@@ -11,7 +11,7 @@ SET cluster_for_parallel_replicas='parallel_replicas';
 SELECT count() FROM test_parallel_replicas_settings WHERE NOT ignore(*) settings log_comment='0_f621c4f2-4da7-4a7c-bb6d-052c442d0f7f';
 
 SYSTEM FLUSH LOGS;
-
+SET max_rows_to_read = 0; -- system.text_log can be really big
 SELECT count() > 0 FROM system.text_log
 WHERE yesterday() <= event_date
       AND query_id in (select query_id from system.query_log where current_database=currentDatabase() AND log_comment='0_f621c4f2-4da7-4a7c-bb6d-052c442d0f7f')
diff --git a/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down.reference b/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down.reference
new file mode 100644
index 00000000000..99ede711976
--- /dev/null
+++ b/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down.reference
@@ -0,0 +1,347 @@
+1000
+bloom filter is off, all row groups should be read
+expect rows_read = select count()
+{
+  "data": [
+    {
+      "string": "AZSR",
+      "flba": "WNMM"
+    },
+    {
+      "string": "PFJH",
+      "flba": "GKJC"
+    }
+  ],
+  "rows": 2,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
+bloom filter is on, some row groups should be skipped
+expect rows_read much less than select count()
+{
+  "data": [
+    {
+      "string": "AZSR",
+      "flba": "WNMM"
+    },
+    {
+      "string": "PFJH",
+      "flba": "GKJC"
+    }
+  ],
+  "rows": 2,
+  "statistics": {
+    "rows_read": 464,
+    "bytes_read": 21703
+  }
+}
+bloom filter is on, but where predicate contains data from 2 row groups out of 3.
+Rows read should be less than select count, but greater than previous selects
+{
+  "data": [
+    {
+      "string": "PFJH",
+      "flba": "GKJC"
+    },
+    {
+      "string": "ZHZK",
+      "flba": "HRWD"
+    }
+  ],
+  "rows": 2,
+  "statistics": {
+    "rows_read": 536,
+    "bytes_read": 25708
+  }
+}
+bloom filter is on, but where predicate contains data from all row groups
+expect rows_read = select count()
+{
+  "data": [
+    {
+      "string": "PFJH",
+      "flba": "GKJC"
+    },
+    {
+      "string": "OKAI",
+      "flba": "UXGT"
+    },
+    {
+      "string": "ZHZK",
+      "flba": "HRWD"
+    }
+  ],
+  "rows": 3,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
+IN check
+{
+  "data": [
+    {
+      "string": "PFJH",
+      "flba": "GKJC"
+    },
+    {
+      "string": "ZHZK",
+      "flba": "HRWD"
+    }
+  ],
+  "rows": 2,
+  "statistics": {
+    "rows_read": 536,
+    "bytes_read": 25708
+  }
+}
+tuple in case, bf is off.
+{
+  "data": [
+    {
+      "string": "PFJH",
+      "flba": "GKJC"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
+tuple in case, bf is on.
+{
+  "data": [
+    {
+      "string": "PFJH",
+      "flba": "GKJC"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 464,
+    "bytes_read": 21703
+  }
+}
+complex tuple in case, bf is off
+{
+  "data": [
+    {
+      "string": "PFJH",
+      "flba": "GKJC"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
+complex tuple in case, bf is on
+{
+  "data": [
+    {
+      "string": "PFJH",
+      "flba": "GKJC"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 464,
+    "bytes_read": 21703
+  }
+}
+complex tuple in case, bf is on. Non existent
+{
+  "data": [],
+  "rows": 0,
+  "statistics": {
+    "rows_read": 0,
+    "bytes_read": 0
+  }
+}
+Bloom filter for json column. BF is off
+{
+  "data": [
+    {
+      "json": "{\"key\":38, \"value\":\"NXONM\"}"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
+Bloom filter for json column. BF is on
+{
+  "data": [
+    {
+      "json": "{\"key\":38, \"value\":\"NXONM\"}"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 72,
+    "bytes_read": 4005
+  }
+}
+Bloom filter for ipv4 column. BF is off
+{
+  "data": [
+    {
+      "json": "{\"key\":38, \"value\":\"NXONM\"}"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
+Bloom filter for ipv4 column. BF is on
+{
+  "data": [
+    {
+      "json": "{\"key\":38, \"value\":\"NXONM\"}"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 72,
+    "bytes_read": 4005
+  }
+}
+Bloom filter for ipv4 column. BF is on. Specified in the schema
+{
+  "data": [
+    {
+      "ipv4": "0.0.1.143"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 72,
+    "bytes_read": 4005
+  }
+}
+Bloom filter on 64 bit column read as ipv4. We explicitly deny it, should read all rg
+{
+  "data": [
+    {
+      "uint64_logical": "22.230.220.164"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
+BF off for parquet uint64 logical type. Should read everything
+{
+  "data": [
+    {
+      "json": "{\"key\":683, \"value\":\"YKCPD\"}"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
+BF on for parquet uint64 logical type. Uint64 is stored as a signed int 64, but with logical annotation. Make sure a value greater than int64 can be queried
+{
+  "data": [
+    {
+      "json": "{\"key\":683, \"value\":\"YKCPD\"}"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 464,
+    "bytes_read": 21711
+  }
+}
+Uint16 is stored as physical type int32 with bidwidth = 16  and sign = false. Make sure a value greater than int16 can be queried. BF is on.
+{
+  "data": [
+    {
+      "json": "{\"key\":874, \"value\":\"JENHW\"}"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 464,
+    "bytes_read": 21703
+  }
+}
+BF off for parquet int8 logical type. Should read everything
+{
+  "data": [
+    {
+      "json": "{\"key\":89, \"value\":\"MFIYP\"}"
+    },
+    {
+      "json": "{\"key\":321, \"value\":\"JNOIA\"}"
+    },
+    {
+      "json": "{\"key\":938, \"value\":\"UBMLO\"}"
+    },
+    {
+      "json": "{\"key\":252, \"value\":\"ZVLKF\"}"
+    }
+  ],
+  "rows": 4,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
+BF on for parquet int8 logical type. Should skip row groups
+{
+  "data": [
+    {
+      "json": "{\"key\":89, \"value\":\"MFIYP\"}"
+    },
+    {
+      "json": "{\"key\":321, \"value\":\"JNOIA\"}"
+    },
+    {
+      "json": "{\"key\":938, \"value\":\"UBMLO\"}"
+    },
+    {
+      "json": "{\"key\":252, \"value\":\"ZVLKF\"}"
+    }
+  ],
+  "rows": 4,
+  "statistics": {
+    "rows_read": 536,
+    "bytes_read": 25716
+  }
+}
+Invalid column conversion with in operation. String type can not be hashed against parquet int64 physical type. Should read everything
+{
+  "data": [],
+  "rows": 0,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
+Transformations on key column shall not be allowed. Should read everything
+{
+  "data": [
+    {
+      "uint64_logical": "7711695863945021976"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 1000,
+    "bytes_read": 47419
+  }
+}
diff --git a/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down.sh b/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down.sh
new file mode 100755
index 00000000000..b7d40a1be63
--- /dev/null
+++ b/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+# Tags: no-ubsan, no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+
+USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+# Per-test scratch directory; the parquet fixture is copied into it so the file() table function can be given that path.
+WORKING_DIR="${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}"
+
+mkdir -p "${WORKING_DIR}"
+
+DATA_FILE="${CUR_DIR}/data_parquet/multi_column_bf.gz.parquet"
+
+DATA_FILE_USER_PATH="${WORKING_DIR}/multi_column_bf.gz.parquet"
+
+cp "${DATA_FILE}" "${DATA_FILE_USER_PATH}"
+
+${CLICKHOUSE_CLIENT} --query="select count(*) from file('${DATA_FILE_USER_PATH}', Parquet) SETTINGS use_cache_for_count_from_files=false;"
+# Every check below pipes the JSON result through jq to delete .meta and .statistics.elapsed before it is printed.
+echo "bloom filter is off, all row groups should be read"
+echo "expect rows_read = select count()"
+${CLICKHOUSE_CLIENT} --query="select string, flba from file('${DATA_FILE_USER_PATH}', Parquet) where string='PFJH' or flba='WNMM' order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=false, input_format_parquet_filter_push_down=false"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "bloom filter is on, some row groups should be skipped"
+echo "expect rows_read much less than select count()"
+${CLICKHOUSE_CLIENT} --query="select string, flba from file('${DATA_FILE_USER_PATH}', Parquet) where string='PFJH' or flba='WNMM' order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "bloom filter is on, but where predicate contains data from 2 row groups out of 3."
+echo "Rows read should be less than select count, but greater than previous selects"
+${CLICKHOUSE_CLIENT} --query="select string, flba from file('${DATA_FILE_USER_PATH}', Parquet) where string='PFJH' or string='ZHZK' order by uint16_logical asc Format JSON SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+
+echo "bloom filter is on, but where predicate contains data from all row groups"
+echo "expect rows_read = select count()"
+${CLICKHOUSE_CLIENT} --query="select string, flba from file('${DATA_FILE_USER_PATH}', Parquet) where string='PFJH' or string='ZHZK' or uint64_logical=18441251162536403933 order by uint16_logical asc Format JSON SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+
+echo "IN check"
+${CLICKHOUSE_CLIENT} --query="select string, flba from file('${DATA_FILE_USER_PATH}', Parquet) where string in ('PFJH', 'ZHZK') order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "tuple in case, bf is off."
+${CLICKHOUSE_CLIENT} --query="select string, flba from file('${DATA_FILE_USER_PATH}', Parquet) where (string, flba) in ('PFJH', 'GKJC') order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=false, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "tuple in case, bf is on."
+${CLICKHOUSE_CLIENT} --query="select string, flba from file('${DATA_FILE_USER_PATH}', Parquet) where (string, flba) in ('PFJH', 'GKJC') order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "complex tuple in case, bf is off"
+${CLICKHOUSE_CLIENT} --query="select string, flba from file('${DATA_FILE_USER_PATH}', Parquet) where (string, flba) in (('NON1', 'NON1'), ('PFJH', 'GKJC'), ('NON2', 'NON2')) order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=false, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "complex tuple in case, bf is on"
+${CLICKHOUSE_CLIENT} --query="select string, flba from file('${DATA_FILE_USER_PATH}', Parquet) where (string, flba) in (('NON1', 'NON1'), ('PFJH', 'GKJC'), ('NON2', 'NON2')) order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "complex tuple in case, bf is on. Non existent"
+${CLICKHOUSE_CLIENT} --query="select string, flba from file('${DATA_FILE_USER_PATH}', Parquet) where (string, flba) in (('NON1', 'NON1'), ('NON2', 'NON2'), ('NON3', 'NON3')) order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "Bloom filter for json column. BF is off"
+${CLICKHOUSE_CLIENT} --query="select json from file('${DATA_FILE_USER_PATH}', Parquet) where json = '{\"key\":38, \"value\":\"NXONM\"}' order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=false, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "Bloom filter for json column. BF is on"
+${CLICKHOUSE_CLIENT} --query="select json from file('${DATA_FILE_USER_PATH}', Parquet) where json = '{\"key\":38, \"value\":\"NXONM\"}' order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "Bloom filter for ipv4 column. BF is off"
+${CLICKHOUSE_CLIENT} --query="select json from file('${DATA_FILE_USER_PATH}', Parquet) where ipv4 = IPv4StringToNum('0.0.1.143') order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=false, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "Bloom filter for ipv4 column. BF is on"
+${CLICKHOUSE_CLIENT} --query="select json from file('${DATA_FILE_USER_PATH}', Parquet) where ipv4 = IPv4StringToNum('0.0.1.143') order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "Bloom filter for ipv4 column. BF is on. Specified in the schema"
+${CLICKHOUSE_CLIENT} --query="select ipv4 from file('${DATA_FILE_USER_PATH}', Parquet, 'ipv4 IPv4') where ipv4 = toIPv4('0.0.1.143') order by ipv4 asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "Bloom filter on 64 bit column read as ipv4. We explicitly deny it, should read all rg"
+${CLICKHOUSE_CLIENT} --query="select uint64_logical from file ('${DATA_FILE_USER_PATH}', Parquet, 'uint64_logical IPv4') where uint64_logical = toIPv4(5552715629697883300) order by uint64_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+
+echo "BF off for parquet uint64 logical type. Should read everything"
+${CLICKHOUSE_CLIENT} --query="select json from file('${DATA_FILE_USER_PATH}', Parquet) where uint64_logical=18441251162536403933 order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=false, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "BF on for parquet uint64 logical type. Uint64 is stored as a signed int 64, but with logical annotation. Make sure a value greater than int64 can be queried"
+${CLICKHOUSE_CLIENT} --query="select json from file('${DATA_FILE_USER_PATH}', Parquet) where uint64_logical=18441251162536403933 order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "Uint16 is stored as physical type int32 with bidwidth = 16  and sign = false. Make sure a value greater than int16 can be queried. BF is on."
+${CLICKHOUSE_CLIENT} --query="select json from file('${DATA_FILE_USER_PATH}', Parquet) where uint16_logical=65528 order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "BF off for parquet int8 logical type. Should read everything"
+${CLICKHOUSE_CLIENT} --query="select json from file('${DATA_FILE_USER_PATH}', Parquet) where int8_logical=-126 order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=false, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "BF on for parquet int8 logical type. Should skip row groups"
+${CLICKHOUSE_CLIENT} --query="select json from file('${DATA_FILE_USER_PATH}', Parquet) where int8_logical=-126 order by uint16_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;"  | jq 'del(.meta,.statistics.elapsed)'
+
+echo "Invalid column conversion with in operation. String type can not be hashed against parquet int64 physical type. Should read everything"
+${CLICKHOUSE_CLIENT} --query="select uint64_logical from file('${DATA_FILE_USER_PATH}', Parquet, 'uint64_logical String') where uint64_logical in ('5') order by uint64_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+
+echo "Transformations on key column shall not be allowed. Should read everything"
+${CLICKHOUSE_CLIENT} --query="select uint64_logical from file('${DATA_FILE_USER_PATH}', Parquet) where negate(uint64_logical) = -7711695863945021976 order by uint64_logical asc FORMAT Json SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+# Remove this test's scratch directory; ":?" aborts the script instead of expanding to a path under "/" if either variable is empty.
+rm -rf "${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}"
diff --git a/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down_ipv6.reference b/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down_ipv6.reference
new file mode 100644
index 00000000000..acb66d986e5
--- /dev/null
+++ b/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down_ipv6.reference
@@ -0,0 +1,76 @@
+bloom filter is off, row groups should be read
+expect rows_read = select count()
+{
+  "data": [
+    {
+      "ipv6": "7afe:b9d4:e754:4e78:8783:37f5:b2ea:9995"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 5,
+    "bytes_read": 128
+  }
+}
+bloom filter is on for ipv6, row groups should also be read since there is only one. Below queries just make sure the data is properly returned
+{
+  "data": [
+    {
+      "ipv6": "7afe:b9d4:e754:4e78:8783:37f5:b2ea:9995"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 5,
+    "bytes_read": 128
+  }
+}
+{
+  "data": [
+    {
+      "ipv6": "7afe:b9d4:e754:4e78:8783:37f5:b2ea:9995"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 5,
+    "bytes_read": 128
+  }
+}
+{
+  "data": [
+    {
+      "toIPv6(ipv6)": "7afe:b9d4:e754:4e78:8783:37f5:b2ea:9995"
+    }
+  ],
+  "rows": 1,
+  "statistics": {
+    "rows_read": 5,
+    "bytes_read": 128
+  }
+}
+non existent ipv6, row group should be skipped
+{
+  "data": [],
+  "rows": 0,
+  "statistics": {
+    "rows_read": 0,
+    "bytes_read": 0
+  }
+}
+{
+  "data": [],
+  "rows": 0,
+  "statistics": {
+    "rows_read": 0,
+    "bytes_read": 0
+  }
+}
+{
+  "data": [],
+  "rows": 0,
+  "statistics": {
+    "rows_read": 5,
+    "bytes_read": 128
+  }
+}
diff --git a/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down_ipv6.sh b/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down_ipv6.sh
new file mode 100755
index 00000000000..752e7ed38a5
--- /dev/null
+++ b/tests/queries/0_stateless/03036_test_parquet_bloom_filter_push_down_ipv6.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+# Tags: no-ubsan, no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+# Query a missing file, take field 9 of the Exception line (presumably its absolute path) and strip the file name from it.
+USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
+
+WORKING_DIR="${USER_FILES_PATH}/${CLICKHOUSE_TEST_UNIQUE_NAME}"
+
+mkdir -p "${WORKING_DIR}"
+
+DATA_FILE="${CUR_DIR}/data_parquet/ipv6_bloom_filter.gz.parquet"
+
+DATA_FILE_USER_PATH="${WORKING_DIR}/ipv6_bloom_filter.gz.parquet"
+
+cp "${DATA_FILE}" "${DATA_FILE_USER_PATH}"
+
+echo "bloom filter is off, row groups should be read"
+echo "expect rows_read = select count()"
+${CLICKHOUSE_CLIENT} --query="select ipv6 from file('${DATA_FILE_USER_PATH}', Parquet, 'ipv6 IPv6') where ipv6 = '7afe:b9d4:e754:4e78:8783:37f5:b2ea:9995' Format JSON SETTINGS input_format_parquet_bloom_filter_push_down=false, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+
+echo "bloom filter is on for ipv6, row groups should also be read since there is only one. Below queries just make sure the data is properly returned"
+${CLICKHOUSE_CLIENT} --query="select ipv6 from file('${DATA_FILE_USER_PATH}', Parquet, 'ipv6 IPv6') where ipv6 = '7afe:b9d4:e754:4e78:8783:37f5:b2ea:9995' Format JSON SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+${CLICKHOUSE_CLIENT} --query="select ipv6 from file('${DATA_FILE_USER_PATH}', Parquet, 'ipv6 IPv6') where ipv6 = toIPv6('7afe:b9d4:e754:4e78:8783:37f5:b2ea:9995') Format JSON SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+${CLICKHOUSE_CLIENT} --query="select toIPv6(ipv6) from file('${DATA_FILE_USER_PATH}', Parquet) where ipv6 = toIPv6('7afe:b9d4:e754:4e78:8783:37f5:b2ea:9995') Format JSON SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+
+echo "non existent ipv6, row group should be skipped"
+${CLICKHOUSE_CLIENT} --query="select ipv6 from file('${DATA_FILE_USER_PATH}', Parquet, 'ipv6 IPv6') where ipv6 = 'fafe:b9d4:e754:4e78:8783:37f5:b2ea:9995' Format JSON SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+${CLICKHOUSE_CLIENT} --query="select ipv6 from file('${DATA_FILE_USER_PATH}', Parquet, 'ipv6 IPv6') where ipv6 = toIPv6('fafe:b9d4:e754:4e78:8783:37f5:b2ea:9995') Format JSON SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+${CLICKHOUSE_CLIENT} --query="select toIPv6(ipv6) from file('${DATA_FILE_USER_PATH}', Parquet) where ipv6 = toIPv6('fafe:b9d4:e754:4e78:8783:37f5:b2ea:9995') Format JSON SETTINGS input_format_parquet_bloom_filter_push_down=true, input_format_parquet_filter_push_down=false;" | jq 'del(.meta,.statistics.elapsed)'
+
+# Remove this test's scratch directory; ":?" aborts the script instead of expanding to a path under "/" if either variable is empty.
+rm -rf "${USER_FILES_PATH:?}/${CLICKHOUSE_TEST_UNIQUE_NAME:?}"
diff --git a/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql b/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql
index bb036a6c133..a08f35cfc1d 100644
--- a/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql
+++ b/tests/queries/0_stateless/03096_text_log_format_string_args_not_empty.sql
@@ -6,6 +6,7 @@ select conut(); -- { serverError UNKNOWN_FUNCTION }
 
 system flush logs;
 
+SET max_rows_to_read = 0; -- system.text_log can be really big
 select count() > 0 from system.text_log where message_format_string = 'Peak memory usage{}: {}.' and value1 is not null and value2 like '% MiB';
 
 select count() > 0 from system.text_log where level = 'Error' and message_format_string = 'Unknown {}{} identifier {} in scope {}{}' and value1 = 'expression' and value3 = '`count`' and value4 = 'SELECT count';
diff --git a/tests/queries/0_stateless/03141_fetches_errors_stress.sql b/tests/queries/0_stateless/03141_fetches_errors_stress.sql
index 2f6b0b08574..69829a44b58 100644
--- a/tests/queries/0_stateless/03141_fetches_errors_stress.sql
+++ b/tests/queries/0_stateless/03141_fetches_errors_stress.sql
@@ -11,6 +11,7 @@ system disable failpoint replicated_sends_failpoint;
 system sync replica data_r2;
 
 system flush logs;
+SET max_rows_to_read = 0; -- system.text_log can be really big
 select event_time_microseconds, logger_name, message from system.text_log where level = 'Error' and message like '%Malformed chunked encoding%' order by 1 format LineAsString;
 
 -- { echoOn }
diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql
index 96221f27e73..968cc4a40a7 100644
--- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql
+++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_1.sql
@@ -24,7 +24,7 @@ SELECT count() FROM t_ind_merge_1 WHERE b < 100 SETTINGS force_data_skipping_ind
 EXPLAIN indexes = 1 SELECT count() FROM t_ind_merge_1 WHERE b < 100;
 
 SYSTEM FLUSH LOGS;
-
+SET max_rows_to_read = 0; -- system.text_log can be really big
 WITH
     (SELECT uuid FROM system.tables WHERE database = currentDatabase() AND table = 't_ind_merge_1') AS uuid,
     extractAllGroupsVertical(message, 'containing (\\d+) columns \((\\d+) merged, (\\d+) gathered\)')[1] AS groups
diff --git a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql
index b749e0c84b0..bf145d875fd 100644
--- a/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql
+++ b/tests/queries/0_stateless/03166_skip_indexes_vertical_merge_2.sql
@@ -26,6 +26,7 @@ INSERT INTO t_ind_merge_2 SELECT number, number, rand(), rand(), rand(), rand()
 
 OPTIMIZE TABLE t_ind_merge_2 FINAL;
 SYSTEM FLUSH LOGS;
+SET max_rows_to_read = 0; -- system.text_log can be really big
 
 --- merged: a, c, d; gathered: b, e, f
 WITH
diff --git a/tests/queries/0_stateless/03203_fill_missed_subcolumns.reference b/tests/queries/0_stateless/03203_fill_missed_subcolumns.reference
index 872eb7da3c8..9faa3b35848 100644
--- a/tests/queries/0_stateless/03203_fill_missed_subcolumns.reference
+++ b/tests/queries/0_stateless/03203_fill_missed_subcolumns.reference
@@ -27,5 +27,5 @@
 2	('aaa','bbb')	[1,NULL,3]
 3	('ccc','ddd')	[4,5,6]
 1	foo	bar	3	[0,1,0]
-2	foo	bar	3	[0,1,0]
+2	aaa	bbb	3	[0,1,0]
 3	ccc	ddd	3	[0,0,0]
diff --git a/tests/queries/0_stateless/03228_clickhouse_local_copy_argument.reference b/tests/queries/0_stateless/03228_clickhouse_local_copy_argument.reference
new file mode 100644
index 00000000000..9fc281554de
--- /dev/null
+++ b/tests/queries/0_stateless/03228_clickhouse_local_copy_argument.reference
@@ -0,0 +1 @@
+Wed Jul 01 06:29:00 +0000 2020	1278214001395216384	1278214001395216384	È SICUROOOOO?	<a href="https://elenoireferruzzibot.com" rel="nofollow">BotElenoire</a>	false	\N	\N	\N	\N	\N	(false,'Mon Nov 04 18:14:32 +0000 2019',true,false,'L’importante è averli puliti',12,NULL,1239,NULL,3,false,1191418444324966403,'1191418444324966403',false,NULL,1,NULL,'Elenoire Ferruzzi Bot',NULL,'F5F8FA','','',false,'https://pbs.twimg.com/profile_banners/1191418444324966403/1578228379','http://pbs.twimg.com/profile_images/1191422843084521472/HEy5I5g8_normal.jpg','https://pbs.twimg.com/profile_images/1191422843084521472/HEy5I5g8_normal.jpg','1DA1F2','C0DEED','DDEEF6','333333',true,false,'BotElenoire',20769,NULL,'none',NULL,NULL,false)	\N	\N	\N	\N	false	0	0	0	0	([],[],[],[])	false	false	low	it	1593584940657
diff --git a/tests/queries/0_stateless/03228_clickhouse_local_copy_argument.sh b/tests/queries/0_stateless/03228_clickhouse_local_copy_argument.sh
new file mode 100755
index 00000000000..575ff01c88c
--- /dev/null
+++ b/tests/queries/0_stateless/03228_clickhouse_local_copy_argument.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+# Feed the JSONL sample to clickhouse-local on stdin; the path is quoted so that whitespace in it cannot cause an "ambiguous redirect".
+${CLICKHOUSE_LOCAL} --copy < "$CURDIR/data_json/twitter.jsonl"
diff --git a/tests/queries/0_stateless/03231_prewhere_conditions_order.sql b/tests/queries/0_stateless/03231_prewhere_conditions_order.sql
index acaba12684c..6df5b139275 100644
--- a/tests/queries/0_stateless/03231_prewhere_conditions_order.sql
+++ b/tests/queries/0_stateless/03231_prewhere_conditions_order.sql
@@ -1,6 +1,6 @@
 drop table if exists test;
 create table test (x UInt32, arr1 Array(UInt32), arr2 Array(UInt32)) engine=MergeTree order by x;
 insert into test values (1, [0, 1], [0, 1]), (2, [0], [0, 1]);
-select * from test where x == 1 and arrayExists((x1, x2) -> (x1 == x2), arr1, arr2);
+select * from test where x == 1 and arrayExists((x1, x2) -> (x1 == x2), arr1, arr2) settings allow_reorder_prewhere_conditions = 0;
 drop table test;
 
diff --git a/tests/queries/0_stateless/03234_enable_secure_identifiers.sql b/tests/queries/0_stateless/03234_enable_secure_identifiers.sql
index dfeb53dd147..7381ffb78ad 100644
--- a/tests/queries/0_stateless/03234_enable_secure_identifiers.sql
+++ b/tests/queries/0_stateless/03234_enable_secure_identifiers.sql
@@ -8,7 +8,7 @@ PRIMARY KEY (town, date)
 PARTITION BY toYear(date)
 COMMENT 'test' -- to end ENGINE definition, so SETTINGS will be in the query level
 SETTINGS
-    enable_secure_identifiers=true; -- { serverError BAD_ARGUMENTS }
+    enforce_strict_identifier_format=true; -- { serverError BAD_ARGUMENTS }
 DROP TABLE IF EXISTS `test_foo_#`;
 
 
@@ -23,7 +23,7 @@ PRIMARY KEY (town, date)
 PARTITION BY toYear(date)
 COMMENT 'test' -- to end ENGINE definition, so SETTINGS will be in the query level
 SETTINGS
-    enable_secure_identifiers=true; -- { serverError BAD_ARGUMENTS }
+    enforce_strict_identifier_format=true; -- { serverError BAD_ARGUMENTS }
 
 DROP TABLE IF EXISTS test_foo;
 CREATE TABLE test_foo (
@@ -36,7 +36,7 @@ PRIMARY KEY (town, date)
 PARTITION BY toYear(date)
 COMMENT 'test' -- to end ENGINE definition, so SETTINGS will be in the query level
 SETTINGS
-    enable_secure_identifiers=true; -- { serverError BAD_ARGUMENTS }
+    enforce_strict_identifier_format=true; -- { serverError BAD_ARGUMENTS }
 
 DROP TABLE IF EXISTS test_foo;
 CREATE TABLE test_foo (
@@ -49,7 +49,7 @@ PRIMARY KEY (town, date)
 PARTITION BY toYear(date)
 COMMENT 'test' -- to end ENGINE definition, so SETTINGS will be in the query level
 SETTINGS
-    enable_secure_identifiers=true; -- { serverError BAD_ARGUMENTS }
+    enforce_strict_identifier_format=true; -- { serverError BAD_ARGUMENTS }
 
 DROP TABLE IF EXISTS test_foo;
 CREATE TABLE test_foo (
@@ -62,11 +62,11 @@ PRIMARY KEY (town, date)
 PARTITION BY toYear(date)
 COMMENT 'test' -- to end ENGINE definition, so SETTINGS will be in the query level
 SETTINGS
-    enable_secure_identifiers=true;
+    enforce_strict_identifier_format=true;
 
-SHOW CREATE TABLE test_foo 
+SHOW CREATE TABLE test_foo
 SETTINGS
-    enable_secure_identifiers=true;
+    enforce_strict_identifier_format=true;
 
 DROP TABLE IF EXISTS test_foo;
 CREATE TABLE test_foo (
@@ -79,13 +79,13 @@ PRIMARY KEY (town, date)
 PARTITION BY toYear(date)
 COMMENT 'test' -- to end ENGINE definition, so SETTINGS will be in the query level
 SETTINGS
-    enable_secure_identifiers=true;
+    enforce_strict_identifier_format=true;
 
-SHOW CREATE TABLE test_foo 
+SHOW CREATE TABLE test_foo
 SETTINGS
-    enable_secure_identifiers=true;
+    enforce_strict_identifier_format=true;
 
--- CREATE TABLE without `enable_secure_identifiers`
+-- CREATE TABLE without `enforce_strict_identifier_format`
 DROP TABLE IF EXISTS test_foo;
 CREATE TABLE `test_foo` (
     `insecure_$` Int8,
@@ -95,17 +95,17 @@ CREATE TABLE `test_foo` (
 ENGINE = MergeTree
 PRIMARY KEY (town, date)
 PARTITION BY toYear(date);
--- Then SHOW CREATE .. with `enable_secure_identifiers`
+-- Then SHOW CREATE .. with `enforce_strict_identifier_format`
 -- While the result contains insecure identifiers (`insecure_$`), the `SHOW CREATE TABLE ...` query does not have any. So the query is expected to succeed.
-SHOW CREATE TABLE test_foo 
+SHOW CREATE TABLE test_foo
 SETTINGS
-    enable_secure_identifiers=true;
+    enforce_strict_identifier_format=true;
 
 DROP TABLE IF EXISTS test_foo;
 
--- SHOW CREATE .. query contains an insecure identifier (`test_foo$`) with `enable_secure_identifiers`
+-- SHOW CREATE .. query contains an insecure identifier (`test_foo$`) with `enforce_strict_identifier_format`
 SHOW CREATE TABLE `test_foo$`
 SETTINGS
-    enable_secure_identifiers=true; -- { serverError BAD_ARGUMENTS }
+    enforce_strict_identifier_format=true; -- { serverError BAD_ARGUMENTS }
 
-DROP TABLE IF EXISTS test_foo;
\ No newline at end of file
+DROP TABLE IF EXISTS test_foo;
diff --git a/tests/queries/0_stateless/03237_create_or_replace_view_atomically_with_atomic_engine.reference b/tests/queries/0_stateless/03237_create_or_replace_view_atomically_with_atomic_engine.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03237_create_or_replace_view_atomically_with_atomic_engine.sh b/tests/queries/0_stateless/03237_create_or_replace_view_atomically_with_atomic_engine.sh
new file mode 100755
index 00000000000..cc0e6c0d113
--- /dev/null
+++ b/tests/queries/0_stateless/03237_create_or_replace_view_atomically_with_atomic_engine.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+# Runs CREATE OR REPLACE VIEW concurrently with SELECTs from the same view in an Atomic database.
+# All query stdout is discarded, so the script itself prints nothing on success.
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+# with Atomic engine
+$CLICKHOUSE_CLIENT --query "CREATE DATABASE IF NOT EXISTS ${CLICKHOUSE_DATABASE}_db ENGINE=Atomic"
+
+# Replaces the view 20 times in a row; query stdout is discarded.
+function create_or_replace_view_thread
+{
+    for _ in {1..20}; do
+        $CLICKHOUSE_CLIENT --query "CREATE OR REPLACE VIEW ${CLICKHOUSE_DATABASE}_db.test_view AS SELECT 'abcdef'" > /dev/null
+    done
+}
+export -f create_or_replace_view_thread;
+
+# Reads the view 20 times in a row; query stdout is discarded.
+function select_view_thread
+{
+    for _ in {1..20}; do
+        $CLICKHOUSE_CLIENT --query "SELECT * FROM ${CLICKHOUSE_DATABASE}_db.test_view" > /dev/null
+    done
+}
+export -f select_view_thread;
+
+# Create the view once up front so that it exists before the readers start.
+$CLICKHOUSE_CLIENT --query "CREATE OR REPLACE VIEW ${CLICKHOUSE_DATABASE}_db.test_view AS SELECT 'abcdef'" > /dev/null
+
+# 12 background readers; the functions are exported above so each child bash can call them.
+for _ in {1..12}; do
+    bash -c select_view_thread &
+done
+
+# 10 background writers replacing the view while the readers run.
+for _ in {1..10}; do
+    bash -c create_or_replace_view_thread &
+done
+
+wait
+
+# Drop the helper database so it does not outlive the test.
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS ${CLICKHOUSE_DATABASE}_db"
diff --git a/tests/queries/0_stateless/03238_create_or_replace_view_atomically_with_replicated_engine.reference b/tests/queries/0_stateless/03238_create_or_replace_view_atomically_with_replicated_engine.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03238_create_or_replace_view_atomically_with_replicated_engine.sh b/tests/queries/0_stateless/03238_create_or_replace_view_atomically_with_replicated_engine.sh
new file mode 100755
index 00000000000..04adc38e34b
--- /dev/null
+++ b/tests/queries/0_stateless/03238_create_or_replace_view_atomically_with_replicated_engine.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+# Runs CREATE OR REPLACE VIEW concurrently with SELECTs from the same view in a Replicated database.
+# All query stdout is discarded, so the script itself prints nothing on success.
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+# with Replicated engine
+$CLICKHOUSE_CLIENT --query "CREATE DATABASE IF NOT EXISTS ${CLICKHOUSE_DATABASE}_db ENGINE=Replicated('/test/clickhouse/db/${CLICKHOUSE_DATABASE}_db', 's1', 'r1')"
+
+# Replaces the view 15 times in a row; query stdout is discarded.
+function create_or_replace_view_thread
+{
+    for _ in {1..15}; do
+        $CLICKHOUSE_CLIENT --query "CREATE OR REPLACE VIEW ${CLICKHOUSE_DATABASE}_db.test_view AS SELECT 'abcdef'" > /dev/null
+    done
+}
+export -f create_or_replace_view_thread;
+
+# Reads the view 15 times in a row; query stdout is discarded.
+function select_view_thread
+{
+    for _ in {1..15}; do
+        $CLICKHOUSE_CLIENT --query "SELECT * FROM ${CLICKHOUSE_DATABASE}_db.test_view" > /dev/null
+    done
+}
+export -f select_view_thread;
+
+# Create the view once up front so that it exists before the readers start.
+$CLICKHOUSE_CLIENT --query "CREATE OR REPLACE VIEW ${CLICKHOUSE_DATABASE}_db.test_view AS SELECT 'abcdef'" > /dev/null
+
+# 12 background readers; the functions are exported above so each child bash can call them.
+for _ in {1..12}; do
+    bash -c select_view_thread &
+done
+
+# 10 background writers replacing the view while the readers run.
+for _ in {1..10}; do
+    bash -c create_or_replace_view_thread &
+done
+
+wait
+
+# Drop the helper database so it does not outlive the test.
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS ${CLICKHOUSE_DATABASE}_db"
diff --git a/tests/queries/0_stateless/03241_orc_dictionary_encode.reference b/tests/queries/0_stateless/03241_orc_dictionary_encode.reference
new file mode 100644
index 00000000000..adc9e846818
--- /dev/null
+++ b/tests/queries/0_stateless/03241_orc_dictionary_encode.reference
@@ -0,0 +1,84 @@
+c	Nullable(String)					
+c	LowCardinality(Nullable(String))					
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
+\N	10000
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
+\N	10000
+	10000
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
+	10000
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
+c	Nullable(String)					
+c	Nullable(String)					
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
+\N	10000
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
+\N	10000
+	10000
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
+	10000
+1	10000
+2	10000
+3	10000
+4	10000
+5	10000
+6	10000
+7	10000
+8	10000
+9	10000
diff --git a/tests/queries/0_stateless/03241_orc_dictionary_encode.sql b/tests/queries/0_stateless/03241_orc_dictionary_encode.sql
new file mode 100644
index 00000000000..d7837ac19a9
--- /dev/null
+++ b/tests/queries/0_stateless/03241_orc_dictionary_encode.sql
@@ -0,0 +1,38 @@
+-- Tags: no-fasttest
+set input_format_orc_use_fast_decoder = 1;
+
+set input_format_orc_dictionary_as_low_cardinality = 1;
+insert into function file(concat(currentDatabase(), '_03241_data1_without_dict.orc'))
+select toLowCardinality(cast(if (number % 10 = 0, null, number % 10) as Nullable(String))) as c from numbers(100000)
+settings output_format_orc_dictionary_key_size_threshold = 0, engine_file_truncate_on_insert = 1;
+
+insert into function file(concat(currentDatabase(), '_03241_data1_with_dict.orc'))
+select toLowCardinality(cast(if (number % 10 = 0, null, number % 10) as Nullable(String))) as c from numbers(100000)
+settings output_format_orc_dictionary_key_size_threshold = 0.1, engine_file_truncate_on_insert = 1;
+
+desc file(concat(currentDatabase(), '_03241_data1_without_dict.orc'));
+desc file(concat(currentDatabase(), '_03241_data1_with_dict.orc'));
+
+select c, count(1) from file(concat(currentDatabase(), '_03241_data1_without_dict.orc')) group by c order by c;
+select c, count(1) from file(concat(currentDatabase(), '_03241_data1_with_dict.orc')) group by c order by c;
+
+select c, count(1) from file(concat(currentDatabase(), '_03241_data1_without_dict.orc'), ORC, 'c String') group by c order by c;
+select c, count(1) from file(concat(currentDatabase(), '_03241_data1_with_dict.orc'), ORC, 'c LowCardinality(String)') group by c order by c;
+
+set input_format_orc_dictionary_as_low_cardinality = 0;
+insert into function file(concat(currentDatabase(), '_03241_data2_without_dict.orc'))
+select toLowCardinality(cast(if (number % 10 = 0, null, number % 10) as Nullable(String))) as c from numbers(100000)
+settings output_format_orc_dictionary_key_size_threshold = 0, engine_file_truncate_on_insert = 1;
+
+insert into function file(concat(currentDatabase(), '_03241_data2_with_dict.orc'))
+select toLowCardinality(cast(if (number % 10 = 0, null, number % 10) as Nullable(String))) as c from numbers(100000)
+settings output_format_orc_dictionary_key_size_threshold = 0.1, engine_file_truncate_on_insert = 1;
+
+desc file(concat(currentDatabase(), '_03241_data2_without_dict.orc'));
+desc file(concat(currentDatabase(), '_03241_data2_with_dict.orc'));
+
+select c, count(1) from file(concat(currentDatabase(), '_03241_data2_without_dict.orc')) group by c order by c;
+select c, count(1) from file(concat(currentDatabase(), '_03241_data2_with_dict.orc')) group by c order by c;
+
+select c, count(1) from file(concat(currentDatabase(), '_03241_data2_without_dict.orc'), ORC, 'c String') group by c order by c;
+select c, count(1) from file(concat(currentDatabase(), '_03241_data2_with_dict.orc'), ORC, 'c LowCardinality(String)') group by c order by c;
diff --git a/tests/queries/0_stateless/03247_create-same-table-concurrently-with-atomic-engine.reference b/tests/queries/0_stateless/03247_create-same-table-concurrently-with-atomic-engine.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03247_create-same-table-concurrently-with-atomic-engine.sh b/tests/queries/0_stateless/03247_create-same-table-concurrently-with-atomic-engine.sh
new file mode 100755
index 00000000000..1a05d5bf890
--- /dev/null
+++ b/tests/queries/0_stateless/03247_create-same-table-concurrently-with-atomic-engine.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# Runs many concurrent CREATE OR REPLACE TABLE queries against a single table in an Atomic database.
+# Query results are discarded and the accompanying .reference file is empty, so no output is expected.
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+# Dedicated Atomic database whose name is derived from $CLICKHOUSE_DATABASE.
+$CLICKHOUSE_CLIENT --query "CREATE DATABASE IF NOT EXISTS ${CLICKHOUSE_DATABASE}_db ENGINE=Atomic"
+
+# Worker: re-creates the same table 20 times in a row; stdout is discarded, stderr is not.
+function create_or_replace_table_thread
+{
+    for _ in {1..20}; do
+        $CLICKHOUSE_CLIENT --query "CREATE OR REPLACE TABLE ${CLICKHOUSE_DATABASE}_db.test_table (x Int) ENGINE=Memory" > /dev/null
+    done
+}
+# Exported so that the child `bash -c` processes started below can resolve the function.
+export -f create_or_replace_table_thread
+
+# Start 20 workers in the background, then block until every one of them has finished.
+for _ in {1..20}; do
+    bash -c create_or_replace_table_thread &
+done
+
+wait
+
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS ${CLICKHOUSE_DATABASE}_db SYNC"
diff --git a/tests/queries/0_stateless/03248_create-same-table-concurrently-with-replicated-engine.reference b/tests/queries/0_stateless/03248_create-same-table-concurrently-with-replicated-engine.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03248_create-same-table-concurrently-with-replicated-engine.sh b/tests/queries/0_stateless/03248_create-same-table-concurrently-with-replicated-engine.sh
new file mode 100755
index 00000000000..27bb65ff2db
--- /dev/null
+++ b/tests/queries/0_stateless/03248_create-same-table-concurrently-with-replicated-engine.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+# Tags: long
+
+# Runs many concurrent CREATE OR REPLACE TABLE queries against a single table in a Replicated database.
+# Query results are discarded and the accompanying .reference file is empty, so no output is expected.
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+# Dedicated Replicated database; both its name and its Keeper path are derived from $CLICKHOUSE_DATABASE.
+$CLICKHOUSE_CLIENT --query "CREATE DATABASE IF NOT EXISTS ${CLICKHOUSE_DATABASE}_db ENGINE=Replicated('/test/clickhouse/db/${CLICKHOUSE_DATABASE}_db', 's1', 'r1')"
+
+# Worker: re-creates the same table 15 times in a row; stdout is discarded, stderr is not.
+function create_or_replace_table_thread
+{
+    for _ in {1..15}; do
+        $CLICKHOUSE_CLIENT --query "CREATE OR REPLACE TABLE ${CLICKHOUSE_DATABASE}_db.test_table (x Int) ENGINE=Memory" > /dev/null
+    done
+}
+# Exported so that the child `bash -c` processes started below can resolve the function.
+export -f create_or_replace_table_thread
+
+# Start 15 workers in the background, then block until every one of them has finished.
+for _ in {1..15}; do
+    bash -c create_or_replace_table_thread &
+done
+
+wait
+
+$CLICKHOUSE_CLIENT --query "DROP DATABASE IF EXISTS ${CLICKHOUSE_DATABASE}_db SYNC"
diff --git a/tests/queries/0_stateless/03250_SYSTEM_DROP_FORMAT_SCHEMA_CACHE_FOR_Protobuf.reference b/tests/queries/0_stateless/03250_SYSTEM_DROP_FORMAT_SCHEMA_CACHE_FOR_Protobuf.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03250_SYSTEM_DROP_FORMAT_SCHEMA_CACHE_FOR_Protobuf.sh b/tests/queries/0_stateless/03250_SYSTEM_DROP_FORMAT_SCHEMA_CACHE_FOR_Protobuf.sh
new file mode 100755
index 00000000000..aae213a3d53
--- /dev/null
+++ b/tests/queries/0_stateless/03250_SYSTEM_DROP_FORMAT_SCHEMA_CACHE_FOR_Protobuf.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# Tags: race
+
+# Runs Protobuf reads concurrently with SYSTEM DROP FORMAT SCHEMA CACHE FOR Protobuf for up to 20 seconds.
+# The accompanying .reference file is empty, so no output is expected.
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# Copy the schema into a subdirectory of $CLICKHOUSE_SCHEMA_FILES named after $CLICKHOUSE_DATABASE.
+SCHEMADIR=$CLICKHOUSE_SCHEMA_FILES
+CLIENT_SCHEMADIR=$CURDIR/format_schemas
+export SERVER_SCHEMADIR=$CLICKHOUSE_DATABASE
+mkdir -p "$SCHEMADIR/$SERVER_SCHEMADIR"
+cp -r "$CLIENT_SCHEMADIR/03250.proto" "$SCHEMADIR/$SERVER_SCHEMADIR/"
+
+$CLICKHOUSE_CLIENT --query "SYSTEM DROP FORMAT SCHEMA CACHE FOR Protobuf"
+
+# Produce the Protobuf input file that the reader loop below queries; exported for the `bash -c` children.
+BINARY_FILE_PATH=$(mktemp "$CLICKHOUSE_USER_FILES/03250.XXXXXX.binary")
+export BINARY_FILE_PATH
+$CLICKHOUSE_CLIENT --query "SELECT * FROM numbers(10) FORMAT Protobuf SETTINGS format_schema = '$CLIENT_SCHEMADIR/03250:Numbers'" > "$BINARY_FILE_PATH"
+chmod 666 "$BINARY_FILE_PATH"
+
+# Reads the generated file in an endless loop (stopped by `timeout` below); FORMAT Null discards the result.
+function protobuf_reader()
+{
+    while true; do
+        $CLICKHOUSE_CLIENT --query "SELECT count() FROM file('$(basename "$BINARY_FILE_PATH")', 'Protobuf') FORMAT Null SETTINGS max_threads=1, format_schema='$SERVER_SCHEMADIR/03250:Numbers'"
+    done
+}
+export -f protobuf_reader
+
+# Drops the Protobuf schema cache in an endless loop (stopped by `timeout` below).
+function protobuf_cache_drainer()
+{
+    while true; do
+        $CLICKHOUSE_CLIENT --query "SYSTEM DROP FORMAT SCHEMA CACHE FOR Protobuf"
+    done
+}
+export -f protobuf_cache_drainer
+
+timeout 20 bash -c protobuf_reader &
+timeout 20 bash -c protobuf_cache_drainer &
+wait
+
+rm -f "${BINARY_FILE_PATH:?}"
+rm -fr "${SCHEMADIR:?}/${SERVER_SCHEMADIR:?}/"
diff --git a/tests/queries/0_stateless/03251_check_query_formatting.reference b/tests/queries/0_stateless/03251_check_query_formatting.reference
new file mode 100644
index 00000000000..1ea8bd5f462
--- /dev/null
+++ b/tests/queries/0_stateless/03251_check_query_formatting.reference
@@ -0,0 +1,6 @@
+CHECK TABLE test PART 'Hello'
+CHECK TABLE test PARTITION 'Hello'
+CHECK TABLE test PARTITION tuple()
+CHECK TABLE test PARTITION tuple()
+CHECK TABLE test PARTITION (1, 'Hello', ['World'])
+CHECK TABLE test PARTITION ALL
diff --git a/tests/queries/0_stateless/03251_check_query_formatting.sh b/tests/queries/0_stateless/03251_check_query_formatting.sh
new file mode 100755
index 00000000000..e6d0c42eb6d
--- /dev/null
+++ b/tests/queries/0_stateless/03251_check_query_formatting.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+$CLICKHOUSE_FORMAT --query "CHECK TABLE test PART 'Hello'"
+$CLICKHOUSE_FORMAT --query "CHECK TABLE test PARTITION 'Hello'"
+$CLICKHOUSE_FORMAT --query "CHECK TABLE test PARTITION tuple()"
+$CLICKHOUSE_FORMAT --query "CHECK TABLE test PARTITION ()"
+$CLICKHOUSE_FORMAT --query "CHECK TABLE test PARTITION (1, 'Hello', ['World'])"
+$CLICKHOUSE_FORMAT --query "CHECK TABLE test PARTITION ALL"
diff --git a/tests/queries/0_stateless/03251_insert_sparse_all_formats.reference b/tests/queries/0_stateless/03251_insert_sparse_all_formats.reference
new file mode 100644
index 00000000000..840465284a1
--- /dev/null
+++ b/tests/queries/0_stateless/03251_insert_sparse_all_formats.reference
@@ -0,0 +1,102 @@
+Arrow
+9260153077572524277
+ArrowStream
+9260153077572524277
+Avro
+9260153077572524277
+BSONEachRow
+9260153077572524277
+CSV
+9260153077572524277
+CSVWithNames
+9260153077572524277
+CSVWithNamesAndTypes
+9260153077572524277
+CapnProto
+9260153077572524277
+CustomSeparated
+9260153077572524277
+CustomSeparatedWithNames
+9260153077572524277
+CustomSeparatedWithNamesAndTypes
+9260153077572524277
+JSON
+9260153077572524277
+JSONColumns
+9260153077572524277
+JSONColumnsWithMetadata
+9260153077572524277
+JSONCompact
+9260153077572524277
+JSONCompactColumns
+9260153077572524277
+JSONCompactEachRow
+9260153077572524277
+JSONCompactEachRowWithNames
+9260153077572524277
+JSONCompactEachRowWithNamesAndTypes
+9260153077572524277
+JSONCompactStringsEachRow
+9260153077572524277
+JSONCompactStringsEachRowWithNames
+9260153077572524277
+JSONCompactStringsEachRowWithNamesAndTypes
+9260153077572524277
+JSONEachRow
+9260153077572524277
+JSONLines
+9260153077572524277
+JSONObjectEachRow
+9260153077572524277
+JSONStringsEachRow
+9260153077572524277
+MsgPack
+9260153077572524277
+NDJSON
+9260153077572524277
+Native
+9260153077572524277
+ORC
+9260153077572524277
+Parquet
+9260153077572524277
+Raw
+9260153077572524277
+RawWithNames
+9260153077572524277
+RawWithNamesAndTypes
+9260153077572524277
+RowBinary
+9260153077572524277
+RowBinaryWithNames
+9260153077572524277
+RowBinaryWithNamesAndTypes
+9260153077572524277
+TSKV
+9260153077572524277
+TSV
+9260153077572524277
+TSVRaw
+9260153077572524277
+TSVRawWithNames
+9260153077572524277
+TSVRawWithNamesAndTypes
+9260153077572524277
+TSVWithNames
+9260153077572524277
+TSVWithNamesAndTypes
+9260153077572524277
+TabSeparated
+9260153077572524277
+TabSeparatedRaw
+9260153077572524277
+TabSeparatedRawWithNames
+9260153077572524277
+TabSeparatedRawWithNamesAndTypes
+9260153077572524277
+TabSeparatedWithNames
+9260153077572524277
+TabSeparatedWithNamesAndTypes
+9260153077572524277
+Values
+9260153077572524277
diff --git a/tests/queries/0_stateless/03251_insert_sparse_all_formats.sh b/tests/queries/0_stateless/03251_insert_sparse_all_formats.sh
new file mode 100755
index 00000000000..7b52c068107
--- /dev/null
+++ b/tests/queries/0_stateless/03251_insert_sparse_all_formats.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest, long
+
+# For every format that supports both input and output (minus an exclusion list), inserts the same
+# 1000 rows three ways and prints the format name followed by a checksum of the table contents.
+
+set -e
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+formats=$($CLICKHOUSE_CLIENT --query "
+    SELECT name FROM system.formats
+    WHERE is_input AND is_output AND name NOT IN ('Template', 'Npy', 'RawBLOB', 'ProtobufList', 'ProtobufSingle', 'Protobuf', 'LineAsString')
+    ORDER BY name FORMAT TSV
+")
+
+$CLICKHOUSE_CLIENT --query "
+    DROP TABLE IF EXISTS t_sparse_all_formats;
+    CREATE TABLE t_sparse_all_formats (a UInt64, b UInt64, c String) ENGINE = MergeTree ORDER BY a;
+"
+
+# $formats is left unquoted on purpose: word splitting yields one format name per iteration.
+for format in $formats; do
+    echo "$format"
+    # Baseline rows inserted with a plain INSERT ... SELECT that supplies only column a.
+    $CLICKHOUSE_CLIENT --query "INSERT INTO t_sparse_all_formats(a) SELECT number FROM numbers(1000)"
+
+    # Same rows sent through the HTTP interface in the tested format, with explicit zero/empty b and c.
+    $CLICKHOUSE_CLIENT --query "SELECT number AS a, 0::UInt64 AS b, '' AS c FROM numbers(1000) FORMAT $format" \
+        | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+t_sparse_all_formats+FORMAT+$format&enable_parsing_to_custom_serialization=1" --data-binary @-
+
+    # Same rows sent through the HTTP interface in the tested format, with only column a supplied.
+    $CLICKHOUSE_CLIENT --query "SELECT number AS a FROM numbers(1000) FORMAT $format" \
+        | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+t_sparse_all_formats(a)+FORMAT+$format&enable_parsing_to_custom_serialization=1" --data-binary @-
+
+    $CLICKHOUSE_CLIENT --query "
+        SELECT sum(sipHash64(*)) FROM t_sparse_all_formats;
+        TRUNCATE TABLE t_sparse_all_formats;
+    "
+done
+
+$CLICKHOUSE_CLIENT --query "DROP TABLE t_sparse_all_formats"
diff --git a/tests/queries/0_stateless/03251_unaligned_window_function_state.reference b/tests/queries/0_stateless/03251_unaligned_window_function_state.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03251_unaligned_window_function_state.sql b/tests/queries/0_stateless/03251_unaligned_window_function_state.sql
new file mode 100644
index 00000000000..2ff75f61f76
--- /dev/null
+++ b/tests/queries/0_stateless/03251_unaligned_window_function_state.sql
@@ -0,0 +1,3 @@
+-- https://github.com/ClickHouse/ClickHouse/issues/70569
+-- Reproduces UBSAN alert about misaligned address
+SELECT anyLast(id), anyLast(time), exponentialTimeDecayedAvg(10)(id, time) FROM values('id Int8, time DateTime', (1,1),(1,2),(2,3),(3,3),(3,5)); -- { serverError BAD_ARGUMENTS }
diff --git a/tests/queries/0_stateless/03252_fill_missed_arrays.reference b/tests/queries/0_stateless/03252_fill_missed_arrays.reference
new file mode 100644
index 00000000000..b92677edb95
--- /dev/null
+++ b/tests/queries/0_stateless/03252_fill_missed_arrays.reference
@@ -0,0 +1 @@
+20000
diff --git a/tests/queries/0_stateless/03252_fill_missed_arrays.sql b/tests/queries/0_stateless/03252_fill_missed_arrays.sql
new file mode 100644
index 00000000000..585cd370673
--- /dev/null
+++ b/tests/queries/0_stateless/03252_fill_missed_arrays.sql
@@ -0,0 +1,22 @@
+DROP TABLE IF EXISTS t_fill_arrays;
+
+CREATE TABLE t_fill_arrays
+(
+    `id` String,
+    `mapCol` Map(String, Array(String)),
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1, min_bytes_for_full_part_storage = 0;
+
+INSERT INTO t_fill_arrays (id) SELECT hex(number) FROM numbers(10000);
+
+ALTER TABLE t_fill_arrays ADD COLUMN arrCol Array(String) DEFAULT [];
+
+INSERT INTO t_fill_arrays (id) SELECT hex(number) FROM numbers(10000);
+
+SELECT count() FROM t_fill_arrays WHERE NOT ignore(arrCol, mapCol.values);
+
+OPTIMIZE TABLE t_fill_arrays FINAL;
+
+DROP TABLE t_fill_arrays;
diff --git a/tests/queries/0_stateless/03252_optimize_functions_to_subcolumns_map.reference b/tests/queries/0_stateless/03252_optimize_functions_to_subcolumns_map.reference
new file mode 100644
index 00000000000..3bc835eaeac
--- /dev/null
+++ b/tests/queries/0_stateless/03252_optimize_functions_to_subcolumns_map.reference
@@ -0,0 +1 @@
+['foo']	['bar']
diff --git a/tests/queries/0_stateless/03252_optimize_functions_to_subcolumns_map.sql b/tests/queries/0_stateless/03252_optimize_functions_to_subcolumns_map.sql
new file mode 100644
index 00000000000..e0cc932783d
--- /dev/null
+++ b/tests/queries/0_stateless/03252_optimize_functions_to_subcolumns_map.sql
@@ -0,0 +1,11 @@
+-- Reads mapKeys/mapValues ALIAS columns over a Map with LowCardinality keys and values with optimize_functions_to_subcolumns enabled.
+drop table if exists x;
+create table x
+(
+    kv Map(LowCardinality(String), LowCardinality(String)),
+    k Array(LowCardinality(String)) alias mapKeys(kv),
+    v Array(LowCardinality(String)) alias mapValues(kv)
+) engine=Memory();
+insert into x values (map('foo', 'bar'));
+select k, v from x settings optimize_functions_to_subcolumns=1;
+drop table x;
diff --git a/tests/queries/0_stateless/03252_recursive_proto_with_skip_unsupported_fields.reference b/tests/queries/0_stateless/03252_recursive_proto_with_skip_unsupported_fields.reference
new file mode 100644
index 00000000000..fb3e23bf2af
--- /dev/null
+++ b/tests/queries/0_stateless/03252_recursive_proto_with_skip_unsupported_fields.reference
@@ -0,0 +1,19 @@
+1
+Row 1:
+──────
+name:               fields
+type:               Map(String, Tuple(
+    null_value Enum8('NULL_VALUE' = 0),
+    number_value Float64,
+    string_value String,
+    bool_value UInt8,
+    list_value_values Array(Tuple(
+        null_value Enum8('NULL_VALUE' = 0),
+        number_value Float64,
+        string_value String,
+        bool_value UInt8))))
+default_type:       
+default_expression: 
+comment:            
+codec_expression:   
+ttl_expression:     
diff --git a/tests/queries/0_stateless/03252_recursive_proto_with_skip_unsupported_fields.sh b/tests/queries/0_stateless/03252_recursive_proto_with_skip_unsupported_fields.sh
new file mode 100755
index 00000000000..b088e75e94d
--- /dev/null
+++ b/tests/queries/0_stateless/03252_recursive_proto_with_skip_unsupported_fields.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+SCHEMADIR="$CUR_DIR/format_schemas"
+$CLICKHOUSE_LOCAL -q "DESCRIBE TABLE file('nonexist', 'Protobuf') FORMAT Vertical SETTINGS format_schema='$SCHEMADIR/03252_recursive_type.proto:Struct', input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference=0" |& grep -c BAD_ARGUMENTS
+$CLICKHOUSE_LOCAL -q "DESCRIBE TABLE file('nonexist', 'Protobuf') FORMAT Vertical SETTINGS format_schema='$SCHEMADIR/03252_recursive_type.proto:Struct', input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference=1"
diff --git a/tests/queries/0_stateless/03253_group_by_cube_too_many_keys.reference b/tests/queries/0_stateless/03253_group_by_cube_too_many_keys.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03253_group_by_cube_too_many_keys.sql b/tests/queries/0_stateless/03253_group_by_cube_too_many_keys.sql
new file mode 100644
index 00000000000..616387d39ae
--- /dev/null
+++ b/tests/queries/0_stateless/03253_group_by_cube_too_many_keys.sql
@@ -0,0 +1,2 @@
+SELECT * FROM (SELECT '' AS c0, '' AS c1, '' AS c2, '' AS c3, '' AS c4, '' AS c5, '' AS c6, '' AS c7, '' AS c8, '' AS c9, '' AS c10, '' AS c11, '' AS c12, '' AS c13, '' AS c14, '' AS c15, '' AS c16, '' AS c17, '' AS c18, '' AS c19, '' AS c20, '' AS c21, '' AS c22, '' AS c23, '' AS c24, '' AS c25, '' AS c26, '' AS c27, '' AS c28, '' AS c29, '' AS c30, '' AS c31, '' AS c32, '' AS c33, '' AS c34, '' AS c35, '' AS c36, '' AS c37, '' AS c38, '' AS c39, '' AS c40, '' AS c41, '' AS c42, '' AS c43, '' AS c44, '' AS c45, '' AS c46, '' AS c47, '' AS c48, '' AS c49, '' AS c50, '' AS c51, '' AS c52, '' AS c53, '' AS c54, '' AS c55, '' AS c56, '' AS c57, '' AS c58, '' AS c59, '' AS c60, '' AS c61, '' AS c62, '' AS c63, '' AS c64)
+GROUP BY ALL WITH CUBE; -- { serverError TOO_MANY_COLUMNS }
diff --git a/tests/queries/0_stateless/data_parquet/ipv6_bloom_filter.gz.parquet b/tests/queries/0_stateless/data_parquet/ipv6_bloom_filter.gz.parquet
new file mode 100644
index 00000000000..335fddc1f0a
Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/ipv6_bloom_filter.gz.parquet differ
diff --git a/tests/queries/0_stateless/data_parquet/multi_column_bf.gz.parquet b/tests/queries/0_stateless/data_parquet/multi_column_bf.gz.parquet
new file mode 100644
index 00000000000..bfa0a39062a
Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/multi_column_bf.gz.parquet differ
diff --git a/tests/queries/0_stateless/format_schemas/03250.proto b/tests/queries/0_stateless/format_schemas/03250.proto
new file mode 100644
index 00000000000..aaeea4a386e
--- /dev/null
+++ b/tests/queries/0_stateless/format_schemas/03250.proto
@@ -0,0 +1,5 @@
+syntax = "proto3";
+
+message Numbers {
+  uint64 number = 1;
+};
diff --git a/tests/queries/0_stateless/format_schemas/03252_recursive_type.proto b/tests/queries/0_stateless/format_schemas/03252_recursive_type.proto
new file mode 100644
index 00000000000..49c56309003
--- /dev/null
+++ b/tests/queries/0_stateless/format_schemas/03252_recursive_type.proto
@@ -0,0 +1,62 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+// Based on Google's struct.proto (see above license)
+
+syntax = "proto3";
+
+message Struct {
+  map<string, Value> fields = 1;
+}
+
+message Value {
+  oneof kind {
+    NullValue null_value = 1;
+    double number_value = 2;
+    string string_value = 3;
+    bool bool_value = 4;
+    Struct struct_value = 5;
+    ListValue list_value = 6;
+  }
+}
+
+enum NullValue {
+  NULL_VALUE = 0;
+}
+
+message ListValue {
+  repeated Value values = 1;
+}
+
+message Message {
+  string event = 1;
+  Struct payload = 2;
+}
diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.sh b/tests/queries/1_stateful/00177_memory_bound_merging.sh
index 6e155aee1df..a2782083e7b 100755
--- a/tests/queries/1_stateful/00177_memory_bound_merging.sh
+++ b/tests/queries/1_stateful/00177_memory_bound_merging.sh
@@ -15,7 +15,8 @@ check_replicas_read_in_order() {
         SELECT COUNT() > 0
         FROM system.text_log
         WHERE query_id IN (SELECT query_id FROM system.query_log WHERE query_id != '$1' AND initial_query_id = '$1' AND event_date >= yesterday())
-            AND event_date >= yesterday() AND message ILIKE '%Reading%ranges in order%'"
+            AND event_date >= yesterday() AND message ILIKE '%Reading%ranges in order%'
+        SETTINGS max_rows_to_read=0"
 }
 
 # replicas should use reading in order following initiator's decision to execute aggregation in order.
diff --git a/tests/queries/1_stateful/00183_prewhere_conditions_order.reference b/tests/queries/1_stateful/00183_prewhere_conditions_order.reference
new file mode 100644
index 00000000000..22b4d5da243
--- /dev/null
+++ b/tests/queries/1_stateful/00183_prewhere_conditions_order.reference
@@ -0,0 +1,2 @@
+                Prewhere filter column: and(like(__table1.Title, \'%Google%\'_String), notLike(__table1.URL, \'%.google.%\'_String), notEquals(__table1.SearchPhrase, \'\'_String)) (removed)
+                Prewhere filter column: and(notEquals(__table1.SearchPhrase, \'\'_String), like(__table1.Title, \'%Google%\'_String), notLike(__table1.URL, \'%.google.%\'_String)) (removed)
diff --git a/tests/queries/1_stateful/00183_prewhere_conditions_order.sql b/tests/queries/1_stateful/00183_prewhere_conditions_order.sql
new file mode 100644
index 00000000000..63b69b0469c
--- /dev/null
+++ b/tests/queries/1_stateful/00183_prewhere_conditions_order.sql
@@ -0,0 +1,28 @@
+SET optimize_move_to_prewhere = 1;
+SET enable_multiple_prewhere_read_steps = 1;
+
+SELECT explain
+FROM (
+EXPLAIN actions=1
+SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID)
+FROM test.hits
+WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> ''
+GROUP BY SearchPhrase
+ORDER BY c DESC
+LIMIT 10
+SETTINGS allow_reorder_prewhere_conditions = 0
+)
+WHERE explain like '%Prewhere filter column%';
+
+SELECT explain
+FROM (
+EXPLAIN actions=1
+SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID)
+FROM test.hits
+WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> ''
+GROUP BY SearchPhrase
+ORDER BY c DESC
+LIMIT 10
+SETTINGS allow_reorder_prewhere_conditions = 1
+)
+WHERE explain like '%Prewhere filter column%';
diff --git a/utils/check-style/experimental_settings_ignore.txt b/utils/check-style/experimental_settings_ignore.txt
index 3eda9821799..30004cea2fc 100644
--- a/utils/check-style/experimental_settings_ignore.txt
+++ b/utils/check-style/experimental_settings_ignore.txt
@@ -47,3 +47,4 @@ allow_settings_after_format_in_insert
 allow_statistic_optimize
 allow_statistics_optimize
 allow_unrestricted_reads_from_keeper
+allow_reorder_prewhere_conditions