commit 07191ce10d
Merge branch 'master' into zvonand-implicit-tz

.github/workflows/woboq.yml (vendored)
@@ -12,6 +12,7 @@ jobs:
# don't use dockerhub push because this image updates so rarely
WoboqCodebrowser:
runs-on: [self-hosted, style-checker]
timeout-minutes: 420 # the task is pretty heavy, so there's an additional hour
steps:
- name: Set envs
run: |
@@ -4,7 +4,7 @@ if (SANITIZE OR NOT (
))
if (ENABLE_JEMALLOC)
message (${RECONFIGURE_MESSAGE_LEVEL}
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds.")
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds. Use -DENABLE_JEMALLOC=0")
endif ()
set (ENABLE_JEMALLOC OFF)
else ()
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac

ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
ARG VERSION="23.5.2.7"
ARG VERSION="23.5.3.24"
ARG PACKAGES="clickhouse-keeper"

# user/group precreated explicitly with fixed uid/gid on purpose.
@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.5.2.7"
ARG VERSION="23.5.3.24"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# user/group precreated explicitly with fixed uid/gid on purpose.
@@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.5.2.7"
ARG VERSION="23.5.3.24"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# set non-empty deb_location_url url to create a docker image
@@ -16,7 +16,6 @@ def process_result(result_folder):
"TLPGroupBy",
"TLPHaving",
"TLPWhere",
"TLPWhereGroupBy",
"NoREC",
]
failed_tests = []
@@ -33,7 +33,7 @@ cd /workspace

for _ in $(seq 1 60); do if [[ $(wget -q 'localhost:8123' -O-) == 'Ok.' ]]; then break ; else sleep 1; fi ; done

cd /sqlancer/sqlancer-master
cd /sqlancer/sqlancer-main

TIMEOUT=300
NUM_QUERIES=1000
docs/changelogs/v22.8.19.10-lts.md (new file, 19 lines)
@@ -0,0 +1,19 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v22.8.19.10-lts (989bc2fe8b0) FIXME as compared to v22.8.18.31-lts (4de7a95a544)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
docs/changelogs/v23.3.4.17-lts.md (new file, 22 lines)
@@ -0,0 +1,22 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.3.4.17-lts (2c99b73ff40) FIXME as compared to v23.3.3.52-lts (cb963c474db)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix crash when Pool::Entry::disconnect() is called [#50334](https://github.com/ClickHouse/ClickHouse/pull/50334) ([Val Doroshchuk](https://github.com/valbok)).
* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
docs/changelogs/v23.4.4.16-stable.md (new file, 22 lines)
@@ -0,0 +1,22 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.4.4.16-stable (747ba4fc6a0) FIXME as compared to v23.4.3.48-stable (d9199f8d3cc)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix crash when Pool::Entry::disconnect() is called [#50334](https://github.com/ClickHouse/ClickHouse/pull/50334) ([Val Doroshchuk](https://github.com/valbok)).
* Fix iceberg V2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
docs/changelogs/v23.5.3.24-stable.md (new file, 26 lines)
@@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2023
---

# 2023 Changelog

### ClickHouse release v23.5.3.24-stable (76f54616d3b) FIXME as compared to v23.5.2.7-stable (5751aa1ab9f)

#### Bug Fix (user-visible misbehavior in an official stable release)

* Fix Log family table return wrong rows count after truncate [#50585](https://github.com/ClickHouse/ClickHouse/pull/50585) ([flynn](https://github.com/ucasfl)).
* Fix bug in `uniqExact` parallel merging [#50590](https://github.com/ClickHouse/ClickHouse/pull/50590) ([Nikita Taranov](https://github.com/nickitat)).
* Revert recent grace hash join changes [#50699](https://github.com/ClickHouse/ClickHouse/pull/50699) ([vdimir](https://github.com/vdimir)).
* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)).
* Add compat setting for non-const timezones [#50834](https://github.com/ClickHouse/ClickHouse/pull/50834) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix iceberg V2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)).
* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Sqlancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
@@ -233,6 +233,12 @@ libhdfs3 support HDFS namenode HA.
- `_path` — Path to the file.
- `_file` — Name of the file.

## Storage Settings {#storage-settings}

- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default.

**See Also**

- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
@@ -35,6 +35,10 @@ The table structure can differ from the original MySQL table structure:
- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types.
- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays.

:::note
The MySQL Table Engine is currently not available on the ClickHouse builds for MacOS ([issue](https://github.com/ClickHouse/ClickHouse/issues/21191))
:::

**Engine Parameters**

- `host:port` — MySQL server address.
@@ -1,5 +1,5 @@
---
slug: /en/sql-reference/table-functions/redis
slug: /en/engines/table-engines/integrations/redis
sidebar_position: 43
sidebar_label: Redis
---
@@ -34,7 +34,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
- `primary` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a Redis key.

- columns other than the primary key will be serialized in binary as Redis value in corresponding order.

- queries with key equals or in filtering will be optimized to multi keys lookup from Redis. If queries without filtering key full table scan will happen which is a heavy operation.

## Usage Example {#usage-example}
@@ -127,6 +127,12 @@ CREATE TABLE table_with_asterisk (name String, value UInt32)
ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/*', 'CSV');
```

## Storage Settings {#storage-settings}

- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default.

## S3-related Settings {#settings}

The following settings can be set before query execution or placed into configuration file.
@@ -853,7 +853,7 @@ Tags:
- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume.
- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved.
- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3).
- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default (if enabled) if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3). If disabled then already expired data part is written into a default volume and then right after moved to TTL volume.
- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.

Configuration examples:
@@ -92,3 +92,11 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64
`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).

For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.

## Settings {#settings}

- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
@@ -102,3 +102,7 @@ SELECT * FROM url_engine_table
`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression).

For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.

## Storage Settings {#storage-settings}

- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
@@ -470,6 +470,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.

## CSVWithNames {#csvwithnames}
@@ -1877,13 +1878,13 @@ The table below shows supported data types and how they match ClickHouse [data t
| `string (uuid)` \** | [UUID](/docs/en/sql-reference/data-types/uuid.md) | `string (uuid)` \** |
| `fixed(16)` | [Int128/UInt128](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(16)` |
| `fixed(32)` | [Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `fixed(32)` |
| `record` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `record` |

\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern)
\** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types)

Unsupported Avro data types: `record` (non-root), `map`

Unsupported Avro logical data types: `time-millis`, `time-micros`, `duration`

### Inserting Data {#inserting-data-1}
@@ -1922,7 +1923,26 @@ Output Avro file compression and sync interval can be configured with [output_fo

Using the ClickHouse [DESCRIBE](/docs/en/sql-reference/statements/describe-table) function, you can quickly view the inferred format of an Avro file like the following example. This example includes the URL of a publicly accessible Avro file in the ClickHouse S3 public bucket:

``` DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro');
```
DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro);
```
```
┌─name───────────────────────┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ WatchID │ Int64 │ │ │ │ │ │
│ JavaEnable │ Int32 │ │ │ │ │ │
│ Title │ String │ │ │ │ │ │
│ GoodEvent │ Int32 │ │ │ │ │ │
│ EventTime │ Int32 │ │ │ │ │ │
│ EventDate │ Date32 │ │ │ │ │ │
│ CounterID │ Int32 │ │ │ │ │ │
│ ClientIP │ Int32 │ │ │ │ │ │
│ ClientIP6 │ FixedString(16) │ │ │ │ │ │
│ RegionID │ Int32 │ │ │ │ │ │
...
│ IslandID │ FixedString(16) │ │ │ │ │ │
│ RequestNum │ Int32 │ │ │ │ │ │
│ RequestTry │ Int32 │ │ │ │ │ │
└────────────────────────────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

## AvroConfluent {#data-format-avro-confluent}
@@ -932,6 +932,38 @@ Result
" string "
```

### input_format_csv_allow_whitespace_or_tab_as_delimiter {#input_format_csv_allow_whitespace_or_tab_as_delimiter}

Allow to use whitespace or tab as field delimiter in CSV strings.

Default value: `false`.

**Examples**

Query

```bash
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_allow_whitespace_or_tab_as_delimiter=true --format_csv_delimiter=' '
```

Result

```text
a b
```

Query

```bash
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_allow_whitespace_or_tab_as_delimiter=true --format_csv_delimiter='\t'
```

Result

```text
a b
```

## Values format settings {#values-format-settings}

### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
@@ -3328,7 +3328,35 @@ Possible values:

Default value: `0`.

## s3_truncate_on_insert
## engine_file_allow_create_multiple_files {#engine_file_allow_create_multiple_files}

Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:

`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.

Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query creates a new file.

Default value: `0`.

## engine_file_skip_empty_files {#engine_file_skip_empty_files}

Enables or disables skipping empty files in [File](../../engines/table-engines/special/file.md) engine tables.

Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.

Default value: `0`.

## storage_file_read_method {#storage_file_read_method}

Method of reading data from storage file, one of: `read`, `pread`, `mmap`. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).

Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.

## s3_truncate_on_insert {#s3_truncate_on_insert}

Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists.
@@ -3338,7 +3366,29 @@ Possible values:

Default value: `0`.

## hdfs_truncate_on_insert
## s3_create_new_file_on_insert {#s3_create_new_file_on_insert}

Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:

initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.

Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query creates a new file.

Default value: `0`.

## s3_skip_empty_files {#s3_skip_empty_files}

Enables or disables skipping empty files in [S3](../../engines/table-engines/integrations/s3.md) engine tables.

Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.

Default value: `0`.

## hdfs_truncate_on_insert {#hdfs_truncate_on_insert}

Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists.
@@ -3348,31 +3398,7 @@ Possible values:

Default value: `0`.

## engine_file_allow_create_multiple_files

Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern:

`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc.

Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query replaces existing content of the file with the new data.

Default value: `0`.

## s3_create_new_file_on_insert

Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern:

initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.

Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query replaces existing content of the file with the new data.

Default value: `0`.

## hdfs_create_new_file_on_insert
## hdfs_create_new_file_on_insert {#hdfs_create_new_file_on_insert}

Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern:
@@ -3380,7 +3406,27 @@ initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc.

Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query replaces existing content of the file with the new data.
- 1 — `INSERT` query creates a new file.

Default value: `0`.

## hdfs_skip_empty_files {#hdfs_skip_empty_files}

Enables or disables skipping empty files in [HDFS](../../engines/table-engines/integrations/hdfs.md) engine tables.

Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.

Default value: `0`.

## engine_url_skip_empty_files {#engine_url_skip_empty_files}

Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables.

Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.

Default value: `0`.
@@ -196,6 +196,16 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
- `_path` — Path to the file.
- `_file` — Name of the file.

## Settings

- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - allows to select empty data from a file that doesn't exist. Disabled by default.
- [engine_file_truncate_on_insert](/docs/en/operations/settings/settings.md#engine-file-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.

**See Also**

- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
@@ -97,6 +97,12 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
- `_path` — Path to the file.
- `_file` — Name of the file.

## Storage Settings {#storage-settings}

- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default.

**See Also**

- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns)
@@ -1,10 +1,10 @@
---
slug: /en/sql-reference/table-functions/redis
sidebar_position: 10
sidebar_label: Redis
sidebar_position: 43
sidebar_label: redis
---

# Redis
# redis

This table function allows integrating ClickHouse with [Redis](https://redis.io/).
@@ -202,6 +202,12 @@ FROM s3(
LIMIT 5;
```

## Storage Settings {#storage-settings}

- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
- [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default.

**See Also**

- [S3 engine](../../engines/table-engines/integrations/s3.md)
@@ -53,6 +53,10 @@ Character `|` inside patterns is used to specify failover addresses. They are it
- `_path` — Path to the `URL`.
- `_file` — Resource name of the `URL`.

## Storage Settings {#storage-settings}

- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.

**See Also**

- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
@@ -409,8 +409,15 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
endif ()
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
if (NOT BUILD_STANDALONE_KEEPER)
add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
# symlink to standalone keeper binary
else ()
add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse-keeper clickhouse-keeper-client DEPENDS clickhouse-keeper)
install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION "${CMAKE_INSTALL_BINDIR}" COMPONENT clickhouse-keeper)
endif ()

list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-client)
endif ()
if (ENABLE_CLICKHOUSE_DISKS)
@@ -112,6 +112,18 @@ if (BUILD_STANDALONE_KEEPER)
clickhouse-keeper.cpp
)

# List of resources for clickhouse-keeper client
if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
list(APPEND CLICKHOUSE_KEEPER_STANDALONE_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/KeeperClient.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Commands.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../programs/keeper-client/Parser.cpp

${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/LineReader.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Client/ReplxxLineReader.cpp
)
endif()

clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES})

# Remove some redundant dependencies
@@ -122,6 +134,10 @@ if (BUILD_STANDALONE_KEEPER)
target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src/Core/include") # uses some includes from core
target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src") # uses some includes from common

if (ENABLE_CLICKHOUSE_KEEPER_CLIENT AND TARGET ch_rust::skim)
target_link_libraries(clickhouse-keeper PRIVATE ch_rust::skim)
endif()

target_link_libraries(clickhouse-keeper
PRIVATE
ch_contrib::abseil_swiss_tables
@@ -34,6 +34,8 @@
#include "Core/Defines.h"
#include "config.h"
#include "config_version.h"
#include "config_tools.h"

#if USE_SSL
# include <Poco/Net/Context.h>
@@ -131,7 +133,10 @@ int Keeper::run()
if (config().hasOption("help"))
{
Poco::Util::HelpFormatter help_formatter(Keeper::options());
auto header_str = fmt::format("{} [OPTION] [-- [ARG]...]\n"
auto header_str = fmt::format("{0} [OPTION] [-- [ARG]...]\n"
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
"{0} client [OPTION]\n"
#endif
"positional arguments can be used to rewrite config.xml properties, for example, --http_port=8010",
commandName());
help_formatter.setHeader(header_str);
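For context, a minimal standalone sketch (not from this commit) of why the format string switches from `{}` to `{0}`: positional fmt indices let one argument (the command name) be reused in both usage lines when the keeper-client branch is compiled in.

```cpp
#include <fmt/format.h>
#include <iostream>

int main()
{
    std::string command_name = "clickhouse-keeper"; // hypothetical value for illustration

    // With positional "{0}", the same argument can appear on several header lines.
    auto header = fmt::format("{0} [OPTION] [-- [ARG]...]\n"
                              "{0} client [OPTION]\n"
                              "positional arguments can be used to rewrite config.xml properties",
                              command_name);
    std::cout << header << '\n';
}
```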
@@ -1,6 +1,30 @@
#include <Common/StringUtils/StringUtils.h>
#include "config_tools.h"

int mainEntryClickHouseKeeper(int argc, char ** argv);

#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
int mainEntryClickHouseKeeperClient(int argc, char ** argv);
#endif

int main(int argc_, char ** argv_)
{
#if ENABLE_CLICKHOUSE_KEEPER_CLIENT

if (argc_ >= 2)
{
/// 'clickhouse-keeper --client ...' and 'clickhouse-keeper client ...' are OK
if (strcmp(argv_[1], "--client") == 0 || strcmp(argv_[1], "client") == 0)
{
argv_[1] = argv_[0];
return mainEntryClickHouseKeeperClient(--argc_, argv_ + 1);
}
}

if (argc_ > 0 && (strcmp(argv_[0], "clickhouse-keeper-client") == 0 || endsWith(argv_[0], "/clickhouse-keeper-client")))
return mainEntryClickHouseKeeperClient(argc_, argv_);
#endif

return mainEntryClickHouseKeeper(argc_, argv_);
}
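As a rough, self-contained sketch of the multi-call dispatch pattern used in this hunk (the entry functions below are stand-ins, not the real ClickHouse ones): the binary inspects `argv` to decide whether it was invoked as the keeper itself, as `... client`, or through a `clickhouse-keeper-client` symlink.

```cpp
#include <cstdio>
#include <cstring>
#include <string>

// Stand-ins for the real entry points, which live elsewhere in the tree.
static int runKeeper(int argc, char **) { std::printf("keeper, argc=%d\n", argc); return 0; }
static int runKeeperClient(int, char ** argv) { std::printf("keeper-client, argv[0]=%s\n", argv[0]); return 0; }

static bool endsWith(const std::string & s, const std::string & suffix)
{
    return s.size() >= suffix.size()
        && s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

int main(int argc, char ** argv)
{
    /// 'binary --client ...' or 'binary client ...' -> dispatch to the client,
    /// shifting argv so the client sees its own name as argv[0].
    if (argc >= 2 && (std::strcmp(argv[1], "--client") == 0 || std::strcmp(argv[1], "client") == 0))
    {
        argv[1] = argv[0];
        return runKeeperClient(argc - 1, argv + 1);
    }

    /// Invoked through a 'clickhouse-keeper-client' symlink -> also dispatch to the client.
    if (argc > 0 && (std::strcmp(argv[0], "clickhouse-keeper-client") == 0 || endsWith(argv[0], "/clickhouse-keeper-client")))
        return runKeeperClient(argc, argv);

    return runKeeper(argc, argv);
}
```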
@@ -1705,7 +1705,6 @@ try
#endif

/// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread.

async_metrics.start();

{
@@ -333,7 +333,7 @@ void ContextAccess::calculateAccessRights() const
boost::algorithm::join(roles_info->getCurrentRolesNames(), ", "),
boost::algorithm::join(roles_info->getEnabledRolesNames(), ", "));
}
LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", params.readonly, params.allow_ddl, params.allow_introspection);
LOG_TRACE(trace_log, "Settings: readonly = {}, allow_ddl = {}, allow_introspection_functions = {}", params.readonly, params.allow_ddl, params.allow_introspection);
LOG_TRACE(trace_log, "List of all grants: {}", access->toString());
LOG_TRACE(trace_log, "List of all grants including implicit: {}", access_with_implicit->toString());
}
@@ -85,6 +85,9 @@ void BackupCoordinationStageSync::setError(const String & current_host, const Ex
writeException(exception, buf, true);
zookeeper->createIfNotExists(zookeeper_path + "/error", buf.str());

/// When backup/restore fails, it removes the nodes from Zookeeper.
/// Sometimes it fails to remove all nodes. It's possible that it removes /error node, but fails to remove /stage node,
/// so the following line tries to preserve the error status.
auto code = zookeeper->trySet(zookeeper_path, Stage::ERROR);
if (code != Coordination::Error::ZOK)
throw zkutil::KeeperException(code, zookeeper_path);
@@ -152,8 +152,7 @@ namespace
}
catch (...)
{
if (coordination)
coordination->setError(Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
sendExceptionToCoordination(coordination, Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
}
}
@@ -18,7 +18,7 @@ ConnectionPoolPtr ConnectionPoolFactory::get(
String client_name,
Protocol::Compression compression,
Protocol::Secure secure,
Int64 priority)
Priority priority)
{
Key key{
max_connections, host, port, default_database, user, password, quota_key, cluster, cluster_secret, client_name, compression, secure, priority};
@@ -74,7 +74,7 @@ size_t ConnectionPoolFactory::KeyHash::operator()(const ConnectionPoolFactory::K
hash_combine(seed, hash_value(k.client_name));
hash_combine(seed, hash_value(k.compression));
hash_combine(seed, hash_value(k.secure));
hash_combine(seed, hash_value(k.priority));
hash_combine(seed, hash_value(k.priority.value));
return seed;
}
@@ -1,6 +1,7 @@
#pragma once

#include <Common/PoolBase.h>
#include <Common/Priority.h>
#include <Client/Connection.h>
#include <IO/ConnectionTimeouts.h>
#include <Core/Settings.h>
@@ -34,7 +35,7 @@ public:
const Settings * settings = nullptr,
bool force_connected = true) = 0;

virtual Int64 getPriority() const { return 1; }
virtual Priority getPriority() const { return Priority{1}; }
};

using ConnectionPoolPtr = std::shared_ptr<IConnectionPool>;
@@ -60,7 +61,7 @@ public:
const String & client_name_,
Protocol::Compression compression_,
Protocol::Secure secure_,
Int64 priority_ = 1)
Priority priority_ = Priority{1})
: Base(max_connections_,
&Poco::Logger::get("ConnectionPool (" + host_ + ":" + toString(port_) + ")")),
host(host_),
@@ -103,7 +104,7 @@ public:
return host + ":" + toString(port);
}

Int64 getPriority() const override
Priority getPriority() const override
{
return priority;
}
@@ -134,7 +135,7 @@ private:
String client_name;
Protocol::Compression compression; /// Whether to compress data when interacting with the server.
Protocol::Secure secure; /// Whether to encrypt data when interacting with the server.
Int64 priority; /// priority from <remote_servers>
Priority priority; /// priority from <remote_servers>
};

/**
@@ -157,7 +158,7 @@ public:
String client_name;
Protocol::Compression compression;
Protocol::Secure secure;
Int64 priority;
Priority priority;
};

struct KeyHash
@@ -180,7 +181,7 @@ public:
String client_name,
Protocol::Compression compression,
Protocol::Secure secure,
Int64 priority);
Priority priority);
private:
mutable std::mutex mutex;
using ConnectionPoolWeakPtr = std::weak_ptr<IConnectionPool>;
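These hunks replace the raw `Int64 priority` with a `Priority` type pulled in from `Common/Priority.h`. As a rough illustration only (the actual struct in the tree may differ), the idea is a thin strong-typedef over an integer so priorities cannot be mixed up with other numeric parameters, while remaining orderable and hashable via its `.value`:

```cpp
#include <cstdint>

// Hypothetical minimal version of such a wrapper: lower value = higher preference,
// comparable so pools can be ordered by it, with the raw number still reachable as .value.
struct Priority
{
    std::int64_t value = 0;

    bool operator<(const Priority & other) const { return value < other.value; }
    bool operator==(const Priority & other) const { return value == other.value; }
};

static_assert(sizeof(Priority) == sizeof(std::int64_t), "plain wrapper, no storage overhead");
```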
@@ -71,7 +71,7 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
return Base::get(max_ignored_errors, fallback_to_stale_replicas, try_get_entry, get_priority);
}

Int64 ConnectionPoolWithFailover::getPriority() const
Priority ConnectionPoolWithFailover::getPriority() const
{
return (*std::max_element(nested_pools.begin(), nested_pools.end(), [](const auto & a, const auto & b)
{
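A compilable sketch of the pattern this hunk changes only in its return type (simplified types, not the real pool classes): the failover pool reports the priority of whichever nested pool compares largest.

```cpp
#include <algorithm>
#include <memory>
#include <vector>

struct Priority { long long value = 0; bool operator<(const Priority & o) const { return value < o.value; } };

struct Pool
{
    Priority priority;
    Priority getPriority() const { return priority; }
};

// Assumes at least one nested pool; mirrors the std::max_element call above.
Priority getPoolPriority(const std::vector<std::shared_ptr<Pool>> & nested_pools)
{
    return (*std::max_element(nested_pools.begin(), nested_pools.end(),
        [](const auto & a, const auto & b) { return a->getPriority() < b->getPriority(); }))->getPriority();
}
```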
@@ -48,7 +48,7 @@ public:
const Settings * settings,
bool force_connected) override; /// From IConnectionPool

Int64 getPriority() const override; /// From IConnectionPool
Priority getPriority() const override; /// From IConnectionPool

/** Allocates up to the specified number of connections to work.
* Connections provide access to different replicas of one shard.
@@ -151,13 +151,13 @@ public:

ColumnPtr compress() const override;

void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(offsets);
callback(data);
}

void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*offsets);
offsets->forEachSubcolumnRecursively(callback);
@@ -230,12 +230,12 @@ public:
data->getExtremes(min, max);
}

void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(data);
}

void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*data);
data->forEachSubcolumnRecursively(callback);
@@ -166,7 +166,7 @@ public:
size_t byteSizeAt(size_t n) const override { return getDictionary().byteSizeAt(getIndexes().getUInt(n)); }
size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }

void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(idx.getPositionsPtr());

@@ -175,7 +175,7 @@ public:
callback(dictionary.getColumnUniquePtr());
}

void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*idx.getPositionsPtr());
idx.getPositionsPtr()->forEachSubcolumnRecursively(callback);
@@ -340,7 +340,7 @@ private:
explicit Dictionary(MutableColumnPtr && column_unique, bool is_shared);
explicit Dictionary(ColumnPtr column_unique, bool is_shared);

const ColumnPtr & getColumnUniquePtr() const { return column_unique; }
const WrappedPtr & getColumnUniquePtr() const { return column_unique; }
WrappedPtr & getColumnUniquePtr() { return column_unique; }

const IColumnUnique & getColumnUnique() const { return static_cast<const IColumnUnique &>(*column_unique); }
@@ -273,12 +273,12 @@ void ColumnMap::getExtremes(Field & min, Field & max) const
max = std::move(map_max_value);
}

void ColumnMap::forEachSubcolumn(ColumnCallback callback) const
void ColumnMap::forEachSubcolumn(MutableColumnCallback callback)
{
callback(nested);
}

void ColumnMap::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnMap::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
callback(*nested);
nested->forEachSubcolumnRecursively(callback);
@@ -88,8 +88,8 @@ public:
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
@@ -130,13 +130,13 @@ public:

ColumnPtr compress() const override;

void forEachSubcolumn(ColumnCallback callback) const override
void forEachSubcolumn(MutableColumnCallback callback) override
{
callback(nested_column);
callback(null_map);
}

void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override
{
callback(*nested_column);
nested_column->forEachSubcolumnRecursively(callback);
@@ -664,18 +664,18 @@ size_t ColumnObject::allocatedBytes() const
return res;
}

void ColumnObject::forEachSubcolumn(ColumnCallback callback) const
void ColumnObject::forEachSubcolumn(MutableColumnCallback callback)
{
for (const auto & entry : subcolumns)
for (const auto & part : entry->data.data)
for (auto & entry : subcolumns)
for (auto & part : entry->data.data)
callback(part);
}

void ColumnObject::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnObject::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
for (const auto & entry : subcolumns)
for (auto & entry : subcolumns)
{
for (const auto & part : entry->data.data)
for (auto & part : entry->data.data)
{
callback(*part);
part->forEachSubcolumnRecursively(callback);
@@ -206,8 +206,8 @@ public:
size_t size() const override;
size_t byteSize() const override;
size_t allocatedBytes() const override;
void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
void insert(const Field & field) override;
void insertDefault() override;
void insertFrom(const IColumn & src, size_t n) override;
@@ -751,13 +751,13 @@ bool ColumnSparse::structureEquals(const IColumn & rhs) const
return false;
}

void ColumnSparse::forEachSubcolumn(ColumnCallback callback) const
void ColumnSparse::forEachSubcolumn(MutableColumnCallback callback)
{
callback(values);
callback(offsets);
}

void ColumnSparse::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnSparse::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
callback(*values);
values->forEachSubcolumnRecursively(callback);
@@ -140,8 +140,8 @@ public:

ColumnPtr compress() const override;

void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;

bool structureEquals(const IColumn & rhs) const override;
@@ -31,14 +31,12 @@ ColumnString::ColumnString(const ColumnString & src)
offsets(src.offsets.begin(), src.offsets.end()),
chars(src.chars.begin(), src.chars.end())
{
if (!offsets.empty())
{
Offset last_offset = offsets.back();

/// This will also prevent possible overflow in offset.
if (chars.size() != last_offset)
throw Exception(ErrorCodes::LOGICAL_ERROR, "String offsets has data inconsistent with chars array");
}
Offset last_offset = offsets.empty() ? 0 : offsets.back();
/// This will also prevent possible overflow in offset.
if (last_offset != chars.size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"String offsets has data inconsistent with chars array. Last offset: {}, array length: {}",
last_offset, chars.size());
}

@@ -157,6 +155,7 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co
Offsets & res_offsets = res->offsets;

filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint);

return res;
}

@@ -571,10 +570,11 @@ void ColumnString::protect()

void ColumnString::validate() const
{
if (!offsets.empty() && offsets.back() != chars.size())
Offset last_offset = offsets.empty() ? 0 : offsets.back();
if (last_offset != chars.size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"ColumnString validation failed: size mismatch (internal logical error) {} != {}",
offsets.back(), chars.size());
last_offset, chars.size());
}

}
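A toy model of the invariant these two hunks tighten (not the real ColumnString, just the shape of the check): the last entry of `offsets` must equal the total size of `chars`, with an empty column treated as `last_offset == 0`.

```cpp
#include <cstdint>
#include <stdexcept>
#include <vector>

// offsets[i] is the end position of string i inside the flat chars buffer,
// so the last offset must land exactly at chars.size().
void validateStringColumn(const std::vector<std::uint64_t> & offsets, const std::vector<char> & chars)
{
    std::uint64_t last_offset = offsets.empty() ? 0 : offsets.back();
    if (last_offset != chars.size())
        throw std::logic_error("String offsets are inconsistent with chars array");
}
```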
@@ -495,15 +495,15 @@ void ColumnTuple::getExtremes(Field & min, Field & max) const
max = max_tuple;
}

void ColumnTuple::forEachSubcolumn(ColumnCallback callback) const
void ColumnTuple::forEachSubcolumn(MutableColumnCallback callback)
{
for (const auto & column : columns)
for (auto & column : columns)
callback(column);
}

void ColumnTuple::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
void ColumnTuple::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
{
for (const auto & column : columns)
for (auto & column : columns)
{
callback(*column);
column->forEachSubcolumnRecursively(callback);
@@ -96,8 +96,8 @@ public:
size_t byteSizeAt(size_t n) const override;
size_t allocatedBytes() const override;
void protect() override;
void forEachSubcolumn(ColumnCallback callback) const override;
void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override;
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
ColumnPtr compress() const override;
@@ -62,19 +62,19 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const Field & defa
return res;
}

void IColumn::forEachSubcolumn(MutableColumnCallback callback)
void IColumn::forEachSubcolumn(ColumnCallback callback) const
{
std::as_const(*this).forEachSubcolumn([&callback](const WrappedPtr & subcolumn)
const_cast<IColumn*>(this)->forEachSubcolumn([&callback](WrappedPtr & subcolumn)
{
callback(const_cast<WrappedPtr &>(subcolumn));
callback(std::as_const(subcolumn));
});
}

void IColumn::forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback)
void IColumn::forEachSubcolumnRecursively(RecursiveColumnCallback callback) const
{
std::as_const(*this).forEachSubcolumnRecursively([&callback](const IColumn & subcolumn)
const_cast<IColumn*>(this)->forEachSubcolumnRecursively([&callback](IColumn & subcolumn)
{
callback(const_cast<IColumn &>(subcolumn));
callback(std::as_const(subcolumn));
});
}
@@ -418,21 +418,23 @@ public:
/// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
/// Shallow: doesn't do recursive calls; don't do call for itself.

using ColumnCallback = std::function<void(const WrappedPtr &)>;
virtual void forEachSubcolumn(ColumnCallback) const {}

using MutableColumnCallback = std::function<void(WrappedPtr &)>;
virtual void forEachSubcolumn(MutableColumnCallback callback);
virtual void forEachSubcolumn(MutableColumnCallback) {}

/// Default implementation calls the mutable overload using const_cast.
using ColumnCallback = std::function<void(const WrappedPtr &)>;
virtual void forEachSubcolumn(ColumnCallback) const;

/// Similar to forEachSubcolumn but it also do recursive calls.
/// In recursive calls it's prohibited to replace pointers
/// to subcolumns, so we use another callback function.

using RecursiveColumnCallback = std::function<void(const IColumn &)>;
virtual void forEachSubcolumnRecursively(RecursiveColumnCallback) const {}

using RecursiveMutableColumnCallback = std::function<void(IColumn &)>;
virtual void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback);
virtual void forEachSubcolumnRecursively(RecursiveMutableColumnCallback) {}

/// Default implementation calls the mutable overload using const_cast.
using RecursiveColumnCallback = std::function<void(const IColumn &)>;
virtual void forEachSubcolumnRecursively(RecursiveColumnCallback) const;

/// Columns have equal structure.
/// If true - you can use "compareAt", "insertFrom", etc. methods.
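A self-contained sketch of the delegation pattern the IColumn hunks switch to (simplified names, not the real interface): the mutable overload becomes the virtual customization point, and the const overload is a thin adapter that reuses it via `const_cast` while handing only const references back to the caller.

```cpp
#include <functional>
#include <iostream>
#include <utility>

struct Node
{
    int payload = 42;

    using MutableCallback = std::function<void(int &)>;
    using ConstCallback = std::function<void(const int &)>;

    /// Virtual customization point: derived classes hand out their sub-objects mutably.
    virtual void forEach(MutableCallback callback) { callback(payload); }

    /// Const adapter: reuses the mutable overload and re-adds constness
    /// before the reference reaches the caller's callback.
    void forEach(ConstCallback callback) const
    {
        const_cast<Node *>(this)->forEach([&callback](int & sub) { callback(std::as_const(sub)); });
    }

    virtual ~Node() = default;
};

int main()
{
    const Node node{};
    node.forEach([](const int & value) { std::cout << value << '\n'; });
}
```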
@@ -104,7 +104,7 @@ DNSResolver::IPAddresses hostByName(const std::string & host)
}
catch (const Poco::Net::DNSException & e)
{
LOG_ERROR(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.name());
LOG_WARNING(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.name());
addresses.clear();
}
@ -1,4 +1,5 @@
|
||||
#include <Common/GetPriorityForLoadBalancing.h>
|
||||
#include <Common/Priority.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -8,23 +9,23 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
std::function<size_t(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
|
||||
std::function<Priority(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
|
||||
{
|
||||
std::function<size_t(size_t index)> get_priority;
|
||||
std::function<Priority(size_t index)> get_priority;
|
||||
switch (load_balance)
|
||||
{
|
||||
case LoadBalancing::NEAREST_HOSTNAME:
|
||||
if (hostname_differences.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized");
|
||||
get_priority = [this](size_t i) { return hostname_differences[i]; };
|
||||
get_priority = [this](size_t i) { return Priority{static_cast<Int64>(hostname_differences[i])}; };
|
||||
break;
|
||||
case LoadBalancing::IN_ORDER:
|
||||
get_priority = [](size_t i) { return i; };
|
||||
get_priority = [](size_t i) { return Priority{static_cast<Int64>(i)}; };
|
||||
break;
|
||||
case LoadBalancing::RANDOM:
|
||||
break;
|
||||
case LoadBalancing::FIRST_OR_RANDOM:
|
||||
get_priority = [offset](size_t i) -> size_t { return i != offset; };
|
||||
get_priority = [offset](size_t i) { return i != offset ? Priority{1} : Priority{0}; };
|
||||
break;
|
||||
case LoadBalancing::ROUND_ROBIN:
|
||||
if (last_used >= pool_size)
|
||||
@ -38,8 +39,8 @@ std::function<size_t(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc
|
||||
* */
|
||||
get_priority = [this, pool_size](size_t i)
|
||||
{
|
||||
++i;
|
||||
return i < last_used ? pool_size - i : i - last_used;
|
||||
++i; // To make `i` indexing start with 1 instead of 0 as `last_used` does
|
||||
return Priority{static_cast<Int64>(i < last_used ? pool_size - i : i - last_used)};
|
||||
};
|
||||
break;
|
||||
}
|
||||
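For reference, a self-contained sketch of what the switch from size_t to a Priority return type buys: the functor's result becomes an explicit "lower value is tried earlier" ordering key. The Priority struct below is a hypothetical stand-in for the real DB::Priority, reduced to what the example needs:

#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Hypothetical stand-in for DB::Priority: a thin wrapper around Int64 where a
// smaller value means "try this entry earlier". The real type may differ.
struct Priority
{
    int64_t value = 0;
    bool operator<(const Priority & other) const { return value < other.value; }
};

using GetPriorityFunc = std::function<Priority(size_t index)>;

int main()
{
    const size_t pool_size = 4;
    const size_t offset = 2;

    // FIRST_OR_RANDOM: the pool at `offset` gets priority 0, everything else 1.
    GetPriorityFunc first_or_random = [offset](size_t i) { return i != offset ? Priority{1} : Priority{0}; };

    // Order pool indices by the priority the functor assigns to them.
    std::vector<size_t> order(pool_size);
    for (size_t i = 0; i < pool_size; ++i)
        order[i] = i;
    std::stable_sort(order.begin(), order.end(),
                     [&](size_t a, size_t b) { return first_or_random(a) < first_or_random(b); });

    for (size_t i : order)
        std::cout << i << ' ';   // prints: 2 0 1 3
    std::cout << '\n';
}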
@ -21,7 +21,7 @@ public:
return !(*this == other);
}

std::function<size_t(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
std::function<Priority(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;

std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.

@ -13,6 +13,7 @@
#include <Common/NetException.h>
#include <Common/Exception.h>
#include <Common/randomSeed.h>
#include <Common/Priority.h>

namespace DB
@ -34,7 +35,7 @@ namespace ProfileEvents
/// This class provides a pool with fault tolerance. It is used for pooling of connections to replicated DB.
/// Initialized by several PoolBase objects.
/// When a connection is requested, tries to create or choose an alive connection from one of the nested pools.
/// Pools are tried in the order consistent with lexicographical order of (error count, priority, random number) tuples.
/// Pools are tried in the order consistent with lexicographical order of (error count, slowdown count, config priority, priority, random number) tuples.
/// Number of tries for a single pool is limited by max_tries parameter.
/// The client can set nested pool priority by passing a GetPriority functor.
///
@ -113,7 +114,7 @@ public:

/// The client can provide this functor to affect load balancing - the index of a pool is passed to
/// this functor. The pools with lower result value will be tried first.
using GetPriorityFunc = std::function<size_t(size_t index)>;
using GetPriorityFunc = std::function<Priority(size_t index)>;

/// Returns at least min_entries and at most max_entries connections (at most one connection per nested pool).
/// The method will throw if it is unable to get min_entries alive connections or
@ -336,9 +337,9 @@ struct PoolWithFailoverBase<TNestedPool>::PoolState
/// The number of slowdowns that led to changing replica in HedgedRequestsFactory
UInt64 slowdown_count = 0;
/// Priority from the <remote_server> configuration.
Int64 config_priority = 1;
Priority config_priority{1};
/// Priority from the GetPriorityFunc.
Int64 priority = 0;
Priority priority{0};
UInt64 random = 0;

void randomize()
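The updated comment above describes replica selection as a lexicographic comparison of (error count, slowdown count, config priority, priority, random). A compact illustration of that ordering with std::tie, using a hypothetical, stripped-down PoolState rather than the real PoolWithFailoverBase template:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <tuple>
#include <vector>

// Hypothetical, simplified PoolState: only the fields that take part in ordering.
struct PoolState
{
    uint64_t error_count = 0;
    uint64_t slowdown_count = 0;
    int64_t config_priority = 1;  // from the <remote_server> config
    int64_t priority = 0;         // from the GetPriority functor
    uint64_t random = 0;          // tie breaker

    bool operator<(const PoolState & other) const
    {
        return std::tie(error_count, slowdown_count, config_priority, priority, random)
             < std::tie(other.error_count, other.slowdown_count, other.config_priority, other.priority, other.random);
    }
};

int main()
{
    std::vector<PoolState> states = {
        {2, 0, 1, 0, 7},   // many errors: tried last
        {0, 1, 1, 0, 3},   // one slowdown
        {0, 0, 1, 1, 9},   // healthy, but worse functor priority than the next one
        {0, 0, 1, 0, 5},   // healthy, best priority: tried first
    };
    std::sort(states.begin(), states.end());
    for (const auto & s : states)
        std::cout << s.error_count << ' ' << s.slowdown_count << ' ' << s.priority << '\n';
}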
@ -179,7 +179,7 @@ ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std

std::vector<ShuffleHost> ZooKeeper::shuffleHosts() const
{
std::function<size_t(size_t index)> get_priority = args.get_priority_load_balancing.getPriorityFunc(args.get_priority_load_balancing.load_balancing, 0, args.hosts.size());
std::function<Priority(size_t index)> get_priority = args.get_priority_load_balancing.getPriorityFunc(args.get_priority_load_balancing.load_balancing, 0, args.hosts.size());
std::vector<ShuffleHost> shuffle_hosts;
for (size_t i = 0; i < args.hosts.size(); ++i)
{

@ -49,7 +49,7 @@ constexpr size_t MULTI_BATCH_SIZE = 100;
struct ShuffleHost
{
String host;
Int64 priority = 0;
Priority priority;
UInt64 random = 0;

void randomize()

@ -2,6 +2,8 @@

#include <Core/SettingsFields.h>
#include <Common/SettingsChanges.h>
#include <Common/FieldVisitorToString.h>
#include <IO/Operators.h>
#include <base/range.h>
#include <boost/blank.hpp>
#include <unordered_map>
@ -547,14 +549,16 @@ void BaseSettings<TTraits>::read(ReadBuffer & in, SettingsWriteFormat format)
template <typename TTraits>
String BaseSettings<TTraits>::toString() const
{
String res;
for (const auto & field : *this)
WriteBufferFromOwnString out;
bool first = true;
for (const auto & setting : *this)
{
if (!res.empty())
res += ", ";
res += field.getName() + " = " + field.getValueString();
if (!first)
out << ", ";
out << setting.getName() << " = " << applyVisitor(FieldVisitorToString(), setting.getValue());
first = false;
}
return res;
return out.str();
}

template <typename TTraits>

@ -96,6 +96,7 @@ class IColumn;
M(Bool, s3_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables.", 0) \
M(Bool, azure_truncate_on_insert, false, "Enables or disables truncate before insert in azure engine tables.", 0) \
M(Bool, s3_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in s3 engine tables", 0) \
M(Bool, s3_skip_empty_files, false, "Allow to skip empty files in s3 table engine", 0) \
M(Bool, azure_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in azure engine tables", 0) \
M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \
M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
@ -105,6 +106,7 @@ class IColumn;
M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \
M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \
M(Bool, hdfs_skip_empty_files, false, "Allow to skip empty files in hdfs table engine", 0) \
M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \
@ -612,6 +614,8 @@ class IColumn;
M(Bool, engine_file_empty_if_not_exists, false, "Allows to select data from a file engine table without file", 0) \
M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \
M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \
M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \
M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \
M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \
M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \
@ -861,6 +865,7 @@ class IColumn;
M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \

@ -54,6 +54,7 @@
#include <Common/Elf.h>
#include <Common/setThreadName.h>
#include <Common/logger_useful.h>
#include <Interpreters/Context.h>
#include <filesystem>

#include <Loggers/OwnFormattingChannel.h>
@ -80,7 +81,9 @@ namespace DB
}
}

DB::PipeFDs signal_pipe;
using namespace DB;

PipeFDs signal_pipe;


/** Reset signal handler to the default and send signal to itself.
@ -89,10 +92,10 @@ DB::PipeFDs signal_pipe;
static void call_default_signal_handler(int sig)
{
if (SIG_ERR == signal(sig, SIG_DFL))
DB::throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);

if (0 != raise(sig))
DB::throwFromErrno("Cannot send signal.", DB::ErrorCodes::CANNOT_SEND_SIGNAL);
throwFromErrno("Cannot send signal.", ErrorCodes::CANNOT_SEND_SIGNAL);
}

static const size_t signal_pipe_buf_size =
@ -110,8 +113,8 @@ static void writeSignalIDtoSignalPipe(int sig)
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.

char buf[signal_pipe_buf_size];
DB::WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
DB::writeBinary(sig, out);
WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
writeBinary(sig, out);
out.next();

errno = saved_errno;
@ -141,17 +144,17 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.

char buf[signal_pipe_buf_size];
DB::WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);

const ucontext_t * signal_context = reinterpret_cast<ucontext_t *>(context);
const StackTrace stack_trace(*signal_context);

DB::writeBinary(sig, out);
DB::writePODBinary(*info, out);
DB::writePODBinary(signal_context, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(static_cast<UInt32>(getThreadId()), out);
DB::writePODBinary(DB::current_thread, out);
writeBinary(sig, out);
writePODBinary(*info, out);
writePODBinary(signal_context, out);
writePODBinary(stack_trace, out);
writeBinary(static_cast<UInt32>(getThreadId()), out);
writePODBinary(current_thread, out);

out.next();

@ -203,12 +206,12 @@ public:
static_assert(PIPE_BUF >= 512);
static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512");
char buf[signal_pipe_buf_size];
DB::ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf);
ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf);

while (!in.eof())
{
int sig = 0;
DB::readBinary(sig, in);
readBinary(sig, in);
// We may log some specific signals afterwards, with different log
// levels and more info, but for completeness we log all signals
// here at trace level.
@ -231,8 +234,8 @@ public:
UInt32 thread_num;
std::string message;

DB::readBinary(thread_num, in);
DB::readBinary(message, in);
readBinary(thread_num, in);
readBinary(message, in);

onTerminate(message, thread_num);
}
@ -248,17 +251,17 @@ public:
ucontext_t * context{};
StackTrace stack_trace(NoCapture{});
UInt32 thread_num{};
DB::ThreadStatus * thread_ptr{};
ThreadStatus * thread_ptr{};

if (sig != SanitizerTrap)
{
DB::readPODBinary(info, in);
DB::readPODBinary(context, in);
readPODBinary(info, in);
readPODBinary(context, in);
}

DB::readPODBinary(stack_trace, in);
DB::readBinary(thread_num, in);
DB::readPODBinary(thread_ptr, in);
readPODBinary(stack_trace, in);
readBinary(thread_num, in);
readPODBinary(thread_ptr, in);

/// This allows to receive more signals if failure happens inside onFault function.
/// Example: segfault while symbolizing stack trace.
@ -298,9 +301,9 @@ private:
ucontext_t * context,
const StackTrace & stack_trace,
UInt32 thread_num,
DB::ThreadStatus * thread_ptr) const
ThreadStatus * thread_ptr) const
{
DB::ThreadStatus thread_status;
ThreadStatus thread_status;

String query_id;
String query;
@ -314,7 +317,7 @@ private:

if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
{
DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
CurrentThread::attachInternalTextLogsQueue(logs_queue, LogsLevel::trace);
}
}

@ -358,12 +361,12 @@ private:
/// NOTE: This still require memory allocations and mutex lock inside logger.
/// BTW we can also print it to stderr using write syscalls.

DB::WriteBufferFromOwnString bare_stacktrace;
DB::writeString("Stack trace:", bare_stacktrace);
WriteBufferFromOwnString bare_stacktrace;
writeString("Stack trace:", bare_stacktrace);
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
{
DB::writeChar(' ', bare_stacktrace);
DB::writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace);
writeChar(' ', bare_stacktrace);
writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace);
}

LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
@ -411,8 +414,48 @@ private:

/// Send crash report to developers (if configured)
if (sig != SanitizerTrap)
{
SentryWriter::onFault(sig, error_message, stack_trace);

/// Advice the user to send it manually.
if constexpr (std::string_view(VERSION_OFFICIAL).contains("official build"))
{
const auto & date_lut = DateLUT::instance();

/// Approximate support period, upper bound.
if (time(nullptr) - date_lut.makeDate(2000 + VERSION_MAJOR, VERSION_MINOR, 1) < (365 + 30) * 86400)
{
LOG_FATAL(log, "Report this error to https://github.com/ClickHouse/ClickHouse/issues");
}
else
{
LOG_FATAL(log, "ClickHouse version {} is old and should be upgraded to the latest version.", VERSION_STRING);
}
}
else
{
LOG_FATAL(log, "This ClickHouse version is not official and should be upgraded to the official build.");
}
}

/// ClickHouse Keeper does not link to some part of Settings.
#ifndef CLICKHOUSE_PROGRAM_STANDALONE_BUILD
/// List changed settings.
if (!query_id.empty())
{
ContextPtr query_context = thread_ptr->getQueryContext();
if (query_context)
{
String changed_settings = query_context->getSettingsRef().toString();

if (changed_settings.empty())
LOG_FATAL(log, "No settings were changed");
else
LOG_FATAL(log, "Changed settings: {}", changed_settings);
}
}
#endif

/// When everything is done, we will try to send these error messages to client.
if (thread_ptr)
thread_ptr->onFatalError();
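Much of the hunk above funnels crash information through signal_pipe because almost nothing is async-signal-safe inside a handler: the handler only writes one small fixed-size binary record, and the listener thread does the heavy lifting. A minimal POSIX sketch of that self-pipe idea, with illustrative names rather than the BaseDaemon code:

#include <csignal>
#include <cstdio>
#include <unistd.h>

static int pipe_fds[2];

static void handler(int sig)
{
    // Only async-signal-safe work here: a single write() of a small, fixed-size record.
    (void)write(pipe_fds[1], &sig, sizeof(sig));
}

int main()
{
    if (pipe(pipe_fds) != 0)
        return 1;
    std::signal(SIGUSR1, handler);
    std::raise(SIGUSR1);

    // In a daemon this read loop would run in a dedicated listener thread.
    int received = 0;
    if (read(pipe_fds[0], &received, sizeof(received)) == static_cast<ssize_t>(sizeof(received)))
        std::printf("got signal %d from the pipe\n", received);
    return 0;
}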
@ -436,15 +479,15 @@ static DISABLE_SANITIZER_INSTRUMENTATION void sanitizerDeathCallback()
/// Also need to send data via pipe. Otherwise it may lead to deadlocks or failures in printing diagnostic info.

char buf[signal_pipe_buf_size];
DB::WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);
WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf);

const StackTrace stack_trace;

int sig = SignalListener::SanitizerTrap;
DB::writeBinary(sig, out);
DB::writePODBinary(stack_trace, out);
DB::writeBinary(UInt32(getThreadId()), out);
DB::writePODBinary(DB::current_thread, out);
writeBinary(sig, out);
writePODBinary(stack_trace, out);
writeBinary(UInt32(getThreadId()), out);
writePODBinary(current_thread, out);

out.next();

@ -470,7 +513,7 @@ static DISABLE_SANITIZER_INSTRUMENTATION void sanitizerDeathCallback()
std::string log_message;

if (std::current_exception())
log_message = "Terminate called for uncaught exception:\n" + DB::getCurrentExceptionMessage(true);
log_message = "Terminate called for uncaught exception:\n" + getCurrentExceptionMessage(true);
else
log_message = "Terminate called without an active exception";

@ -482,11 +525,11 @@ static DISABLE_SANITIZER_INSTRUMENTATION void sanitizerDeathCallback()
log_message.resize(buf_size - 16);

char buf[buf_size];
DB::WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], buf_size, buf);
WriteBufferFromFileDescriptor out(signal_pipe.fds_rw[1], buf_size, buf);

DB::writeBinary(static_cast<int>(SignalListener::StdTerminate), out);
DB::writeBinary(static_cast<UInt32>(getThreadId()), out);
DB::writeBinary(log_message, out);
writeBinary(static_cast<int>(SignalListener::StdTerminate), out);
writeBinary(static_cast<UInt32>(getThreadId()), out);
writeBinary(log_message, out);
out.next();

abort();
@ -512,7 +555,7 @@ static bool tryCreateDirectories(Poco::Logger * logger, const std::string & path
}
catch (...)
{
LOG_WARNING(logger, "{}: when creating {}, {}", __PRETTY_FUNCTION__, path, DB::getCurrentExceptionMessage(true));
LOG_WARNING(logger, "{}: when creating {}, {}", __PRETTY_FUNCTION__, path, getCurrentExceptionMessage(true));
}
return false;
}
@ -527,7 +570,7 @@ void BaseDaemon::reloadConfiguration()
* (It's convenient to log in console when you start server without any command line parameters.)
*/
config_path = config().getString("config-file", getDefaultConfigFileName());
DB::ConfigProcessor config_processor(config_path, false, true);
ConfigProcessor config_processor(config_path, false, true);
config_processor.setConfigPath(fs::path(config_path).parent_path());
loaded_config = config_processor.loadConfig(/* allow_zk_includes = */ true);

@ -548,7 +591,7 @@ BaseDaemon::~BaseDaemon()
/// Reset signals to SIG_DFL to avoid trying to write to the signal_pipe that will be closed after.
for (int sig : handled_signals)
if (SIG_ERR == signal(sig, SIG_DFL))
DB::throwFromErrno("Cannot set signal handler.", DB::ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
signal_pipe.close();
}

@ -592,7 +635,7 @@ void BaseDaemon::closeFDs()
/// Iterate directory separately from closing fds to avoid closing iterated directory fd.
std::vector<int> fds;
for (const auto & path : fs::directory_iterator(proc_path))
fds.push_back(DB::parse<int>(path.path().filename()));
fds.push_back(parse<int>(path.path().filename()));

for (const auto & fd : fds)
{
@ -662,7 +705,7 @@ void BaseDaemon::initialize(Application & self)
}
umask(umask_num);

DB::ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, "");
ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, "");

/// Write core dump on crash.
{
@ -713,12 +756,12 @@ void BaseDaemon::initialize(Application & self)
/// {
/// try
/// {
/// DB::SomeApp app;
/// SomeApp app;
/// return app.run(argc, argv);
/// }
/// catch (...)
/// {
/// std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
/// std::cerr << getCurrentExceptionMessage(true) << "\n";
/// return 1;
/// }
/// }
@ -772,7 +815,7 @@ void BaseDaemon::initialize(Application & self)

/// Create pid file.
if (config().has("pid"))
pid_file.emplace(config().getString("pid"), DB::StatusFile::write_pid);
pid_file.emplace(config().getString("pid"), StatusFile::write_pid);

if (is_daemon)
{
@ -799,7 +842,7 @@ void BaseDaemon::initialize(Application & self)
initializeTerminationAndSignalProcessing();
logRevision();

for (const auto & key : DB::getMultipleKeysFromConfig(config(), "", "graphite"))
for (const auto & key : getMultipleKeysFromConfig(config(), "", "graphite"))
{
graphite_writers.emplace(key, std::make_unique<GraphiteWriter>(key));
}
@ -887,7 +930,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
signal_listener_thread.start(*signal_listener);

#if defined(__ELF__) && !defined(OS_FREEBSD)
String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
String build_id_hex = SymbolIndex::instance()->getBuildIDHex();
if (build_id_hex.empty())
build_id = "";
else
@ -902,7 +945,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
std::string executable_path = getExecutablePath();

if (!executable_path.empty())
stored_binary_hash = DB::Elf(executable_path).getStoredBinaryHash();
stored_binary_hash = Elf(executable_path).getStoredBinaryHash();
#endif
}

@ -963,7 +1006,7 @@ void BaseDaemon::handleSignal(int signal_id)
onInterruptSignals(signal_id);
}
else
throw DB::Exception::createDeprecated(std::string("Unsupported signal: ") + strsignal(signal_id), 0); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
throw Exception::createDeprecated(std::string("Unsupported signal: ") + strsignal(signal_id), 0); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
}

void BaseDaemon::onInterruptSignals(int signal_id)
@ -1020,7 +1063,7 @@ void BaseDaemon::setupWatchdog()
pid = fork();

if (-1 == pid)
DB::throwFromErrno("Cannot fork", DB::ErrorCodes::SYSTEM_ERROR);
throwFromErrno("Cannot fork", ErrorCodes::SYSTEM_ERROR);

if (0 == pid)
{
@ -1073,13 +1116,13 @@ void BaseDaemon::setupWatchdog()
pf = new OwnJSONPatternFormatter(config());
else
pf = new OwnPatternFormatter;
Poco::AutoPtr<DB::OwnFormattingChannel> log = new DB::OwnFormattingChannel(pf, new Poco::ConsoleChannel(std::cerr));
Poco::AutoPtr<OwnFormattingChannel> log = new OwnFormattingChannel(pf, new Poco::ConsoleChannel(std::cerr));
logger().setChannel(log);
}

/// Cuncurrent writing logs to the same file from two threads is questionable on its own,
/// but rotating them from two threads is disastrous.
if (auto * channel = dynamic_cast<DB::OwnSplitChannel *>(logger().getChannel()))
if (auto * channel = dynamic_cast<OwnSplitChannel *>(logger().getChannel()))
{
channel->setChannelProperty("log", Poco::FileChannel::PROP_ROTATION, "never");
channel->setChannelProperty("log", Poco::FileChannel::PROP_ROTATEONOPEN, "false");
@ -1191,7 +1234,7 @@ void systemdNotify(const std::string_view & command)
int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);

if (s == -1)
DB::throwFromErrno("Can't create UNIX socket for systemd notify.", DB::ErrorCodes::SYSTEM_ERROR);
throwFromErrno("Can't create UNIX socket for systemd notify.", ErrorCodes::SYSTEM_ERROR);

SCOPE_EXIT({ close(s); });

@ -1202,7 +1245,7 @@ void systemdNotify(const std::string_view & command)
addr.sun_family = AF_UNIX;

if (len < 2 || len > sizeof(addr.sun_path) - 1)
throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);
throw Exception(ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path);

memcpy(addr.sun_path, path, len + 1); /// write last zero as well.

@ -1214,7 +1257,7 @@ void systemdNotify(const std::string_view & command)
else if (path[0] == '/')
addrlen += 1; /// non-abstract-addresses should be zero terminated.
else
throw DB::Exception(DB::ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);
throw Exception(ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path);

const struct sockaddr *sock_addr = reinterpret_cast <const struct sockaddr *>(&addr);

@ -1227,7 +1270,7 @@ void systemdNotify(const std::string_view & command)
if (errno == EINTR)
continue;
else
DB::throwFromErrno("Failed to notify systemd, sendto returned error.", DB::ErrorCodes::SYSTEM_ERROR);
throwFromErrno("Failed to notify systemd, sendto returned error.", ErrorCodes::SYSTEM_ERROR);
}
else
sent_bytes_total += sent_bytes;

@ -300,49 +300,6 @@ namespace
MutableColumnPtr additional_keys_map;
};

template <typename T>
IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeysRef(PaddedPODArray<T> & index, size_t dict_size)
{
PaddedPODArray<T> copy(index.cbegin(), index.cend());

HashMap<T, T> dict_map;
HashMap<T, T> add_keys_map;

for (auto val : index)
{
if (val < dict_size)
dict_map.insert({val, dict_map.size()});
else
add_keys_map.insert({val, add_keys_map.size()});
}

auto dictionary_map = ColumnVector<T>::create(dict_map.size());
auto additional_keys_map = ColumnVector<T>::create(add_keys_map.size());
auto & dict_data = dictionary_map->getData();
auto & add_keys_data = additional_keys_map->getData();

for (auto val : dict_map)
dict_data[val.second] = val.first;

for (auto val : add_keys_map)
add_keys_data[val.second] = val.first - dict_size;

for (auto & val : index)
val = val < dict_size ? dict_map[val]
: add_keys_map[val] + dict_map.size();

for (size_t i = 0; i < index.size(); ++i)
{
T expected = index[i] < dict_data.size() ? dict_data[index[i]]
: add_keys_data[index[i] - dict_data.size()] + dict_size;
if (expected != copy[i])
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {}, but got {}", toString(expected), toString(copy[i]));

}

return {std::move(dictionary_map), std::move(additional_keys_map)};
}

template <typename T>
IndexMapsWithAdditionalKeys mapIndexWithAdditionalKeys(PaddedPODArray<T> & index, size_t dict_size)
{

@ -36,6 +36,7 @@
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/queryToString.h>
#include <Storages/StorageKeeperMap.h>
#include <Storages/AlterCommands.h>

namespace DB
{
@ -252,7 +253,7 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
treat_local_as_remote,
treat_local_port_as_remote,
cluster_auth_info.cluster_secure_connection,
/*priority=*/ 1,
Priority{1},
TSA_SUPPRESS_WARNING_FOR_READ(database_name), /// FIXME
cluster_auth_info.cluster_secret};

@ -1441,9 +1442,49 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context,
return table->as<StorageKeeperMap>() != nullptr;
};

const auto is_replicated_table = [&](const ASTPtr & ast)
{
auto table_id = query_context->resolveStorageID(ast, Context::ResolveOrdinary);
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, query_context);

return table->supportsReplication();
};

const auto has_many_shards = [&]()
{
/// If there is only 1 shard then there is no need to replicate some queries.
auto current_cluster = tryGetCluster();
return
!current_cluster || /// Couldn't get the cluster, so we don't know how many shards there are.
current_cluster->getShardsInfo().size() > 1;
};

/// Some ALTERs are not replicated on database level
if (const auto * alter = query_ptr->as<const ASTAlterQuery>())
return !alter->isAttachAlter() && !alter->isFetchAlter() && !alter->isDropPartitionAlter() && !is_keeper_map_table(query_ptr);
{
if (alter->isAttachAlter() || alter->isFetchAlter() || alter->isDropPartitionAlter() || is_keeper_map_table(query_ptr))
return false;

if (has_many_shards() || !is_replicated_table(query_ptr))
return true;

try
{
/// Metadata alter should go through database
for (const auto & child : alter->command_list->children)
if (AlterCommand::parse(child->as<ASTAlterCommand>()))
return true;

/// It's ALTER PARTITION or mutation, doesn't involve database
return false;
}
catch (...)
{
tryLogCurrentException(log);
}

return true;
}

/// DROP DATABASE is not replicated
if (const auto * drop = query_ptr->as<const ASTDropQuery>())
@ -1459,11 +1500,7 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context,
if (is_keeper_map_table(query_ptr))
return false;

/// If there is only 1 shard then there is no need to replicate DELETE query.
auto current_cluster = tryGetCluster();
return
!current_cluster || /// Couldn't get the cluster, so we don't know how many shards there are.
current_cluster->getShardsInfo().size() > 1;
return has_many_shards() || !is_replicated_table(query_ptr);
}

return true;

@ -71,6 +71,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.csv.try_detect_header = settings.input_format_csv_detect_header;
format_settings.csv.skip_trailing_empty_lines = settings.input_format_csv_skip_trailing_empty_lines;
format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces;
format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter;
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;

@ -139,6 +139,7 @@ struct FormatSettings
bool try_detect_header = true;
bool skip_trailing_empty_lines = false;
bool trim_whitespaces = true;
bool allow_whitespace_or_tab_as_delimiter = false;
} csv;

struct HiveText

@ -137,7 +137,7 @@ ColumnPtr FunctionHasColumnInTable::executeImpl(const ColumnsWithTypeAndName & a
treat_local_as_remote,
treat_local_port_as_remote,
/* secure= */ false,
/* priority= */ 1,
/* priority= */ Priority{1},
/* cluster_name= */ "",
/* password= */ ""
};

@ -8,10 +8,11 @@
#include <Common/SipHash.h>
#include <Common/safe_cast.h>

#include <boost/algorithm/string/predicate.hpp>
#include <cassert>
#include <random>
# include <cassert>
# include <boost/algorithm/string/predicate.hpp>

# include <openssl/err.h>
# include <openssl/rand.h>

namespace DB
{
@ -20,6 +21,7 @@ namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int DATA_ENCRYPTION_ERROR;
extern const int OPENSSL_ERROR;
}

namespace FileEncryption
@ -260,12 +262,11 @@ void InitVector::write(WriteBuffer & out) const

InitVector InitVector::random()
{
std::random_device rd;
std::mt19937 gen{rd()};
std::uniform_int_distribution<UInt128::base_type> dis;
UInt128 counter;
for (auto & i : counter.items)
i = dis(gen);
auto * buf = reinterpret_cast<unsigned char *>(counter.items);
auto ret = RAND_bytes(buf, sizeof(counter.items));
if (ret != 1)
throw Exception(DB::ErrorCodes::OPENSSL_ERROR, "OpenSSL error code: {}", ERR_get_error());
return InitVector{counter};
}
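The replacement above drops std::mt19937, which is not a cryptographically secure source for IVs, in favour of OpenSSL's CSPRNG. A standalone sketch of the same call with error handling, assuming OpenSSL headers are available and the program is linked with -lcrypto; the 16-byte size and hex output are illustrative:

#include <array>
#include <cstdio>

#include <openssl/err.h>
#include <openssl/rand.h>

int main()
{
    // A 16-byte counter/IV, the same size as the UInt128 used above.
    std::array<unsigned char, 16> iv;

    // RAND_bytes() draws from OpenSSL's CSPRNG and returns 1 on success.
    if (RAND_bytes(iv.data(), static_cast<int>(iv.size())) != 1)
    {
        std::fprintf(stderr, "RAND_bytes failed, OpenSSL error code: %lu\n", ERR_get_error());
        return 1;
    }

    for (unsigned char byte : iv)
        std::printf("%02x", byte);
    std::printf("\n");
    return 0;
}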
@ -125,9 +125,10 @@ off_t ParallelReadBuffer::seek(off_t offset, int whence)
if (w->bytes_produced > diff)
{
working_buffer = internal_buffer = Buffer(
w->segment.data() + diff, w->segment.data() + w->bytes_produced);
w->segment.data(), w->segment.data() + w->bytes_produced);
pos = working_buffer.begin() + diff;
w->bytes_consumed = w->bytes_produced;
current_position += w->start_offset + w->bytes_consumed;
current_position = w->start_offset + w->bytes_consumed;
addReaders();
return offset;
}

@ -44,8 +44,8 @@ TEST(IOResourceStaticResourceManager, Smoke)

TEST(IOResourceStaticResourceManager, Prioritization)
{
std::optional<Int64> last_priority;
auto check = [&] (Int64 priority)
std::optional<Priority> last_priority;
auto check = [&] (Priority priority)
{
// Lock is not required here because this is called during request execution and we have max_requests = 1
if (last_priority)

@ -108,7 +108,7 @@ Cluster::Address::Address(
password = config.getString(config_prefix + ".password", "");
default_database = config.getString(config_prefix + ".default_database", "");
secure = ConfigHelper::getBool(config, config_prefix + ".secure", false, /* empty_as */true) ? Protocol::Secure::Enable : Protocol::Secure::Disable;
priority = config.getInt(config_prefix + ".priority", 1);
priority = Priority{config.getInt(config_prefix + ".priority", 1)};

const char * port_type = secure == Protocol::Secure::Enable ? "tcp_port_secure" : "tcp_port";
auto default_port = config.getInt(port_type, 0);

@ -4,6 +4,7 @@
#include <Client/ConnectionPoolWithFailover.h>
#include <Common/Macros.h>
#include <Common/MultiVersion.h>
#include <Common/Priority.h>

#include <Poco/Net/SocketAddress.h>

@ -44,7 +45,7 @@ struct ClusterConnectionParameters
bool treat_local_as_remote;
bool treat_local_port_as_remote;
bool secure = false;
Int64 priority = 1;
Priority priority{1};
String cluster_name;
String cluster_secret;
};
@ -131,7 +132,7 @@ public:
Protocol::Compression compression = Protocol::Compression::Enable;
Protocol::Secure secure = Protocol::Secure::Disable;

Int64 priority = 1;
Priority priority{1};

Address() = default;

@ -246,7 +246,7 @@ ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info)
/* treat_local_as_remote= */ false,
/* treat_local_port_as_remote= */ false, /// should be set only for clickhouse-local, but cluster discovery is not used there
/* secure= */ secure,
/* priority= */ 1,
/* priority= */ Priority{1},
/* cluster_name= */ "",
/* password= */ ""};
auto cluster = std::make_shared<Cluster>(

@ -5,6 +5,9 @@
#include <Columns/ColumnTuple.h>
#include <Functions/FunctionHelpers.h>

#ifdef __SSE2__
#include <emmintrin.h>
#endif

namespace DB
{
@ -190,6 +193,85 @@ void getBlockSortPermutationImpl(const Block & block, const SortDescription & de
}
}

bool isIdentityPermutation(const IColumn::Permutation & permutation, size_t limit)
{
static_assert(sizeof(permutation[0]) == sizeof(UInt64), "Invalid permutation value size");

size_t permutation_size = permutation.size();
size_t size = limit == 0 ? permutation_size : std::min(limit, permutation_size);
if (size == 0)
return true;

if (permutation[0] != 0)
return false;

size_t i = 0;

#if defined(__SSE2__)
if (size >= 8)
{
static constexpr UInt64 compare_all_elements_equal_mask = (1UL << 16) - 1;

__m128i permutation_add_vector = { 8, 8 };
__m128i permutation_compare_values_vectors[4] { { 0, 1 }, { 2, 3 }, { 4, 5 }, { 6, 7 } };

const size_t * permutation_data = permutation.data();

static constexpr size_t unroll_count = 8;
size_t size_unrolled = (size / unroll_count) * unroll_count;

for (; i < size_unrolled; i += 8)
{
UInt64 permutation_equals_vector_mask = compare_all_elements_equal_mask;

for (size_t j = 0; j < 4; ++j)
{
__m128i permutation_data_vector = _mm_loadu_si128(reinterpret_cast<const __m128i *>(permutation_data + i + j * 2));
__m128i permutation_equals_vector = _mm_cmpeq_epi8(permutation_data_vector, permutation_compare_values_vectors[j]);
permutation_compare_values_vectors[j] = _mm_add_epi64(permutation_compare_values_vectors[j], permutation_add_vector);
permutation_equals_vector_mask &= _mm_movemask_epi8(permutation_equals_vector);
}

if (permutation_equals_vector_mask != compare_all_elements_equal_mask)
return false;
}
}
#endif

i = std::max(i, static_cast<size_t>(1));
for (; i < size; ++i)
if (permutation[i] != (permutation[i - 1] + 1))
return false;

return true;
}

template <typename Comparator>
bool isAlreadySortedImpl(size_t rows, Comparator compare)
{
/** If the rows are not too few, then let's make a quick attempt to verify that the block is not sorted.
* Constants - at random.
*/
static constexpr size_t num_rows_to_try = 10;
if (rows > num_rows_to_try * 5)
{
for (size_t i = 1; i < num_rows_to_try; ++i)
{
size_t prev_position = rows * (i - 1) / num_rows_to_try;
size_t curr_position = rows * i / num_rows_to_try;

if (compare(curr_position, prev_position))
return false;
}
}

for (size_t i = 1; i < rows; ++i)
if (compare(i, i - 1))
return false;

return true;
}

}

void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
@ -200,30 +282,18 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit)
if (permutation.empty())
return;

size_t columns = block.columns();
for (size_t i = 0; i < columns; ++i)
{
auto & column_to_sort = block.getByPosition(i).column;
column_to_sort = column_to_sort->permute(permutation, limit);
}
}

void stableSortBlock(Block & block, const SortDescription & description)
{
if (!block)
return;

IColumn::Permutation permutation;
getBlockSortPermutationImpl(block, description, IColumn::PermutationSortStability::Stable, 0, permutation);

if (permutation.empty())
bool is_identity_permutation = isIdentityPermutation(permutation, limit);
if (is_identity_permutation && limit == 0)
return;

size_t columns = block.columns();
for (size_t i = 0; i < columns; ++i)
{
auto & column_to_sort = block.getByPosition(i).column;
column_to_sort = column_to_sort->permute(permutation, 0);
if (is_identity_permutation)
column_to_sort = column_to_sort->cut(0, std::min(static_cast<size_t>(limit), permutation.size()));
else
column_to_sort = column_to_sort->permute(permutation, limit);
}
}
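sortBlock above now asks whether the computed permutation is already the identity and, if so, avoids permuting (copying) every column, only cutting when a LIMIT applies. A scalar sketch of that check; the production version adds the SSE2-unrolled fast path shown earlier and operates on IColumn::Permutation:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// A permutation is an identity prefix if p[0] == 0 and each element is the previous one plus 1.
static bool isIdentityPermutation(const std::vector<uint64_t> & permutation, size_t limit)
{
    size_t size = limit == 0 ? permutation.size() : std::min(limit, permutation.size());
    if (size == 0)
        return true;
    if (permutation[0] != 0)
        return false;
    for (size_t i = 1; i < size; ++i)
        if (permutation[i] != permutation[i - 1] + 1)
            return false;
    return true;
}

int main()
{
    std::vector<uint64_t> already_sorted = {0, 1, 2, 3, 4};
    std::vector<uint64_t> shuffled = {1, 0, 2, 3, 4};

    // If the block is already in order and there is no LIMIT, sorting can return
    // immediately instead of permuting (i.e. copying) every column.
    std::cout << isIdentityPermutation(already_sorted, 0) << '\n';  // 1
    std::cout << isIdentityPermutation(shuffled, 0) << '\n';        // 0
    std::cout << isIdentityPermutation(shuffled, 1) << '\n';        // 0 (first element is not 0)
}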
@ -240,33 +310,28 @@ bool isAlreadySorted(const Block & block, const SortDescription & description)
if (!block)
return true;

size_t rows = block.rows();

ColumnsWithSortDescriptions columns_with_sort_desc = getColumnsWithSortDescription(block, description);
bool is_collation_required = false;

PartialSortingLess less(columns_with_sort_desc);

/** If the rows are not too few, then let's make a quick attempt to verify that the block is not sorted.
* Constants - at random.
*/
static constexpr size_t num_rows_to_try = 10;
if (rows > num_rows_to_try * 5)
for (auto & column_with_sort_desc : columns_with_sort_desc)
{
for (size_t i = 1; i < num_rows_to_try; ++i)
if (isCollationRequired(column_with_sort_desc.description))
{
size_t prev_position = rows * (i - 1) / num_rows_to_try;
size_t curr_position = rows * i / num_rows_to_try;

if (less(curr_position, prev_position))
return false;
is_collation_required = true;
break;
}
}

for (size_t i = 1; i < rows; ++i)
if (less(i, i - 1))
return false;
size_t rows = block.rows();

return true;
if (is_collation_required)
{
PartialSortingLessWithCollation less(columns_with_sort_desc);
return isAlreadySortedImpl(rows, less);
}

PartialSortingLess less(columns_with_sort_desc);
return isAlreadySortedImpl(rows, less);
}

}

@ -10,20 +10,15 @@ namespace DB
/// Sort one block by `description`. If limit != 0, then the partial sort of the first `limit` rows is produced.
void sortBlock(Block & block, const SortDescription & description, UInt64 limit = 0);

/** Used only in StorageMergeTree to sort the data with INSERT.
/** Same as sortBlock, but do not sort the block, but only calculate the permutation of the values,
* so that you can rearrange the column values yourself.
* Sorting is stable. This is important for keeping the order of rows in the CollapsingMergeTree engine
* - because based on the order of rows it is determined whether to delete or leave groups of rows when collapsing.
* Collations are not supported. Partial sorting is not supported.
*/
void stableSortBlock(Block & block, const SortDescription & description);

/** Same as stableSortBlock, but do not sort the block, but only calculate the permutation of the values,
* so that you can rearrange the column values yourself.
* Used only in StorageMergeTree to sort the data with INSERT.
*/
void stableGetPermutation(const Block & block, const SortDescription & description, IColumn::Permutation & out_permutation);

/** Quickly check whether the block is already sorted. If the block is not sorted - returns false as fast as possible.
* Collations are not supported.
*/
bool isAlreadySorted(const Block & block, const SortDescription & description);

@ -4,7 +4,6 @@
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <Common/HashTable/Hash.h>
#include <Interpreters/InternalTextLogsQueue.h>
#include <base/terminalColors.h>

@ -25,8 +25,12 @@ namespace ErrorCodes

namespace
{
void checkBadDelimiter(char delimiter)
void checkBadDelimiter(char delimiter, bool allow_whitespace_or_tab_as_delimiter)
{
if ((delimiter == ' ' || delimiter == '\t') && allow_whitespace_or_tab_as_delimiter)
{
return;
}
constexpr std::string_view bad_delimiters = " \t\"'.UL";
if (bad_delimiters.find(delimiter) != std::string_view::npos)
throw Exception(
@ -68,7 +72,7 @@ CSVRowInputFormat::CSVRowInputFormat(
format_settings_.csv.try_detect_header),
buf(std::move(in_))
{
checkBadDelimiter(format_settings_.csv.delimiter);
checkBadDelimiter(format_settings_.csv.delimiter, format_settings_.csv.allow_whitespace_or_tab_as_delimiter);
}

CSVRowInputFormat::CSVRowInputFormat(
@ -90,7 +94,7 @@ CSVRowInputFormat::CSVRowInputFormat(
format_settings_.csv.try_detect_header),
buf(std::move(in_))
{
checkBadDelimiter(format_settings_.csv.delimiter);
checkBadDelimiter(format_settings_.csv.delimiter, format_settings_.csv.allow_whitespace_or_tab_as_delimiter);
}

void CSVRowInputFormat::syncAfterError()
@ -134,8 +138,12 @@ static void skipEndOfLine(ReadBuffer & in)
}

/// Skip `whitespace` symbols allowed in CSV.
static inline void skipWhitespacesAndTabs(ReadBuffer & in)
static inline void skipWhitespacesAndTabs(ReadBuffer & in, const bool & allow_whitespace_or_tab_as_delimiter)
{
if (allow_whitespace_or_tab_as_delimiter)
{
return;
}
while (!in.eof() && (*in.position() == ' ' || *in.position() == '\t'))
++in.position();
}
@ -146,7 +154,7 @@ CSVFormatReader::CSVFormatReader(PeekableReadBuffer & buf_, const FormatSettings

void CSVFormatReader::skipFieldDelimiter()
{
skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
assertChar(format_settings.csv.delimiter, *buf);
}

@ -154,7 +162,7 @@ template <bool read_string>
String CSVFormatReader::readCSVFieldIntoString()
{
if (format_settings.csv.trim_whitespaces) [[likely]]
skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);

String field;
if constexpr (read_string)
@ -166,14 +174,14 @@ String CSVFormatReader::readCSVFieldIntoString()

void CSVFormatReader::skipField()
{
skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
NullOutput out;
readCSVStringInto(out, *buf, format_settings.csv);
}

void CSVFormatReader::skipRowEndDelimiter()
{
skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);

if (buf->eof())
return;
@ -182,7 +190,7 @@ void CSVFormatReader::skipRowEndDelimiter()
if (*buf->position() == format_settings.csv.delimiter)
++buf->position();

skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
if (buf->eof())
return;

@ -194,7 +202,7 @@ void CSVFormatReader::skipHeaderRow()
do
{
skipField();
skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
} while (checkChar(format_settings.csv.delimiter, *buf));

skipRowEndDelimiter();
@ -207,7 +215,7 @@ std::vector<String> CSVFormatReader::readRowImpl()
do
{
fields.push_back(readCSVFieldIntoString<is_header>());
skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
} while (checkChar(format_settings.csv.delimiter, *buf));

skipRowEndDelimiter();
@ -220,7 +228,7 @@ bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)

try
{
skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
assertChar(delimiter, *buf);
}
catch (const DB::Exception &)
@ -246,7 +254,7 @@ bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)

bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
{
skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);

if (buf->eof())
return true;
@ -255,7 +263,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
if (*buf->position() == format_settings.csv.delimiter)
{
++buf->position();
skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
if (buf->eof())
return true;
}
@ -283,7 +291,7 @@ bool CSVFormatReader::readField(
const String & /*column_name*/)
{
if (format_settings.csv.trim_whitespaces || !isStringOrFixedString(removeNullable(type))) [[likely]]
skipWhitespacesAndTabs(*buf);
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);

const bool at_delimiter = !buf->eof() && *buf->position() == format_settings.csv.delimiter;
const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || *buf->position() == '\r');

@ -66,7 +66,7 @@ namespace
/* Recursive directory listing with matched paths as a result.
* Have the same method in StorageFile.
*/
Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match, std::unordered_map<String, time_t> * last_mod_times)
std::vector<StorageHDFS::PathWithInfo> LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, const String & for_match)
{
const size_t first_glob = for_match.find_first_of("*?{");

@ -88,7 +88,7 @@ namespace
throw Exception(
ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}", prefix_without_globs, String(hdfsGetLastError()));
}
Strings result;
std::vector<StorageHDFS::PathWithInfo> result;
if (!ls.file_info && ls.length > 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null");
for (int i = 0; i < ls.length; ++i)
@ -102,17 +102,15 @@ namespace
if (!is_directory && !looking_for_directory)
{
if (re2::RE2::FullMatch(file_name, matcher))
{
result.push_back(String(ls.file_info[i].mName));
if (last_mod_times)
(*last_mod_times)[result.back()] = ls.file_info[i].mLastMod;
}
result.emplace_back(
String(ls.file_info[i].mName),
StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast<size_t>(ls.file_info[i].mSize)});
}
else if (is_directory && looking_for_directory)
{
if (re2::RE2::FullMatch(file_name, matcher))
{
Strings result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash), last_mod_times);
std::vector<StorageHDFS::PathWithInfo> result_part = LSWithRegexpMatching(fs::path(full_path) / "", fs, suffix_with_globs.substr(next_slash));
/// Recursion depth is limited by pattern. '*' works only for depth = 1, for depth = 2 pattern path is '*/*'. So we do not need additional check.
std::move(result_part.begin(), result_part.end(), std::back_inserter(result));
}
@ -135,12 +133,12 @@ namespace
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set");
}

std::vector<String> getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context, std::unordered_map<String, time_t> * last_mod_times = nullptr)
std::vector<StorageHDFS::PathWithInfo> getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context)
{
HDFSBuilderWrapper builder = createHDFSBuilder(uri_without_path + "/", context->getGlobalContext()->getConfigRef());
HDFSFSPtr fs = createHDFSFS(builder.get());

return LSWithRegexpMatching("/", fs, path_from_uri, last_mod_times);
return LSWithRegexpMatching("/", fs, path_from_uri);
}
}

@ -199,9 +197,8 @@ ColumnsDescription StorageHDFS::getTableStructureFromData(
ContextPtr ctx)
{
const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
std::unordered_map<String, time_t> last_mod_time;
auto paths = getPathsList(path_from_uri, uri, ctx, &last_mod_time);
if (paths.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format))
auto paths_with_info = getPathsList(path_from_uri, uri, ctx);
if (paths_with_info.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format))
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Cannot extract table structure from {} format file, because there are no files in HDFS with provided path."
@ -209,26 +206,47 @@ ColumnsDescription StorageHDFS::getTableStructureFromData(

std::optional<ColumnsDescription> columns_from_cache;
if (ctx->getSettingsRef().schema_inference_use_cache_for_hdfs)
columns_from_cache = tryGetColumnsFromCache(paths, path_from_uri, last_mod_time, format, ctx);
columns_from_cache = tryGetColumnsFromCache(paths_with_info, path_from_uri, format, ctx);

ReadBufferIterator read_buffer_iterator = [&, my_uri_without_path = uri_without_path, it = paths.begin()](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
ReadBufferIterator read_buffer_iterator
= [&, my_uri_without_path = uri_without_path, it = paths_with_info.begin(), first = true](
ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
{
if (it == paths.end())
return nullptr;
auto compression = chooseCompressionMethod(*it, compression_method);
auto impl = std::make_unique<ReadBufferFromHDFS>(my_uri_without_path, *it++, ctx->getGlobalContext()->getConfigRef(), ctx->getReadSettings());
const Int64 zstd_window_log_max = ctx->getSettingsRef().zstd_window_log_max;
return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast<int>(zstd_window_log_max));
PathWithInfo path_with_info;
while (true)
{
if (it == paths_with_info.end())
{
if (first)
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Cannot extract table structure from {} format file, because all files are empty. "
"You must specify table structure manually", format);
return nullptr;
}

path_with_info = *it++;
if (ctx->getSettingsRef().hdfs_skip_empty_files && path_with_info.info && path_with_info.info->size == 0)
continue;

auto compression = chooseCompressionMethod(path_with_info.path, compression_method);
auto impl = std::make_unique<ReadBufferFromHDFS>(my_uri_without_path, path_with_info.path, ctx->getGlobalContext()->getConfigRef(), ctx->getReadSettings());
if (!ctx->getSettingsRef().hdfs_skip_empty_files || !impl->eof())
{
const Int64 zstd_window_log_max = ctx->getSettingsRef().zstd_window_log_max;
first = false;
return wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast<int>(zstd_window_log_max));
}
}
};

ColumnsDescription columns;
if (columns_from_cache)
columns = *columns_from_cache;
else
columns = readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, paths.size() > 1, ctx);
columns = readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, paths_with_info.size() > 1, ctx);

if (ctx->getSettingsRef().schema_inference_use_cache_for_hdfs)
addColumnsToCache(paths, path_from_uri, columns, format, ctx);
addColumnsToCache(paths_with_info, path_from_uri, columns, format, ctx);

return columns;
}
@ -241,11 +259,11 @@ public:
const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri);
uris = getPathsList(path_from_uri, uri_without_path, context_);
for (auto & elem : uris)
elem = uri_without_path + elem;
elem.path = uri_without_path + elem.path;
|
||||
uris_iter = uris.begin();
|
||||
}
|
||||
|
||||
String next()
|
||||
StorageHDFS::PathWithInfo next()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (uris_iter != uris.end())
|
||||
@ -258,8 +276,8 @@ public:
|
||||
}
|
||||
private:
|
||||
std::mutex mutex;
|
||||
Strings uris;
|
||||
Strings::iterator uris_iter;
|
||||
std::vector<StorageHDFS::PathWithInfo> uris;
|
||||
std::vector<StorageHDFS::PathWithInfo>::iterator uris_iter;
|
||||
};
|
||||
|
||||
class HDFSSource::URISIterator::Impl
|
||||
@ -279,14 +297,14 @@ public:
|
||||
uris_iter = uris.begin();
|
||||
}
|
||||
|
||||
String next()
|
||||
StorageHDFS::PathWithInfo next()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (uris_iter == uris.end())
|
||||
return "";
|
||||
return {"", {}};
|
||||
auto key = *uris_iter;
|
||||
++uris_iter;
|
||||
return key;
|
||||
return {key, {}};
|
||||
}
|
||||
|
||||
private:
|
||||
@ -298,7 +316,7 @@ private:
|
||||
HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri)
|
||||
: pimpl(std::make_shared<HDFSSource::DisclosedGlobIterator::Impl>(context_, uri)) {}
|
||||
|
||||
String HDFSSource::DisclosedGlobIterator::next()
|
||||
StorageHDFS::PathWithInfo HDFSSource::DisclosedGlobIterator::next()
|
||||
{
|
||||
return pimpl->next();
|
||||
}
|
||||
@ -308,7 +326,7 @@ HDFSSource::URISIterator::URISIterator(const std::vector<String> & uris_, Contex
|
||||
{
|
||||
}
|
||||
|
||||
String HDFSSource::URISIterator::next()
|
||||
StorageHDFS::PathWithInfo HDFSSource::URISIterator::next()
|
||||
{
|
||||
return pimpl->next();
|
||||
}
|
||||
@ -343,17 +361,29 @@ HDFSSource::HDFSSource(
|
||||
|
||||
bool HDFSSource::initialize()
|
||||
{
|
||||
current_path = (*file_iterator)();
|
||||
if (current_path.empty())
|
||||
return false;
|
||||
bool skip_empty_files = getContext()->getSettingsRef().hdfs_skip_empty_files;
|
||||
while (true)
|
||||
{
|
||||
auto path_with_info = (*file_iterator)();
|
||||
if (path_with_info.path.empty())
|
||||
return false;
|
||||
|
||||
const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path);
|
||||
if (path_with_info.info && skip_empty_files && path_with_info.info->size == 0)
|
||||
continue;
|
||||
|
||||
auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method);
|
||||
auto impl = std::make_unique<ReadBufferFromHDFS>(
|
||||
uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings());
|
||||
const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max;
|
||||
read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast<int>(zstd_window_log_max));
|
||||
current_path = path_with_info.path;
|
||||
const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path);
|
||||
|
||||
auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method);
|
||||
auto impl = std::make_unique<ReadBufferFromHDFS>(
|
||||
uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings());
|
||||
if (!skip_empty_files || !impl->eof())
|
||||
{
|
||||
const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max;
|
||||
read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast<int>(zstd_window_log_max));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size);
|
||||
|
||||
@ -554,8 +584,8 @@ Pipe StorageHDFS::read(
|
||||
if (distributed_processing)
|
||||
{
|
||||
iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>(
|
||||
[callback = context_->getReadTaskCallback()]() -> String {
|
||||
return callback();
|
||||
[callback = context_->getReadTaskCallback()]() -> StorageHDFS::PathWithInfo {
|
||||
return StorageHDFS::PathWithInfo{callback(), std::nullopt};
|
||||
});
|
||||
}
|
||||
else if (is_path_with_globs)
|
||||
@ -762,24 +792,22 @@ SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx)
|
||||
}
|
||||
|
||||
std::optional<ColumnsDescription> StorageHDFS::tryGetColumnsFromCache(
|
||||
const Strings & paths,
|
||||
const std::vector<StorageHDFS::PathWithInfo> & paths_with_info,
|
||||
const String & uri_without_path,
|
||||
std::unordered_map<String, time_t> & last_mod_time,
|
||||
const String & format_name,
|
||||
const ContextPtr & ctx)
|
||||
{
|
||||
auto & schema_cache = getSchemaCache(ctx);
|
||||
for (const auto & path : paths)
|
||||
for (const auto & path_with_info : paths_with_info)
|
||||
{
|
||||
auto get_last_mod_time = [&]() -> std::optional<time_t>
|
||||
{
|
||||
auto it = last_mod_time.find(path);
|
||||
if (it == last_mod_time.end())
|
||||
return std::nullopt;
|
||||
return it->second;
|
||||
if (path_with_info.info)
|
||||
return path_with_info.info->last_mod_time;
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
String url = fs::path(uri_without_path) / path;
|
||||
String url = fs::path(uri_without_path) / path_with_info.path;
|
||||
auto cache_key = getKeyForSchemaCache(url, format_name, {}, ctx);
|
||||
auto columns = schema_cache.tryGet(cache_key, get_last_mod_time);
|
||||
if (columns)
|
||||
@ -790,7 +818,7 @@ std::optional<ColumnsDescription> StorageHDFS::tryGetColumnsFromCache(
|
||||
}
|
||||
|
||||
void StorageHDFS::addColumnsToCache(
|
||||
const Strings & paths,
|
||||
const std::vector<StorageHDFS::PathWithInfo> & paths_with_info,
|
||||
const String & uri_without_path,
|
||||
const ColumnsDescription & columns,
|
||||
const String & format_name,
|
||||
@ -798,8 +826,8 @@ void StorageHDFS::addColumnsToCache(
|
||||
{
|
||||
auto & schema_cache = getSchemaCache(ctx);
|
||||
Strings sources;
|
||||
sources.reserve(paths.size());
|
||||
std::transform(paths.begin(), paths.end(), std::back_inserter(sources), [&](const String & path){ return fs::path(uri_without_path) / path; });
|
||||
sources.reserve(paths_with_info.size());
|
||||
std::transform(paths_with_info.begin(), paths_with_info.end(), std::back_inserter(sources), [&](const PathWithInfo & path_with_info){ return fs::path(uri_without_path) / path_with_info.path; });
|
||||
auto cache_keys = getKeysForSchemaCache(sources, format_name, {}, ctx);
|
||||
schema_cache.addMany(cache_keys, columns);
|
||||
}
|
||||
|
@@ -18,6 +18,18 @@ namespace DB
class StorageHDFS final : public IStorage, WithContext
{
public:
struct PathInfo
{
time_t last_mod_time;
size_t size;
};

struct PathWithInfo
{
String path;
std::optional<PathInfo> info;
};

StorageHDFS(
const String & uri_,
const StorageID & table_id_,
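The new PathWithInfo carries the file size and modification time alongside the path, which is what lets the source skip empty files and feed last-modified times to the schema cache without extra round-trips. A standalone sketch of how such a structure can be consumed (the structs are simplified copies for the example; the paths and flag value are invented):

#include <optional>
#include <string>
#include <vector>
#include <ctime>
#include <iostream>

/// Simplified copies of the structs declared above, for a self-contained example.
struct PathInfo { time_t last_mod_time; size_t size; };
struct PathWithInfo { std::string path; std::optional<PathInfo> info; };

/// Entries whose size is already known to be zero are skipped up front
/// (cf. hdfs_skip_empty_files); entries without metadata must still be opened.
void listReadablePaths(const std::vector<PathWithInfo> & paths, bool skip_empty_files)
{
    for (const auto & path_with_info : paths)
    {
        if (skip_empty_files && path_with_info.info && path_with_info.info->size == 0)
            continue;
        std::cout << path_with_info.path << '\n';
    }
}

int main()
{
    std::vector<PathWithInfo> paths = {
        {"/data/a.csv", PathInfo{1700000000, 128}},
        {"/data/empty.csv", PathInfo{1700000000, 0}},
        {"/data/unknown.csv", std::nullopt},
    };
    listReadablePaths(paths, /*skip_empty_files=*/ true); /// prints a.csv and unknown.csv
}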
@ -72,14 +84,13 @@ protected:
|
||||
|
||||
private:
|
||||
static std::optional<ColumnsDescription> tryGetColumnsFromCache(
|
||||
const Strings & paths,
|
||||
const std::vector<StorageHDFS::PathWithInfo> & paths_with_info,
|
||||
const String & uri_without_path,
|
||||
std::unordered_map<String, time_t> & last_mod_time,
|
||||
const String & format_name,
|
||||
const ContextPtr & ctx);
|
||||
|
||||
static void addColumnsToCache(
|
||||
const Strings & paths,
|
||||
const std::vector<StorageHDFS::PathWithInfo> & paths,
|
||||
const String & uri_without_path,
|
||||
const ColumnsDescription & columns,
|
||||
const String & format_name,
|
||||
@ -105,7 +116,7 @@ public:
|
||||
{
|
||||
public:
|
||||
DisclosedGlobIterator(ContextPtr context_, const String & uri_);
|
||||
String next();
|
||||
StorageHDFS::PathWithInfo next();
|
||||
private:
|
||||
class Impl;
|
||||
/// shared_ptr to have copy constructor
|
||||
@ -116,14 +127,14 @@ public:
|
||||
{
|
||||
public:
|
||||
URISIterator(const std::vector<String> & uris_, ContextPtr context);
|
||||
String next();
|
||||
StorageHDFS::PathWithInfo next();
|
||||
private:
|
||||
class Impl;
|
||||
/// shared_ptr to have copy constructor
|
||||
std::shared_ptr<Impl> pimpl;
|
||||
};
|
||||
|
||||
using IteratorWrapper = std::function<String()>;
|
||||
using IteratorWrapper = std::function<StorageHDFS::PathWithInfo()>;
|
||||
using StorageHDFSPtr = std::shared_ptr<StorageHDFS>;
|
||||
|
||||
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
|
||||
|
@ -79,7 +79,7 @@ void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String
|
||||
RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr, const ContextPtr & context) const
|
||||
{
|
||||
auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(context, uri);
|
||||
auto callback = std::make_shared<HDFSSource::IteratorWrapper>([iter = std::move(iterator)]() mutable -> String { return iter->next(); });
|
||||
auto callback = std::make_shared<std::function<String()>>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; });
|
||||
return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)};
|
||||
}
|
||||
|
||||
|
@ -73,7 +73,7 @@ struct WriteSettings;
|
||||
class TemporaryFileOnDisk;
|
||||
|
||||
/// This is an abstraction of storage for data part files.
|
||||
/// Ideally, it is assumed to contains read-only methods from IDisk.
|
||||
/// Ideally, it is assumed to contain read-only methods from IDisk.
|
||||
/// It is not fulfilled now, but let's try our best.
|
||||
class IDataPartStorage : public boost::noncopyable
|
||||
{
|
||||
|
@ -492,13 +492,17 @@ void IMergeTreeDataPart::removeIfNeeded()
|
||||
|
||||
if (is_temp)
|
||||
{
|
||||
String file_name = fileName(getDataPartStorage().getPartDirectory());
|
||||
const auto & part_directory = getDataPartStorage().getPartDirectory();
|
||||
|
||||
String file_name = fileName(part_directory);
|
||||
|
||||
if (file_name.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set",
|
||||
getDataPartStorage().getPartDirectory(), name);
|
||||
|
||||
if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj"))
|
||||
const auto part_parent_directory = directoryPath(part_directory);
|
||||
bool is_moving_part = part_parent_directory.ends_with("moving/");
|
||||
if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj") && !is_moving_part)
|
||||
{
|
||||
LOG_ERROR(
|
||||
storage.log,
|
||||
@ -507,6 +511,11 @@ void IMergeTreeDataPart::removeIfNeeded()
|
||||
path);
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_moving_part)
|
||||
{
|
||||
LOG_TRACE(storage.log, "Removing unneeded moved part from {}", path);
|
||||
}
|
||||
}
|
||||
|
||||
remove();
|
||||
|
@ -21,7 +21,7 @@ using SerializationPtr = std::shared_ptr<const ISerialization>;
|
||||
* in order to use MergeTreeDataPartReader's.
|
||||
* It is a separate interface and not a simple struct because
|
||||
* otherwise it will need to copy all the information which might not
|
||||
* be even used (for example, an IndexGranulary class object is quite heavy).
|
||||
* be even used (for example, an IndexGranularity class object is quite heavy).
|
||||
*/
|
||||
class IMergeTreeDataPartInfoForReader : public WithContext
|
||||
{
|
||||
|
@ -79,7 +79,11 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e
|
||||
catch (Exception & e)
|
||||
{
|
||||
/// Better diagnostics.
|
||||
e.addMessage("(while reading from part " + data_part_info_for_read->getDataPartStorage()->getFullPath() + ")");
|
||||
const auto & part_storage = data_part_info_for_read->getDataPartStorage();
|
||||
e.addMessage(
|
||||
"(while reading from part " + part_storage->getFullPath()
|
||||
+ " located on disk " + part_storage->getDiskName()
|
||||
+ " of type " + part_storage->getDiskType() + ")");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -124,7 +128,11 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns
|
||||
catch (Exception & e)
|
||||
{
|
||||
/// Better diagnostics.
|
||||
e.addMessage("(while reading from part " + data_part_info_for_read->getDataPartStorage()->getFullPath() + ")");
|
||||
const auto & part_storage = data_part_info_for_read->getDataPartStorage();
|
||||
e.addMessage(
|
||||
"(while reading from part " + part_storage->getFullPath()
|
||||
+ " located on disk " + part_storage->getDiskName()
|
||||
+ " of type " + part_storage->getDiskType() + ")");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -199,7 +207,11 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const
|
||||
catch (Exception & e)
|
||||
{
|
||||
/// Better diagnostics.
|
||||
e.addMessage("(while reading from part " + data_part_info_for_read->getDataPartStorage()->getFullPath() + ")");
|
||||
const auto & part_storage = data_part_info_for_read->getDataPartStorage();
|
||||
e.addMessage(
|
||||
"(while reading from part " + part_storage->getFullPath()
|
||||
+ " located on disk " + part_storage->getDiskName()
|
||||
+ " of type " + part_storage->getDiskType() + ")");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/ThreadFuzzer.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
#include <Common/Config/ConfigHelper.h>
|
||||
#include <Compression/CompressedReadBuffer.h>
|
||||
#include <Core/QueryProcessingStage.h>
|
||||
#include <DataTypes/DataTypeEnum.h>
|
||||
@@ -1432,6 +1433,21 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPartWithRetries(
UNREACHABLE();
}

/// Wait for all tasks to finish and rethrow the first exception if any.
/// The tasks access local variables of the caller function, so we can't just rethrow the first exception until all other tasks are finished.
void waitForAllToFinishAndRethrowFirstError(std::vector<std::future<void>> & futures)
{
/// First wait for all tasks to finish.
for (auto & future : futures)
future.wait();

/// Now rethrow the first exception if any.
for (auto & future : futures)
future.get();

futures.clear();
}
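The helper above waits on every future before calling get(), so tasks that capture the caller's locals have all finished before the first stored exception is rethrown. A standalone illustration of that wait-then-get pattern using only the standard library (the tasks, counter, and failure in main() are invented for the example):

#include <atomic>
#include <future>
#include <stdexcept>
#include <vector>

int main()
{
    std::atomic<int> progress{0};               /// local state captured by the tasks
    std::vector<std::future<void>> futures;
    for (int i = 0; i < 4; ++i)
        futures.push_back(std::async(std::launch::async, [&progress, i]
        {
            if (i == 2)
                throw std::runtime_error("task failed");
            ++progress;
        }));

    for (auto & future : futures)
        future.wait();                          /// only blocks; all tasks are done after this loop
    try
    {
        for (auto & future : futures)
            future.get();                       /// rethrows the exception from task 2
    }
    catch (const std::exception &)
    {
        /// safe to inspect `progress` here: no task is still running
    }
    return progress.load() == 3 ? 0 : 1;
}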

std::vector<MergeTreeData::LoadPartResult> MergeTreeData::loadDataPartsFromDisk(PartLoadingTreeNodes & parts_to_load)
{
const size_t num_parts = parts_to_load.size();
@ -1462,10 +1478,8 @@ std::vector<MergeTreeData::LoadPartResult> MergeTreeData::loadDataPartsFromDisk(
|
||||
if (are_parts_to_load_empty)
|
||||
{
|
||||
/// Wait for all scheduled tasks.
|
||||
/// We have to use .get() method to rethrow any exception that could occur.
|
||||
for (auto & future: parts_futures)
|
||||
future.get();
|
||||
parts_futures.clear();
|
||||
waitForAllToFinishAndRethrowFirstError(parts_futures);
|
||||
|
||||
/// At this point it is possible, that some other parts appeared in the queue for processing (parts_to_load),
|
||||
/// because we added them from inside the pool.
|
||||
/// So we need to recheck it.
|
||||
@ -1659,10 +1673,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
}
|
||||
|
||||
/// For iteration to be completed
|
||||
/// Any exception will be re-thrown.
|
||||
for (auto & future : disks_futures)
|
||||
future.get();
|
||||
disks_futures.clear();
|
||||
waitForAllToFinishAndRethrowFirstError(disks_futures);
|
||||
|
||||
PartLoadingTree::PartLoadingInfos parts_to_load;
|
||||
for (auto & disk_parts : parts_to_load_by_disk)
|
||||
@ -1772,10 +1783,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
|
||||
}
|
||||
|
||||
/// For for iteration to be completed
|
||||
/// Any exception will be re-thrown.
|
||||
for (auto & future : wal_disks_futures)
|
||||
future.get();
|
||||
wal_disks_futures.clear();
|
||||
waitForAllToFinishAndRethrowFirstError(wal_disks_futures);
|
||||
|
||||
MutableDataPartsVector parts_from_wal;
|
||||
for (auto & disk_wal_parts : disks_wal_parts)
|
||||
@ -1890,9 +1898,7 @@ try
|
||||
{
|
||||
/// Wait for every scheduled task
|
||||
/// In case of any exception it will be re-thrown and server will be terminated.
|
||||
for (auto & future : parts_futures)
|
||||
future.get();
|
||||
parts_futures.clear();
|
||||
waitForAllToFinishAndRethrowFirstError(parts_futures);
|
||||
|
||||
LOG_DEBUG(log,
|
||||
"Stopped loading outdated data parts because task was canceled. "
|
||||
@ -1916,7 +1922,10 @@ try
|
||||
|
||||
++num_loaded_parts;
|
||||
if (res.is_broken)
|
||||
{
|
||||
forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(res.part->name);
|
||||
res.part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes
|
||||
}
|
||||
else if (res.part->is_duplicate)
|
||||
res.part->remove();
|
||||
else
|
||||
@ -2011,6 +2020,21 @@ static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_pa
|
||||
|
||||
|
||||
size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes)
|
||||
{
|
||||
size_t cleared_count = 0;
|
||||
|
||||
cleared_count += clearOldTemporaryDirectories(relative_data_path, custom_directories_lifetime_seconds, valid_prefixes);
|
||||
|
||||
if (allowRemoveStaleMovingParts())
|
||||
{
|
||||
/// Clear _all_ parts from the `moving` directory
|
||||
cleared_count += clearOldTemporaryDirectories(fs::path(relative_data_path) / "moving", custom_directories_lifetime_seconds, {""});
|
||||
}
|
||||
|
||||
return cleared_count;
|
||||
}
|
||||
|
||||
size_t MergeTreeData::clearOldTemporaryDirectories(const String & root_path, size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes)
|
||||
{
|
||||
/// If the method is already called from another thread, then we don't need to do anything.
|
||||
std::unique_lock lock(clear_old_temporary_directories_mutex, std::defer_lock);
|
||||
@ -2029,7 +2053,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lif
|
||||
if (disk->isBroken())
|
||||
continue;
|
||||
|
||||
for (auto it = disk->iterateDirectory(relative_data_path); it->isValid(); it->next())
|
||||
for (auto it = disk->iterateDirectory(root_path); it->isValid(); it->next())
|
||||
{
|
||||
const std::string & basename = it->name();
|
||||
bool start_with_valid_prefix = false;
|
||||
@ -2424,10 +2448,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
|
||||
}, Priority{0}));
|
||||
}
|
||||
|
||||
/// Any exception will be re-thrown.
|
||||
for (auto & future : parts_to_remove_futures)
|
||||
future.get();
|
||||
parts_to_remove_futures.clear();
|
||||
waitForAllToFinishAndRethrowFirstError(parts_to_remove_futures);
|
||||
|
||||
return;
|
||||
}
|
||||
@ -2583,10 +2604,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
|
||||
|
||||
independent_ranges = split_into_independent_ranges(excluded_parts, /* split_times */ 0);
|
||||
|
||||
/// Any exception will be re-thrown.
|
||||
for (auto & future : part_removal_futures)
|
||||
future.get();
|
||||
part_removal_futures.clear();
|
||||
waitForAllToFinishAndRethrowFirstError(part_removal_futures);
|
||||
|
||||
for (size_t i = 0; i < independent_ranges.infos.size(); ++i)
|
||||
{
|
||||
@ -2595,10 +2613,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
|
||||
schedule_parts_removal(range, std::move(parts_in_range));
|
||||
}
|
||||
|
||||
/// Any exception will be re-thrown.
|
||||
for (auto & future : part_removal_futures)
|
||||
future.get();
|
||||
part_removal_futures.clear();
|
||||
waitForAllToFinishAndRethrowFirstError(part_removal_futures);
|
||||
|
||||
if (parts_to_remove.size() != sum_of_ranges + excluded_parts.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
@ -4326,14 +4341,14 @@ std::optional<Int64> MergeTreeData::getMinPartDataVersion() const
|
||||
}
|
||||
|
||||
|
||||
void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const
|
||||
void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context, bool allow_throw) const
|
||||
{
|
||||
const auto settings = getSettings();
|
||||
const auto & query_settings = query_context->getSettingsRef();
|
||||
const size_t parts_count_in_total = getActivePartsCount();
|
||||
|
||||
/// check if have too many parts in total
|
||||
if (parts_count_in_total >= settings->max_parts_in_total)
|
||||
/// Check if we have too many parts in total
|
||||
if (allow_throw && parts_count_in_total >= settings->max_parts_in_total)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::RejectedInserts);
|
||||
throw Exception(
|
||||
@ -4349,7 +4364,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex
|
||||
if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0)
|
||||
outdated_parts_count_in_partition = getMaxOutdatedPartsCountForPartition();
|
||||
|
||||
if (settings->inactive_parts_to_throw_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_throw_insert)
|
||||
if (allow_throw && settings->inactive_parts_to_throw_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_throw_insert)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::RejectedInserts);
|
||||
throw Exception(
|
||||
@ -4373,7 +4388,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex
|
||||
bool parts_are_large_enough_in_average
|
||||
= settings->max_avg_part_size_for_too_many_parts && average_part_size > settings->max_avg_part_size_for_too_many_parts;
|
||||
|
||||
if (parts_count_in_partition >= active_parts_to_throw_insert && !parts_are_large_enough_in_average)
|
||||
if (allow_throw && parts_count_in_partition >= active_parts_to_throw_insert && !parts_are_large_enough_in_average)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::RejectedInserts);
|
||||
throw Exception(
|
||||
@@ -4411,18 +4426,17 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex
allowed_parts_over_threshold = settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert;
}

if (allowed_parts_over_threshold == 0 || parts_over_threshold > allowed_parts_over_threshold) [[unlikely]]
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Incorrect calculation of {} parts over threshold: allowed_parts_over_threshold={}, parts_over_threshold={}",
(use_active_parts_threshold ? "active" : "inactive"),
allowed_parts_over_threshold,
parts_over_threshold);

const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? settings->max_delay_to_insert * 1000 : 1000);
double delay_factor = static_cast<double>(parts_over_threshold) / allowed_parts_over_threshold;
const UInt64 min_delay_milliseconds = settings->min_delay_to_insert_ms;
delay_milliseconds = std::max(min_delay_milliseconds, static_cast<UInt64>(max_delay_milliseconds * delay_factor));
if (allowed_parts_over_threshold == 0 || parts_over_threshold > allowed_parts_over_threshold)
{
delay_milliseconds = max_delay_milliseconds;
}
else
{
double delay_factor = static_cast<double>(parts_over_threshold) / allowed_parts_over_threshold;
const UInt64 min_delay_milliseconds = settings->min_delay_to_insert_ms;
delay_milliseconds = std::max(min_delay_milliseconds, static_cast<UInt64>(max_delay_milliseconds * delay_factor));
}
}

ProfileEvents::increment(ProfileEvents::DelayedInserts);
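A short numeric illustration of the new branch above, assuming the active-parts case computes allowed_parts_over_threshold as parts_to_throw_insert - parts_to_delay_insert and that the partition sits 500 parts over the delay threshold (the concrete numbers are made up; the threshold defaults mirror the MergeTree settings changed elsewhere in this commit):

#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
    /// Example values only; the real inputs come from MergeTreeSettings and the partition state.
    const uint64_t parts_to_delay_insert = 1000;
    const uint64_t parts_to_throw_insert = 3000;
    const uint64_t parts_in_partition = 1500;

    const uint64_t max_delay_milliseconds = 1 * 1000;   /// max_delay_to_insert = 1 s
    const uint64_t min_delay_milliseconds = 10;         /// min_delay_to_insert_ms

    /// Simplified: the real parts_over_threshold is computed earlier in the function.
    const uint64_t parts_over_threshold = parts_in_partition - parts_to_delay_insert;            /// 500
    const uint64_t allowed_parts_over_threshold = parts_to_throw_insert - parts_to_delay_insert; /// 2000

    uint64_t delay_milliseconds;
    if (allowed_parts_over_threshold == 0 || parts_over_threshold > allowed_parts_over_threshold)
        delay_milliseconds = max_delay_milliseconds;    /// saturate instead of throwing LOGICAL_ERROR
    else
    {
        double delay_factor = static_cast<double>(parts_over_threshold) / allowed_parts_over_threshold; /// 0.25
        delay_milliseconds = std::max(min_delay_milliseconds,
                                      static_cast<uint64_t>(max_delay_milliseconds * delay_factor));    /// 250
    }
    std::cout << delay_milliseconds << " ms\n";   /// prints 250
}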
@ -7854,7 +7868,7 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr &
|
||||
for (const auto & moving_part : moving_tagger->parts_to_move)
|
||||
{
|
||||
Stopwatch stopwatch;
|
||||
MutableDataPartPtr cloned_part;
|
||||
MergeTreePartsMover::TemporaryClonedPart cloned_part;
|
||||
ProfileEventsScope profile_events_scope;
|
||||
|
||||
auto write_part_log = [&](const ExecutionStatus & execution_status)
|
||||
@ -7864,7 +7878,7 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr &
|
||||
execution_status,
|
||||
stopwatch.elapsed(),
|
||||
moving_part.part->name,
|
||||
cloned_part,
|
||||
cloned_part.part,
|
||||
{moving_part.part},
|
||||
nullptr,
|
||||
profile_events_scope.getSnapshot());
|
||||
@ -7940,9 +7954,6 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr &
|
||||
catch (...)
|
||||
{
|
||||
write_part_log(ExecutionStatus::fromCurrentException("", true));
|
||||
if (cloned_part)
|
||||
cloned_part->remove();
|
||||
|
||||
throw;
|
||||
}
|
||||
}
|
||||
@ -8457,6 +8468,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart(
|
||||
return new_data_part;
|
||||
}
|
||||
|
||||
bool MergeTreeData::allowRemoveStaleMovingParts() const
|
||||
{
|
||||
return ConfigHelper::getBool(getContext()->getConfigRef(), "allow_remove_stale_moving_parts");
|
||||
}
|
||||
|
||||
CurrentlySubmergingEmergingTagger::~CurrentlySubmergingEmergingTagger()
|
||||
{
|
||||
std::lock_guard lock(storage.currently_submerging_emerging_mutex);
|
||||
|
@ -557,7 +557,7 @@ public:
|
||||
/// If the table contains too many active parts, sleep for a while to give them time to merge.
|
||||
/// If until is non-null, wake up from the sleep earlier if the event happened.
|
||||
/// The decision to delay or throw is made according to settings 'parts_to_delay_insert' and 'parts_to_throw_insert'.
|
||||
void delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const;
|
||||
void delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context, bool allow_throw) const;
|
||||
|
||||
/// If the table contains too many unfinished mutations, sleep for a while to give them time to execute.
|
||||
/// If until is non-null, wake up from the sleep earlier if the event happened.
|
||||
@ -646,6 +646,9 @@ public:
|
||||
/// For active parts it's unsafe because this method modifies fields of part (rename) while some other thread can try to read it.
|
||||
void forcefullyMovePartToDetachedAndRemoveFromMemory(const DataPartPtr & part, const String & prefix = "", bool restore_covered = false);
|
||||
|
||||
/// This method should not be here, but async loading of Outdated parts is implemented in MergeTreeData
|
||||
virtual void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & /*part_name*/) {}
|
||||
|
||||
/// Outdate broken part, set remove time to zero (remove as fast as possible) and make clone in detached directory.
|
||||
void outdateBrokenPartAndCloneToDetached(const DataPartPtr & part, const String & prefix);
|
||||
|
||||
@ -676,6 +679,7 @@ public:
|
||||
/// Delete all directories which names begin with "tmp"
|
||||
/// Must be called with locked lockForShare() because it's using relative_data_path.
|
||||
size_t clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes = {"tmp_", "tmp-fetch_"});
|
||||
size_t clearOldTemporaryDirectories(const String & root_path, size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes);
|
||||
|
||||
size_t clearEmptyParts();
|
||||
|
||||
@ -1061,6 +1065,9 @@ public:
|
||||
void waitForOutdatedPartsToBeLoaded() const;
|
||||
bool canUsePolymorphicParts() const;
|
||||
|
||||
/// TODO: make enabled by default in the next release if no problems found.
|
||||
bool allowRemoveStaleMovingParts() const;
|
||||
|
||||
protected:
|
||||
friend class IMergeTreeDataPart;
|
||||
friend class MergeTreeDataMergerMutator;
|
||||
|
@ -11,6 +11,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ABORTED;
|
||||
extern const int DIRECTORY_ALREADY_EXISTS;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -203,7 +204,7 @@ bool MergeTreePartsMover::selectPartsForMove(
|
||||
return false;
|
||||
}
|
||||
|
||||
MergeTreeMutableDataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part) const
|
||||
MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part) const
|
||||
{
|
||||
if (moves_blocker.isCancelled())
|
||||
throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts.");
|
||||
@ -212,6 +213,8 @@ MergeTreeMutableDataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEn
|
||||
auto part = moving_part.part;
|
||||
auto disk = moving_part.reserved_space->getDisk();
|
||||
LOG_DEBUG(log, "Cloning part {} from '{}' to '{}'", part->name, part->getDataPartStorage().getDiskName(), disk->getName());
|
||||
TemporaryClonedPart cloned_part;
|
||||
cloned_part.temporary_directory_lock = data->getTemporaryPartDirectoryHolder(part->name);
|
||||
|
||||
MutableDataPartStoragePtr cloned_part_storage;
|
||||
if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)
|
||||
@ -222,8 +225,10 @@ MergeTreeMutableDataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEn
|
||||
String relative_path = part->getDataPartStorage().getPartDirectory();
|
||||
if (disk->exists(path_to_clone + relative_path))
|
||||
{
|
||||
LOG_WARNING(log, "Path {} already exists. Will remove it and clone again.", fullPath(disk, path_to_clone + relative_path));
|
||||
disk->removeRecursive(fs::path(path_to_clone) / relative_path / "");
|
||||
throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS,
|
||||
"Cannot clone part {} from '{}' to '{}': path '{}' already exists",
|
||||
part->name, part->getDataPartStorage().getDiskName(), disk->getName(),
|
||||
fullPath(disk, path_to_clone + relative_path));
|
||||
}
|
||||
|
||||
disk->createDirectories(path_to_clone);
|
||||
@ -242,37 +247,48 @@ MergeTreeMutableDataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEn
|
||||
}
|
||||
|
||||
MergeTreeDataPartBuilder builder(*data, part->name, cloned_part_storage);
|
||||
auto cloned_part = std::move(builder).withPartFormatFromDisk().build();
|
||||
LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part->getDataPartStorage().getFullPath());
|
||||
cloned_part.part = std::move(builder).withPartFormatFromDisk().build();
|
||||
LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part.part->getDataPartStorage().getFullPath());
|
||||
|
||||
cloned_part->loadColumnsChecksumsIndexes(true, true);
|
||||
cloned_part->loadVersionMetadata();
|
||||
cloned_part->modification_time = cloned_part->getDataPartStorage().getLastModified().epochTime();
|
||||
cloned_part.part->is_temp = data->allowRemoveStaleMovingParts();
|
||||
cloned_part.part->loadColumnsChecksumsIndexes(true, true);
|
||||
cloned_part.part->loadVersionMetadata();
|
||||
cloned_part.part->modification_time = cloned_part.part->getDataPartStorage().getLastModified().epochTime();
|
||||
return cloned_part;
|
||||
}
|
||||
|
||||
|
||||
void MergeTreePartsMover::swapClonedPart(const MergeTreeMutableDataPartPtr & cloned_part) const
|
||||
void MergeTreePartsMover::swapClonedPart(TemporaryClonedPart & cloned_part) const
|
||||
{
|
||||
if (moves_blocker.isCancelled())
|
||||
throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts.");
|
||||
|
||||
auto active_part = data->getActiveContainingPart(cloned_part->name);
|
||||
auto active_part = data->getActiveContainingPart(cloned_part.part->name);
|
||||
|
||||
/// It's ok, because we don't block moving parts for merges or mutations
|
||||
if (!active_part || active_part->name != cloned_part->name)
|
||||
if (!active_part || active_part->name != cloned_part.part->name)
|
||||
{
|
||||
LOG_INFO(log, "Failed to swap {}. Active part doesn't exist. Possible it was merged or mutated. Will remove copy on path '{}'.", cloned_part->name, cloned_part->getDataPartStorage().getFullPath());
|
||||
LOG_INFO(log,
|
||||
"Failed to swap {}. Active part doesn't exist (containing part {}). "
|
||||
"Possible it was merged or mutated. Part on path '{}' {}",
|
||||
cloned_part.part->name,
|
||||
active_part ? active_part->name : "doesn't exist",
|
||||
cloned_part.part->getDataPartStorage().getFullPath(),
|
||||
data->allowRemoveStaleMovingParts() ? "will be removed" : "will remain intact (set <allow_remove_stale_moving_parts> in config.xml, exercise caution when using)");
|
||||
return;
|
||||
}
|
||||
|
||||
cloned_part.part->is_temp = false;
|
||||
|
||||
/// Don't remove new directory but throw an error because it may contain part which is currently in use.
|
||||
cloned_part->renameTo(active_part->name, false);
|
||||
cloned_part.part->renameTo(active_part->name, false);
|
||||
|
||||
/// TODO what happen if server goes down here?
|
||||
data->swapActivePart(cloned_part);
|
||||
data->swapActivePart(cloned_part.part);
|
||||
|
||||
LOG_TRACE(log, "Part {} was moved to {}", cloned_part->name, cloned_part->getDataPartStorage().getFullPath());
|
||||
LOG_TRACE(log, "Part {} was moved to {}", cloned_part.part->name, cloned_part.part->getDataPartStorage().getFullPath());
|
||||
|
||||
cloned_part.temporary_directory_lock = {};
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include <base/scope_guard.h>
|
||||
#include <Disks/StoragePolicy.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||
#include <Storages/MergeTree/MovesList.h>
|
||||
@ -43,12 +44,19 @@ private:
|
||||
using AllowedMovingPredicate = std::function<bool(const std::shared_ptr<const IMergeTreeDataPart> &, String * reason)>;
|
||||
|
||||
public:
|
||||
|
||||
explicit MergeTreePartsMover(MergeTreeData * data_)
|
||||
: data(data_)
|
||||
, log(&Poco::Logger::get("MergeTreePartsMover"))
|
||||
{
|
||||
}
|
||||
|
||||
struct TemporaryClonedPart
|
||||
{
|
||||
MergeTreeMutableDataPartPtr part;
|
||||
scope_guard temporary_directory_lock;
|
||||
};
|
||||
|
||||
/// Select parts for background moves according to storage_policy configuration.
|
||||
/// Returns true if at least one part was selected for move.
|
||||
bool selectPartsForMove(
|
||||
@ -57,14 +65,14 @@ public:
|
||||
const std::lock_guard<std::mutex> & moving_parts_lock);
|
||||
|
||||
/// Copies part to selected reservation in detached folder. Throws exception if part already exists.
|
||||
MergeTreeMutableDataPartPtr clonePart(const MergeTreeMoveEntry & moving_part) const;
|
||||
TemporaryClonedPart clonePart(const MergeTreeMoveEntry & moving_part) const;
|
||||
|
||||
/// Replaces cloned part from detached directory into active data parts set.
|
||||
/// Replacing part changes state to DeleteOnDestroy and will be removed from disk after destructor of
|
||||
/// IMergeTreeDataPart called. If replacing part doesn't exists or not active (committed) than
|
||||
/// cloned part will be removed and log message will be reported. It may happen in case of concurrent
|
||||
/// merge or mutation.
|
||||
void swapClonedPart(const MergeTreeMutableDataPartPtr & cloned_parts) const;
|
||||
void swapClonedPart(TemporaryClonedPart & cloned_part) const;
|
||||
|
||||
/// Can stop background moves and moves from queries
|
||||
ActionBlocker moves_blocker;
|
||||
|
@ -188,10 +188,11 @@ size_t MergeTreeReaderWide::readRows(
|
||||
data_part_info_for_read->reportBroken();
|
||||
|
||||
/// Better diagnostics.
|
||||
const auto & part_storage = data_part_info_for_read->getDataPartStorage();
|
||||
e.addMessage(
|
||||
fmt::format(
|
||||
"(while reading from part {} from mark {} with max_rows_to_read = {})",
|
||||
data_part_info_for_read->getDataPartStorage()->getFullPath(),
|
||||
"(while reading from part {} located on disk {} of type {}, from mark {} with max_rows_to_read = {})",
|
||||
part_storage->getFullPath(), part_storage->getDiskName(), part_storage->getDiskType(),
|
||||
toString(from_mark), toString(max_rows_to_read)));
|
||||
throw;
|
||||
}
|
||||
|
@@ -73,11 +73,11 @@ struct Settings;
M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \
\
/** Inserts settings. */ \
M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \
M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \
M(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \
M(UInt64, parts_to_throw_insert, 300, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \
M(UInt64, parts_to_throw_insert, 3000, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \
M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' exception.", 0) \
M(UInt64, max_avg_part_size_for_too_many_parts, 10ULL * 1024 * 1024 * 1024, "The 'too many parts' check according to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \
M(UInt64, max_avg_part_size_for_too_many_parts, 1ULL * 1024 * 1024 * 1024, "The 'too many parts' check according to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \
M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \
M(UInt64, min_delay_to_insert_ms, 10, "Min delay of inserting data into MergeTree table in milliseconds, if there are a lot of unmerged parts in single partition.", 0) \
M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \
@ -45,9 +45,9 @@ MergeTreeSink::MergeTreeSink(
|
||||
|
||||
void MergeTreeSink::onStart()
|
||||
{
|
||||
/// Only check "too many parts" before write,
|
||||
/// It's only allowed to throw "too many parts" before write,
|
||||
/// because interrupting long-running INSERT query in the middle is not convenient for users.
|
||||
storage.delayInsertOrThrowIfNeeded(nullptr, context);
|
||||
storage.delayInsertOrThrowIfNeeded(nullptr, context, true);
|
||||
}
|
||||
|
||||
void MergeTreeSink::onFinish()
|
||||
@ -57,6 +57,9 @@ void MergeTreeSink::onFinish()
|
||||
|
||||
void MergeTreeSink::consume(Chunk chunk)
|
||||
{
|
||||
if (num_blocks_processed > 0)
|
||||
storage.delayInsertOrThrowIfNeeded(nullptr, context, false);
|
||||
|
||||
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
|
||||
if (!storage_snapshot->object_columns.empty())
|
||||
convertDynamicColumnsToTuples(block, storage_snapshot);
|
||||
@ -136,6 +139,8 @@ void MergeTreeSink::consume(Chunk chunk)
|
||||
finishDelayedChunk();
|
||||
delayed_chunk = std::make_unique<MergeTreeSink::DelayedChunk>();
|
||||
delayed_chunk->partitions = std::move(partitions);
|
||||
|
||||
++num_blocks_processed;
|
||||
}
|
||||
|
||||
void MergeTreeSink::finishDelayedChunk()
|
||||
|
@ -35,7 +35,8 @@ private:
|
||||
size_t max_parts_per_block;
|
||||
ContextPtr context;
|
||||
StorageSnapshotPtr storage_snapshot;
|
||||
uint64_t chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token
|
||||
UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token
|
||||
UInt64 num_blocks_processed = 0;
|
||||
|
||||
/// We can delay processing for previous chunk and start writing a new one.
|
||||
struct DelayedChunk;
|
||||
|
@ -367,6 +367,9 @@ size_t ReplicatedMergeTreeSinkImpl<async_insert>::checkQuorumPrecondition(const
|
||||
template<bool async_insert>
|
||||
void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
|
||||
{
|
||||
if (num_blocks_processed > 0)
|
||||
storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false);
|
||||
|
||||
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
|
||||
|
||||
const auto & settings = context->getSettingsRef();
|
||||
@ -512,6 +515,8 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::consume(Chunk chunk)
|
||||
/// TODO: we can also delay commit if there is no MVs.
|
||||
if (!settings.deduplicate_blocks_in_dependent_materialized_views)
|
||||
finishDelayedChunk(zookeeper);
|
||||
|
||||
++num_blocks_processed;
|
||||
}
|
||||
|
||||
template<>
|
||||
@ -1136,9 +1141,9 @@ std::pair<std::vector<String>, bool> ReplicatedMergeTreeSinkImpl<async_insert>::
|
||||
template<bool async_insert>
|
||||
void ReplicatedMergeTreeSinkImpl<async_insert>::onStart()
|
||||
{
|
||||
/// Only check "too many parts" before write,
|
||||
/// It's only allowed to throw "too many parts" before write,
|
||||
/// because interrupting long-running INSERT query in the middle is not convenient for users.
|
||||
storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context);
|
||||
storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, true);
|
||||
}
|
||||
|
||||
template<bool async_insert>
|
||||
|
@ -123,6 +123,7 @@ private:
|
||||
bool quorum_parallel = false;
|
||||
const bool deduplicate = true;
|
||||
bool last_block_is_duplicate = false;
|
||||
UInt64 num_blocks_processed = 0;
|
||||
|
||||
using Logger = Poco::Logger;
|
||||
Poco::Logger * log;
|
||||
|
@ -259,35 +259,40 @@ std::unique_ptr<ReadBuffer> selectReadBuffer(
|
||||
return res;
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> createReadBuffer(
|
||||
const String & current_path,
|
||||
bool use_table_fd,
|
||||
const String & storage_name,
|
||||
int table_fd,
|
||||
const String & compression_method,
|
||||
ContextPtr context)
|
||||
struct stat getFileStat(const String & current_path, bool use_table_fd, int table_fd, const String & storage_name)
|
||||
{
|
||||
CompressionMethod method;
|
||||
|
||||
struct stat file_stat{};
|
||||
|
||||
if (use_table_fd)
|
||||
{
|
||||
/// Check if file descriptor allows random reads (and reading it twice).
|
||||
if (0 != fstat(table_fd, &file_stat))
|
||||
throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT);
|
||||
|
||||
method = chooseCompressionMethod("", compression_method);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Check if file descriptor allows random reads (and reading it twice).
|
||||
if (0 != stat(current_path.c_str(), &file_stat))
|
||||
throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT);
|
||||
|
||||
method = chooseCompressionMethod(current_path, compression_method);
|
||||
}
|
||||
|
||||
return file_stat;
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> createReadBuffer(
|
||||
const String & current_path,
|
||||
const struct stat & file_stat,
|
||||
bool use_table_fd,
|
||||
int table_fd,
|
||||
const String & compression_method,
|
||||
ContextPtr context)
|
||||
{
|
||||
CompressionMethod method;
|
||||
|
||||
if (use_table_fd)
|
||||
method = chooseCompressionMethod("", compression_method);
|
||||
else
|
||||
method = chooseCompressionMethod(current_path, compression_method);
|
||||
|
||||
std::unique_ptr<ReadBuffer> nested_buffer = selectReadBuffer(current_path, use_table_fd, table_fd, file_stat, context);
|
||||
|
||||
/// For clickhouse-local and clickhouse-client add progress callback to display progress bar.
|
||||
@ -357,7 +362,8 @@ ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr c
|
||||
{
|
||||
/// We will use PeekableReadBuffer to create a checkpoint, so we need a place
|
||||
/// where we can store the original read buffer.
|
||||
read_buffer_from_fd = createReadBuffer("", true, getName(), table_fd, compression_method, context);
|
||||
auto file_stat = getFileStat("", true, table_fd, getName());
|
||||
read_buffer_from_fd = createReadBuffer("", file_stat, true, table_fd, compression_method, context);
|
||||
auto read_buf = std::make_unique<PeekableReadBuffer>(*read_buffer_from_fd);
|
||||
read_buf->setCheckpoint();
|
||||
return read_buf;
|
||||
@ -398,12 +404,29 @@ ColumnsDescription StorageFile::getTableStructureFromFile(
|
||||
if (context->getSettingsRef().schema_inference_use_cache_for_file)
|
||||
columns_from_cache = tryGetColumnsFromCache(paths, format, format_settings, context);
|
||||
|
||||
ReadBufferIterator read_buffer_iterator = [&, it = paths.begin()](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
|
||||
ReadBufferIterator read_buffer_iterator = [&, it = paths.begin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
|
||||
{
|
||||
if (it == paths.end())
|
||||
return nullptr;
|
||||
String path;
|
||||
struct stat file_stat;
|
||||
do
|
||||
{
|
||||
if (it == paths.end())
|
||||
{
|
||||
if (first)
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
|
||||
"Cannot extract table structure from {} format file, because all files are empty. You must specify table structure manually",
|
||||
format);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return createReadBuffer(*it++, false, "File", -1, compression_method, context);
|
||||
path = *it++;
|
||||
file_stat = getFileStat(path, false, -1, "File");
|
||||
}
|
||||
while (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0);
|
||||
|
||||
first = false;
|
||||
return createReadBuffer(path, file_stat, false, -1, compression_method, context);
|
||||
};
|
||||
|
||||
ColumnsDescription columns;
|
||||
@ -692,7 +715,12 @@ public:
|
||||
}
|
||||
|
||||
if (!read_buf)
|
||||
read_buf = createReadBuffer(current_path, storage->use_table_fd, storage->getName(), storage->table_fd, storage->compression_method, context);
|
||||
{
|
||||
auto file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName());
|
||||
if (context->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0)
|
||||
continue;
|
||||
read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context);
|
||||
}
|
||||
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
chassert(!storage->paths.empty());
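The StorageFile changes above split getFileStat() out of createReadBuffer() so that the file size is known before any buffer is constructed, which is what makes engine_file_skip_empty_files cheap. A standalone sketch of that stat-first pattern using plain POSIX calls (none of the ClickHouse wrappers appear here; the file names and flag value are invented):

#include <sys/stat.h>
#include <cerrno>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

/// Stat first, open later: the caller can decide to skip a file (for example
/// an empty one) without paying for constructing a read buffer.
static struct stat getFileStat(const std::string & path)
{
    struct stat file_stat{};
    if (0 != ::stat(path.c_str(), &file_stat))
        throw std::runtime_error("Cannot stat file " + path + ": " + std::strerror(errno));
    return file_stat;
}

int main()
{
    /// Create two illustrative files: one with data, one empty.
    { std::ofstream("data_1.csv") << "a,b\n1,2\n"; std::ofstream("data_2.csv"); }

    const std::vector<std::string> paths = {"data_1.csv", "data_2.csv"};
    const bool skip_empty_files = true;                 /// cf. engine_file_skip_empty_files

    for (const auto & path : paths)
    {
        auto file_stat = getFileStat(path);
        if (skip_empty_files && file_stat.st_size == 0)
            continue;                                   /// no buffer is ever created for empty files
        std::ifstream in(path);                         /// stands in for createReadBuffer(...)
        std::cout << path << ": " << file_stat.st_size << " bytes\n";
    }
}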
@ -1226,12 +1226,53 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin
|
||||
return res;
|
||||
}
|
||||
|
||||
static void paranoidCheckForCoveredPartsInZooKeeperOnStart(const StorageReplicatedMergeTree * storage, const Strings & parts_in_zk,
|
||||
MergeTreeDataFormatVersion format_version, Poco::Logger * log)
|
||||
{
|
||||
#ifdef ABORT_ON_LOGICAL_ERROR
|
||||
constexpr bool paranoid_check_for_covered_parts_default = true;
|
||||
#else
|
||||
constexpr bool paranoid_check_for_covered_parts_default = false;
|
||||
#endif
|
||||
|
||||
bool paranoid_check_for_covered_parts = Context::getGlobalContextInstance()->getConfigRef().getBool(
|
||||
"replicated_merge_tree_paranoid_check_on_startup", paranoid_check_for_covered_parts_default);
|
||||
if (!paranoid_check_for_covered_parts)
|
||||
return;
|
||||
|
||||
ActiveDataPartSet active_set(format_version);
|
||||
for (const auto & part_name : parts_in_zk)
|
||||
active_set.add(part_name);
|
||||
|
||||
const auto disks = storage->getStoragePolicy()->getDisks();
|
||||
auto path = storage->getRelativeDataPath();
|
||||
|
||||
for (const auto & part_name : parts_in_zk)
|
||||
{
|
||||
String covering_part = active_set.getContainingPart(part_name);
|
||||
if (part_name == covering_part)
|
||||
continue;
|
||||
|
||||
bool found = false;
|
||||
for (const DiskPtr & disk : disks)
|
||||
if (disk->exists(fs::path(path) / part_name))
|
||||
found = true;
|
||||
|
||||
if (!found)
|
||||
{
|
||||
LOG_WARNING(log, "Part {} exists in ZooKeeper and covered by another part in ZooKeeper ({}), but doesn't exist on any disk. "
|
||||
"It may cause false-positive 'part is lost forever' messages", part_name, covering_part);
|
||||
chassert(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks)
|
||||
{
|
||||
auto zookeeper = getZooKeeper();
|
||||
|
||||
Strings expected_parts_vec = zookeeper->getChildren(fs::path(replica_path) / "parts");
|
||||
paranoidCheckForCoveredPartsInZooKeeperOnStart(this, expected_parts_vec, format_version, log);
|
||||
|
||||
/// Parts in ZK.
|
||||
NameSet expected_parts(expected_parts_vec.begin(), expected_parts_vec.end());
|
||||
@ -6805,6 +6846,31 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZKImpl(zkutil::ZooKee
|
||||
}
|
||||
|
||||
|
||||
void StorageReplicatedMergeTree::forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & part_name)
|
||||
{
|
||||
/// An outdated part is broken and we are going to move it do detached/
|
||||
/// But we need to remove it from ZooKeeper as well. Otherwise it will be considered as "lost forever".
|
||||
|
||||
/// Since the part is Outdated, it should be safe to remove it, but it's still dangerous.
|
||||
/// It could became Outdated because it was merged/mutated (so we have a covering part) or because it was dropped.
|
||||
/// But DROP [PART]ITION waits for all Outdated parts to be loaded, so it's not the case.
|
||||
|
||||
auto zookeeper = getZooKeeper();
|
||||
String part_path = replica_path + "/parts/" + part_name;
|
||||
if (!zookeeper->exists(part_path))
|
||||
return;
|
||||
|
||||
auto part = getActiveContainingPart(part_name);
|
||||
if (!part)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Outdated part {} is broken and going to be detached, "
|
||||
"but there's no active covering part, so we are not sure that it can be safely removed from ZooKeeper "
|
||||
"(path: {})", part_name, part_path);
|
||||
|
||||
LOG_WARNING(log, "Outdated part {} is broken and going to be detached, removing it from ZooKeeper. The part is covered by {}",
|
||||
part_name, part->name);
|
||||
removePartsFromZooKeeperWithRetries({part_name}, /* infinite retries */ 0);
|
||||
}
|
||||
|
||||
void StorageReplicatedMergeTree::removePartsFromZooKeeperWithRetries(PartsToRemoveFromZooKeeper & parts, size_t max_retries)
|
||||
{
|
||||
Strings part_names_to_remove;
|
||||
@ -7166,6 +7232,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
|
||||
}
|
||||
|
||||
Coordination::Responses op_results;
|
||||
DataPartsVector parts_holder;
|
||||
|
||||
try
|
||||
{
|
||||
@ -7215,7 +7282,10 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
|
||||
auto data_parts_lock = lockParts();
|
||||
transaction.commit(&data_parts_lock);
|
||||
if (replace)
|
||||
{
|
||||
parts_holder = getDataPartsVectorInPartitionForInternalUsage(MergeTreeDataPartState::Active, drop_range.partition_id, &data_parts_lock);
|
||||
removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, data_parts_lock);
|
||||
}
|
||||
}
|
||||
|
||||
PartLog::addNewParts(getContext(), PartLog::createPartLogEntries(dst_parts, watch.elapsed(), profile_events_scope.getSnapshot()));
|
||||
@ -7235,11 +7305,15 @@ void StorageReplicatedMergeTree::replacePartitionFrom(
|
||||
for (auto & lock : ephemeral_locks)
|
||||
lock.assumeUnlocked();
|
||||
|
||||
cleanup_thread.wakeup();
|
||||
|
||||
lock2.reset();
|
||||
lock1.reset();
|
||||
|
||||
/// We need to pull the DROP_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost)
|
||||
queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC);
|
||||
parts_holder.clear();
|
||||
cleanup_thread.wakeup();
|
||||
|
||||
|
||||
waitForLogEntryToBeProcessedIfNecessary(entry, query_context);
|
||||
|
||||
return;
|
||||
@ -7405,6 +7479,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta

Coordination::Responses op_results;

/// We should hold replaced parts until we actually create DROP_RANGE in ZooKeeper
DataPartsVector parts_holder;
try
{
Coordination::Requests ops;
@ -7439,6 +7515,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
else
zkutil::KeeperMultiException::check(code, ops, op_results);

parts_holder = getDataPartsVectorInPartitionForInternalUsage(MergeTreeDataPartState::Active, drop_range.partition_id, &src_data_parts_lock);
removePartsInRangeFromWorkingSet(NO_TRANSACTION_RAW, drop_range, src_data_parts_lock);
transaction.commit(&src_data_parts_lock);
}
@ -7461,7 +7538,6 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
for (auto & lock : ephemeral_locks)
lock.assumeUnlocked();

cleanup_thread.wakeup();
lock2.reset();

dest_table_storage->waitForLogEntryToBeProcessedIfNecessary(entry, query_context);
@ -7480,6 +7556,12 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
entry_delete.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1);

lock1.reset();

/// We need to pull the DROP_RANGE before cleaning the replaced parts (otherwise CheckThread may decide that parts are lost)
queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC);
parts_holder.clear();
cleanup_thread.wakeup();

waitForLogEntryToBeProcessedIfNecessary(entry_delete, query_context);

/// Cleaning possibly stored information about parts from /quorum/last_part node in ZooKeeper.
@ -9381,7 +9463,7 @@ void StorageReplicatedMergeTree::backupData(
auto post_collecting_task = [shared_id,
my_replica_name = getReplicaName(),
coordination,
parts_backup_entries = std::move(parts_backup_entries),
my_parts_backup_entries = std::move(parts_backup_entries),
&backup_entries_collector]()
{
Strings data_paths = coordination->getReplicatedDataPaths(shared_id);
@ -9393,7 +9475,7 @@ void StorageReplicatedMergeTree::backupData(
Strings part_names = coordination->getReplicatedPartNames(shared_id, my_replica_name);
std::unordered_set<std::string_view> part_names_set{part_names.begin(), part_names.end()};

for (const auto & part_backup_entries : parts_backup_entries)
for (const auto & part_backup_entries : my_parts_backup_entries)
{
if (part_names_set.contains(part_backup_entries.part_name))
{
@ -579,6 +579,8 @@ private:
void removePartsFromZooKeeperWithRetries(const Strings & part_names, size_t max_retries = 5);
void removePartsFromZooKeeperWithRetries(PartsToRemoveFromZooKeeper & parts, size_t max_retries = 5);

void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & part_name) override;

/// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts.
void removePartAndEnqueueFetch(const String & part_name, bool storage_init);
@ -578,14 +578,21 @@ StorageS3Source::StorageS3Source(

StorageS3Source::ReaderHolder StorageS3Source::createReader()
{
auto [current_key, info] = (*file_iterator)();
if (current_key.empty())
return {};
KeyWithInfo key_with_info;
size_t object_size;
do
{
key_with_info = (*file_iterator)();
if (key_with_info.key.empty())
return {};

size_t object_size = info ? info->size : S3::getObjectSize(*client, bucket, current_key, version_id, request_settings);
auto compression_method = chooseCompressionMethod(current_key, compression_hint);
object_size = key_with_info.info ? key_with_info.info->size : S3::getObjectSize(*client, bucket, key_with_info.key, version_id, request_settings);
}
while (getContext()->getSettingsRef().s3_skip_empty_files && object_size == 0);

auto read_buf = createS3ReadBuffer(current_key, object_size);
auto compression_method = chooseCompressionMethod(key_with_info.key, compression_hint);

auto read_buf = createS3ReadBuffer(key_with_info.key, object_size);
auto input_format = FormatFactory::instance().getInput(
format, *read_buf, sample_block, getContext(), max_block_size,
format_settings, std::nullopt, std::nullopt,
@ -604,7 +611,7 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader()
auto pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
auto current_reader = std::make_unique<PullingPipelineExecutor>(*pipeline);

return ReaderHolder{fs::path(bucket) / current_key, std::move(read_buf), std::move(pipeline), std::move(current_reader)};
return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), std::move(pipeline), std::move(current_reader)};
}

std::future<StorageS3Source::ReaderHolder> StorageS3Source::createReaderAsync()
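The new do/while above keeps asking the file iterator for another key while s3_skip_empty_files is set and the resolved object size is zero. The same loop shape in a self-contained form; KeyWithInfo and the callbacks below are simplified stand-ins for the StorageS3 types, not the actual interfaces:

#include <cstddef>
#include <functional>
#include <optional>
#include <string>

struct KeyWithInfo
{
    std::string key;                  // empty key signals "no more files"
    std::optional<size_t> size;       // may be unknown and resolved lazily
};

using FileIterator = std::function<KeyWithInfo()>;

// Return the next non-empty key, or an empty key when the iterator is exhausted.
KeyWithInfo nextNonEmptyKey(const FileIterator & next, bool skip_empty_files,
                            const std::function<size_t(const std::string &)> & resolve_size)
{
    KeyWithInfo key_with_info;
    size_t object_size = 0;
    do
    {
        key_with_info = next();
        if (key_with_info.key.empty())
            return key_with_info;     // exhausted

        object_size = key_with_info.size ? *key_with_info.size
                                         : resolve_size(key_with_info.key);
    } while (skip_empty_files && object_size == 0);

    return key_with_info;
}

Resolving the size only when the iterator did not already provide it avoids an extra HEAD-style request per key, which is the same trade-off the patched code makes.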
@ -1449,38 +1456,45 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl(

ReadBufferIterator read_buffer_iterator = [&, first = true](ColumnsDescription & cached_columns) mutable -> std::unique_ptr<ReadBuffer>
{
auto [key, _] = (*file_iterator)();

if (key.empty())
while (true)
{
if (first)
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Cannot extract table structure from {} format file, because there are no files with provided path "
"in S3. You must specify table structure manually", configuration.format);
auto key_with_info = (*file_iterator)();

return nullptr;
}

/// S3 file iterator could get new keys after new iteration, check them in schema cache.
if (ctx->getSettingsRef().schema_inference_use_cache_for_s3 && read_keys.size() > prev_read_keys_size)
{
columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end(), configuration, format_settings, ctx);
prev_read_keys_size = read_keys.size();
if (columns_from_cache)
if (key_with_info.key.empty())
{
cached_columns = *columns_from_cache;
if (first)
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Cannot extract table structure from {} format file, because there are no files with provided path "
"in S3 or all files are empty. You must specify table structure manually",
configuration.format);

return nullptr;
}
}

first = false;
int zstd_window_log_max = static_cast<int>(ctx->getSettingsRef().zstd_window_log_max);
return wrapReadBufferWithCompressionMethod(
std::make_unique<ReadBufferFromS3>(
configuration.client, configuration.url.bucket, key, configuration.url.version_id, configuration.request_settings, ctx->getReadSettings()),
chooseCompressionMethod(key, configuration.compression_method),
zstd_window_log_max);
/// S3 file iterator could get new keys after new iteration, check them in schema cache.
if (ctx->getSettingsRef().schema_inference_use_cache_for_s3 && read_keys.size() > prev_read_keys_size)
{
columns_from_cache = tryGetColumnsFromCache(read_keys.begin() + prev_read_keys_size, read_keys.end(), configuration, format_settings, ctx);
prev_read_keys_size = read_keys.size();
if (columns_from_cache)
{
cached_columns = *columns_from_cache;
return nullptr;
}
}

if (ctx->getSettingsRef().s3_skip_empty_files && key_with_info.info && key_with_info.info->size == 0)
continue;

int zstd_window_log_max = static_cast<int>(ctx->getSettingsRef().zstd_window_log_max);
auto impl = std::make_unique<ReadBufferFromS3>(configuration.client, configuration.url.bucket, key_with_info.key, configuration.url.version_id, configuration.request_settings, ctx->getReadSettings());
if (!ctx->getSettingsRef().s3_skip_empty_files || !impl->eof())
{
first = false;
return wrapReadBufferWithCompressionMethod(std::move(impl), chooseCompressionMethod(key_with_info.key, configuration.compression_method), zstd_window_log_max);
}
}
};

ColumnsDescription columns;
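The rewritten schema-inference iterator above loops until it finds a non-empty file and uses the first flag to distinguish "no usable files at all" (an error) from "no more files after at least one good one" (normal end of iteration). A compact sketch of that control flow with plain standard types; none of these names are the real API:

#include <functional>
#include <optional>
#include <stdexcept>
#include <string>

// Yields the next candidate file name, or std::nullopt when exhausted.
using NameIterator = std::function<std::optional<std::string>()>;

// Opens a file and returns its contents; an empty string means an empty file.
using FileOpener = std::function<std::string(const std::string &)>;

// Return data for schema inference, skipping empty files; throw if nothing usable was ever seen.
std::optional<std::string> nextDataForSchemaInference(const NameIterator & next, const FileOpener & open)
{
    bool first = true;
    while (true)
    {
        auto name = next();
        if (!name)
        {
            if (first)
                throw std::runtime_error("no files with data found, specify the structure manually");
            return std::nullopt;      // normal end of iteration
        }

        auto data = open(*name);
        if (data.empty())
            continue;                 // analogous to s3_skip_empty_files

        first = false;                // only flipped once real data has been returned
        return data;
    }
}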
@ -14,10 +14,8 @@
#include <Parsers/ASTIdentifier.h>

#include <IO/ConnectionTimeouts.h>
#include <IO/ParallelReadBuffer.h>
#include <IO/WriteBufferFromHTTP.h>
#include <IO/WriteHelpers.h>
#include <IO/WithFileSize.h>

#include <Formats/FormatFactory.h>
#include <Formats/ReadSchemaUtils.h>
@ -29,7 +27,6 @@
#include <Common/ThreadStatus.h>
#include <Common/parseRemoteDescription.h>
#include <Common/NamedCollections/NamedCollections.h>
#include <IO/HTTPCommon.h>
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/HTTPHeaderEntries.h>

@ -48,7 +45,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int NETWORK_ERROR;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
}

static constexpr auto bad_arguments_error_message = "Storage URL requires 1-4 arguments: "
@ -242,27 +239,36 @@ StorageURLSource::StorageURLSource(
auto headers = getHeaders(headers_);

/// Lazy initialization. We should not perform requests in constructor, because we need to do it in query pipeline.
initialize = [=, this](const FailoverOptions & uri_options)
initialize = [=, this]()
{
if (uri_options.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty url list");
std::vector<String> current_uri_options;
std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> uri_and_buf;
do
{
current_uri_options = (*uri_iterator)();
if (current_uri_options.empty())
return false;

auto first_option = uri_options.begin();
auto [actual_uri, buf] = getFirstAvailableURIAndReadBuffer(
first_option,
uri_options.end(),
context,
params,
http_method,
callback,
timeouts,
credentials,
headers,
glob_url,
uri_options.size() == 1);
auto first_option = current_uri_options.cbegin();
uri_and_buf = getFirstAvailableURIAndReadBuffer(
first_option,
current_uri_options.end(),
context,
params,
http_method,
callback,
timeouts,
credentials,
headers,
glob_url,
current_uri_options.size() == 1);

curr_uri = actual_uri;
read_buf = std::move(buf);
/// If file is empty and engine_url_skip_empty_files=1, skip it and go to the next file.
}
while (context->getSettingsRef().engine_url_skip_empty_files && uri_and_buf.second->eof());

curr_uri = uri_and_buf.first;
read_buf = std::move(uri_and_buf.second);

try
{
@ -294,6 +300,7 @@ StorageURLSource::StorageURLSource(

pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
return true;
};
}
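The constructor change above turns initialize from a callback that received a fixed set of failover URIs into a parameterless lambda that pulls the next batch from the iterator itself and reports through its return value whether anything is left to read; generate() below then only needs the single check if (!reader && !initialize()). A reduced sketch of that shape, with an invented Reader placeholder instead of the real pipeline objects:

#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct Reader { std::string source; };

using UriIterator = std::function<std::vector<std::string>()>;  // empty vector == exhausted

class Source
{
public:
    explicit Source(UriIterator iter) : uri_iterator(std::move(iter))
    {
        // Lazy: remember how to open the next source, do no network work in the constructor.
        initialize = [this]() -> bool
        {
            auto options = uri_iterator();
            if (options.empty())
                return false;                      // nothing left to read
            reader = std::make_unique<Reader>(Reader{options.front()});
            return true;
        };
    }

    // Mirrors the new pattern in generate(): bail out when initialization finds no more input.
    std::optional<std::string> generate()
    {
        if (!reader && !initialize())
            return std::nullopt;
        return reader->source;
    }

private:
    std::function<bool()> initialize;
    UriIterator uri_iterator;
    std::unique_ptr<Reader> reader;
};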
@ -308,14 +315,8 @@ Chunk StorageURLSource::generate()
break;
}

if (!reader)
{
auto current_uri = (*uri_iterator)();
if (current_uri.empty())
return {};

initialize(current_uri);
}
if (!reader && !initialize())
return {};

Chunk chunk;
if (reader->pull(chunk))
@ -350,7 +351,7 @@ Chunk StorageURLSource::generate()
return {};
}

std::tuple<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> StorageURLSource::getFirstAvailableURIAndReadBuffer(
std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> StorageURLSource::getFirstAvailableURIAndReadBuffer(
std::vector<String>::const_iterator & option,
const std::vector<String>::const_iterator & end,
ContextPtr context,
@ -367,6 +368,7 @@ std::tuple<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> StorageURLSource
ReadSettings read_settings = context->getReadSettings();

size_t options = std::distance(option, end);
std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> last_skipped_empty_res;
for (; option != end; ++option)
{
bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end);
@ -396,6 +398,12 @@ std::tuple<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> StorageURLSource
/* use_external_buffer */ false,
/* skip_url_not_found_error */ skip_url_not_found_error);

if (context->getSettingsRef().engine_url_skip_empty_files && res->eof() && option != std::prev(end))
{
last_skipped_empty_res = {request_uri, std::move(res)};
continue;
}

return std::make_tuple(request_uri, std::move(res));
}
catch (...)
@ -412,6 +420,11 @@ std::tuple<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> StorageURLSource
}
}

/// If all options are unreachable except empty ones that we skipped,
/// return last empty result. It will be skipped later.
if (last_skipped_empty_res.second)
return last_skipped_empty_res;

throw Exception(ErrorCodes::NETWORK_ERROR, "All uri ({}) options are unreachable: {}", options, first_exception_message);
}
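The new last_skipped_empty_res above remembers the most recent reachable-but-empty candidate, so that when every remaining option fails the function can still return something instead of reporting all URIs as unreachable; the caller's do/while then discards the empty buffer. The same fallback idea in isolation, with exceptions standing in for network errors and every name invented for the example:

#include <functional>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

struct Fetched
{
    std::string uri;
    std::string body;   // empty body == empty file
};

// Try options in order; prefer a non-empty result, but fall back to the last
// reachable-but-empty one rather than failing when only empty files succeeded.
Fetched firstAvailable(const std::vector<std::string> & options,
                       const std::function<std::string(const std::string &)> & fetch,
                       bool skip_empty)
{
    std::optional<Fetched> last_skipped_empty;
    std::string first_error;

    for (size_t i = 0; i < options.size(); ++i)
    {
        try
        {
            std::string body = fetch(options[i]);
            if (skip_empty && body.empty() && i + 1 != options.size())
            {
                last_skipped_empty = Fetched{options[i], body};
                continue;
            }
            return Fetched{options[i], std::move(body)};
        }
        catch (const std::exception & e)
        {
            if (first_error.empty())
                first_error = e.what();   // keep only the first failure, like the real code
        }
    }

    if (last_skipped_empty)
        return *last_skipped_empty;

    throw std::runtime_error("all " + std::to_string(options.size()) + " options are unreachable: " + first_error);
}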
@ -593,26 +606,41 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData(
if (context->getSettingsRef().schema_inference_use_cache_for_url)
columns_from_cache = tryGetColumnsFromCache(urls_to_check, headers, credentials, format, format_settings, context);

ReadBufferIterator read_buffer_iterator = [&, it = urls_to_check.cbegin()](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
ReadBufferIterator read_buffer_iterator = [&, it = urls_to_check.cbegin(), first = true](ColumnsDescription &) mutable -> std::unique_ptr<ReadBuffer>
{
if (it == urls_to_check.cend())
return nullptr;
std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> uri_and_buf;
do
{
if (it == urls_to_check.cend())
{
if (first)
throw Exception(
ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
"Cannot extract table structure from {} format file, because all files are empty. "
"You must specify table structure manually",
format);
return nullptr;
}

auto [_, buf] = StorageURLSource::getFirstAvailableURIAndReadBuffer(
it,
urls_to_check.cend(),
context,
{},
Poco::Net::HTTPRequest::HTTP_GET,
{},
getHTTPTimeouts(context),
credentials,
headers,
false,
false);
++it;
uri_and_buf = StorageURLSource::getFirstAvailableURIAndReadBuffer(
it,
urls_to_check.cend(),
context,
{},
Poco::Net::HTTPRequest::HTTP_GET,
{},
getHTTPTimeouts(context),
credentials,
headers,
false,
false);

++it;
} while (context->getSettingsRef().engine_url_skip_empty_files && uri_and_buf.second->eof());

first = false;
return wrapReadBufferWithCompressionMethod(
std::move(buf),
std::move(uri_and_buf.second),
compression_method,
static_cast<int>(context->getSettingsRef().zstd_window_log_max));
};
@ -183,7 +183,7 @@ public:

static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);

static std::tuple<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> getFirstAvailableURIAndReadBuffer(
static std::pair<Poco::URI, std::unique_ptr<ReadWriteBufferFromHTTP>> getFirstAvailableURIAndReadBuffer(
std::vector<String>::const_iterator & option,
const std::vector<String>::const_iterator & end,
ContextPtr context,
@ -197,7 +197,7 @@ public:
bool delay_initialization);

private:
using InitializeFunc = std::function<void(const FailoverOptions &)>;
using InitializeFunc = std::function<bool()>;
InitializeFunc initialize;

String name;