Merge branch 'master' into create-user-defined-lambda-function

2024-11-21 23:21:59 +00:00 · 2021-08-24 19:12:32 +03:00 · 2021-08-24 19:12:32 +03:00 · 82ba24cd10
commit 82ba24cd10
parent 010d72593f 23a2ce2019
158 changed files with 4854 additions and 375 deletions
--- a/SECURITY.md
+++ b/SECURITY.md
@ -1,9 +1,11 @@
 # Security Policy

-## Supported Versions
+## Security Announcements
+Security fixes will be announced by posting them in the [security changelog](https://clickhouse.tech/docs/en/whats-new/security-changelog/).

-The following versions of ClickHouse server are
-currently being supported with security updates:
+## Scope and Supported Versions
+
+The following versions of ClickHouse server are currently being supported with security updates:

 | Version | Supported          |
 | ------- | ------------------ |
@ -11,18 +13,49 @@ currently being supported with security updates:
 | 18.x   | :x:                |
 | 19.x   | :x:                |
 | 20.1   | :x: |
-| 20.3   | :white_check_mark: |
+| 20.3   | :x: |
 | 20.4   | :x: |
 | 20.5   | :x: |
 | 20.6   | :x: |
 | 20.7   | :x: |
-| 20.8   | :white_check_mark: |
+| 20.8   | :x: |
 | 20.9   | :x: |
 | 20.10  | :x: |
-| 20.11  | :white_check_mark: |
-| 20.12  | :white_check_mark: |
-| 21.1   | :white_check_mark: |
+| 20.11  | :x: |
+| 20.12  | :x: |
+| 21.1   | :x: |
+| 21.2   | :x: |
+| 21.3   | ✅ |
+| 21.4   | :x: |
+| 21.5   | :x: |
+| 21.6   | ✅ |
+| 21.7   | ✅ |
+| 21.8   | ✅ |

 ## Reporting a Vulnerability

+We're extremely grateful to security researchers and users who report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers.
+
 To report a potential vulnerability in ClickHouse please send the details about it to [clickhouse-feedback@yandex-team.com](mailto:clickhouse-feedback@yandex-team.com).
+
+### When Should I Report a Vulnerability?
+
+- You think you discovered a potential security vulnerability in ClickHouse
+- You are unsure how a vulnerability affects ClickHouse
+
+### When Should I NOT Report a Vulnerability?
+
+- You need help tuning ClickHouse components for security
+- You need help applying security related updates
+- Your issue is not security related
+
+## Security Vulnerability Response
+
+Each report is acknowledged and analyzed by ClickHouse maintainers within 5 working days.
+As the security issue moves from triage, to identified fix, to release planning, we will keep the reporter updated.
+
+## Public Disclosure Timing
+
+A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the time from report date to disclosure date to be on the order of 7 days.
+
+
--- a/base/common/insertAtEnd.h
+++ b/base/common/insertAtEnd.h
@ -0,0 +1,28 @@
+#pragma once
+
+#include <vector>
+
+/// Appends copies of all elements of `src` to the end of `dest`; `src` is left unchanged and an empty `src` is a no-op.
+template <typename T>
+void insertAtEnd(std::vector<T> & dest, const std::vector<T> & src)
+{
+    if (src.empty())  /// Nothing to append, so the reserve() call below is skipped as well.
+        return;
+    dest.reserve(dest.size() + src.size());  /// Request capacity for the combined size before inserting.
+    dest.insert(dest.end(), src.begin(), src.end());
+}
+
+template <typename T>  /// Rvalue overload: moves the elements of `src` to the end of `dest`.
+void insertAtEnd(std::vector<T> & dest, std::vector<T> && src)
+{
+    if (src.empty())
+        return;
+    if (dest.empty())
+    {
+        dest.swap(src);  /// `dest` takes over the contents of `src` wholesale, so no element is moved individually.
+        return;
+    }
+    dest.reserve(dest.size() + src.size());
+    dest.insert(dest.end(), std::make_move_iterator(src.begin()), std::make_move_iterator(src.end()));  /// NOTE(review): std::make_move_iterator is declared in <iterator>, which is not among the includes of this header.
+    src.clear();  /// Drop the moved-from elements so `src` ends up empty.
+}
--- a/base/common/unit.h
+++ b/base/common/unit.h
@ -0,0 +1,10 @@
+#pragma once
+#include <cstddef>
+
+constexpr size_t KiB = 1024;  /// Power-of-two size multipliers: each constant is 1024 times the previous one.
+constexpr size_t MiB = 1024 * KiB;
+constexpr size_t GiB = 1024 * MiB;
+
+constexpr size_t operator"" _KiB(unsigned long long val) { return val * KiB; }  /// User-defined literals: `N_KiB` yields N * KiB, with the product returned as size_t.
+constexpr size_t operator"" _MiB(unsigned long long val) { return val * MiB; }  /// `N_MiB` yields N * MiB.
+constexpr size_t operator"" _GiB(unsigned long long val) { return val * GiB; }  /// `N_GiB` yields N * GiB.
--- a/base/common/wide_integer_impl.h
+++ b/base/common/wide_integer_impl.h
@ -9,6 +9,7 @@
 #include <cmath>
 #include <cfloat>
 #include <cassert>
+#include <tuple>
 #include <limits>


@ -39,6 +40,18 @@ static constexpr bool IntegralConcept() noexcept
    return std::is_integral_v<T> || IsWideInteger<T>::value;
 }

+template <typename T>  /// Compile-time trait: `value` tells whether std::tuple_size<T>::value is a well-formed expression.
+class IsTupleLike
+{
+    template <typename U>
+    static auto check(U * p) -> decltype(std::tuple_size<U>::value, int());  /// Participates in overload resolution only when std::tuple_size<U>::value is well-formed; its return type is int.
+    template <typename>
+    static void check(...);  /// Fallback overload; its return type is void.
+
+public:
+    static constexpr const bool value = !std::is_void<decltype(check<T>(nullptr))>::value;  /// True when the int-returning overload is the one selected for T.
+};
+
 }

 namespace std
@ -227,6 +240,19 @@ struct integer<Bits, Signed>::_impl
            self.items[i] = 0;
    }

+    template <typename TupleLike, size_t i = 0>  /// Fills self.items[] from the elements of `tuple`, handling index `i` and then instantiating itself for `i + 1`.
+    constexpr static void wide_integer_from_tuple_like(integer<Bits, Signed> & self, const TupleLike & tuple) noexcept
+    {
+        if constexpr (i < item_count)  /// Stops after the last item; tuple elements at index >= item_count are never read.
+        {
+            if constexpr (i < std::tuple_size_v<TupleLike>)
+                self.items[i] = std::get<i>(tuple);
+            else
+                self.items[i] = 0;  /// The tuple has fewer elements than there are items: the remaining items are zeroed.
+            wide_integer_from_tuple_like<TupleLike, i + 1>(self, tuple);
+        }
+    }
+
    /**
     * N.B. t is constructed from double, so max(t) = max(double) ~ 2^310
     * the recursive call happens when t / 2^64 > 2^64, so there won't be more than 5 of them.
@ -966,6 +992,8 @@ constexpr integer<Bits, Signed>::integer(T rhs) noexcept
 {
    if constexpr (IsWideInteger<T>::value)
        _impl::wide_integer_from_wide_integer(*this, rhs);
+    else if  constexpr (IsTupleLike<T>::value)
+        _impl::wide_integer_from_tuple_like(*this, rhs);
    else
        _impl::wide_integer_from_builtin(*this, rhs);
 }
@ -979,6 +1007,8 @@ constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept
    {
        if constexpr (IsWideInteger<T>::value)
            _impl::wide_integer_from_wide_integer(*this, *il.begin());
+        else if  constexpr (IsTupleLike<T>::value)
+            _impl::wide_integer_from_tuple_like(*this, *il.begin());
        else
            _impl::wide_integer_from_builtin(*this, *il.begin());
    }
@ -1007,7 +1037,10 @@ template <size_t Bits, typename Signed>
 template <typename T>
 constexpr integer<Bits, Signed> & integer<Bits, Signed>::operator=(T rhs) noexcept
 {
-    _impl::wide_integer_from_builtin(*this, rhs);
+    if  constexpr (IsTupleLike<T>::value)
+        _impl::wide_integer_from_tuple_like(*this, rhs);
+    else
+        _impl::wide_integer_from_builtin(*this, rhs);
    return *this;
 }

--- a/docker/builder/Dockerfile
+++ b/docker/builder/Dockerfile
@ -2,6 +2,8 @@ FROM ubuntu:20.04

 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update \
    && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
        --yes --no-install-recommends --verbose-versions \
--- a/docker/client/Dockerfile
+++ b/docker/client/Dockerfile
@ -3,6 +3,8 @@ FROM ubuntu:18.04
 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
 ARG version=21.10.1.*

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        apt-transport-https \
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@ -3,6 +3,8 @@ FROM ubuntu:20.04

 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update \
    && apt-get install \
        apt-transport-https \
--- a/docker/packager/deb/Dockerfile
+++ b/docker/packager/deb/Dockerfile
@ -3,6 +3,8 @@ FROM ubuntu:20.04

 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update \
    && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
        --yes --no-install-recommends --verbose-versions \
--- a/docker/packager/unbundled/Dockerfile
+++ b/docker/packager/unbundled/Dockerfile
@ -5,6 +5,8 @@ RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
    && wget -nv -O /tmp/arrow-keyring.deb "https://apache.jfrog.io/artifactory/arrow/ubuntu/apache-arrow-apt-source-latest-${CODENAME}.deb" \
    && dpkg -i /tmp/arrow-keyring.deb

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 # Libraries from OS are only needed to test the "unbundled" build (that is not used in production).
 RUN apt-get update \
    && apt-get install \
--- a/docker/server/Dockerfile
+++ b/docker/server/Dockerfile
@ -26,6 +26,8 @@ ARG DEBIAN_FRONTEND=noninteractive
 # installed to prevent picking those uid / gid by some unrelated software.
 # The same uid / gid (101) is used both for alpine and ubuntu.

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN groupadd -r clickhouse --gid=101 \
    && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
    && apt-get update \
--- a/docker/test/base/Dockerfile
+++ b/docker/test/base/Dockerfile
@ -3,6 +3,8 @@ FROM ubuntu:20.04

 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update \
    && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
        --yes --no-install-recommends --verbose-versions \
--- a/docker/test/codebrowser/Dockerfile
+++ b/docker/test/codebrowser/Dockerfile
@ -2,6 +2,8 @@
 # docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output yandex/clickhouse-codebrowser
 FROM yandex/clickhouse-binary-builder

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev

 # repo versions doesn't work correctly with C++17
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@ -3,6 +3,8 @@ FROM ubuntu:20.04

 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update \
    && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \
        --yes --no-install-recommends --verbose-versions \
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@ -393,6 +393,9 @@ function run_tests
        01853_s2_cells_intersect
        01854_s2_cap_contains
        01854_s2_cap_union
+
+        # needs s3
+        01944_insert_partition_by
    )

    time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \
--- a/docker/test/fuzzer/Dockerfile
+++ b/docker/test/fuzzer/Dockerfile
@ -5,6 +5,8 @@ ENV LANG=C.UTF-8
 ENV TZ=Europe/Moscow
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
            ca-certificates \
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@ -1,6 +1,8 @@
 # docker build -t yandex/clickhouse-integration-tests-runner .
 FROM ubuntu:20.04

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update \
    && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
    ca-certificates \
--- a/docker/test/performance-comparison/Dockerfile
+++ b/docker/test/performance-comparison/Dockerfile
@ -5,6 +5,8 @@ ENV LANG=C.UTF-8
 ENV TZ=Europe/Moscow
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
            bash \
--- a/docker/test/sqlancer/Dockerfile
+++ b/docker/test/sqlancer/Dockerfile
@ -1,6 +1,8 @@
 # docker build -t yandex/clickhouse-sqlancer-test .
 FROM ubuntu:20.04

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven python3 --yes --no-install-recommends
 RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip
 RUN mkdir /sqlancer && \
--- a/docker/test/style/Dockerfile
+++ b/docker/test/style/Dockerfile
@ -1,6 +1,8 @@
 # docker build -t yandex/clickhouse-style-test .
 FROM ubuntu:20.04

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
    shellcheck \
    libxml2-utils \
--- a/docker/test/testflows/runner/Dockerfile
+++ b/docker/test/testflows/runner/Dockerfile
@ -1,6 +1,8 @@
 # docker build -t yandex/clickhouse-testflows-runner .
 FROM ubuntu:20.04

+RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
+
 RUN apt-get update \
    && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
    ca-certificates \
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -99,7 +99,9 @@ For a description of parameters, see the [CREATE query description](../../../sql
    -   `use_minimalistic_part_header_in_zookeeper` — Storage method of the data parts headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper=1`, then ZooKeeper stores less data. For more information, see the [setting description](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) in “Server configuration parameters”.
    -   `min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.
        <a name="mergetree_setting-merge_with_ttl_timeout"></a>
-    -   `merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with TTL. Default value: 86400 (1 day).
+    -   `merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with delete TTL. Default value: `14400` seconds (4 hours).
+    -   `merge_with_recompression_ttl_timeout` — Minimum delay in seconds before repeating a merge with recompression TTL. Default value: `14400` seconds (4 hours).    
+    -   `try_fetch_recompressed_part_timeout` — Timeout (in seconds) before starting merge with recompression. During this time ClickHouse tries to fetch recompressed part from replica which assigned this merge with recompression. Default value: `7200` seconds (2 hours).    
    -   `write_final_mark` — Enables or disables writing the final index mark at the end of data part (after the last byte). Default value: 1. Don’t turn it off.
    -   `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192.
    -   `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
@ -333,7 +335,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234

    The optional `false_positive` parameter is the probability of receiving a false positive response from the filter. Possible values: (0, 1). Default value: 0.025.

-    Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`.
+    Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`, `UUID`.

    The following functions can use it: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md).

@ -416,18 +418,20 @@ Reading from a table is automatically parallelized.

 Determines the lifetime of values.

-The `TTL` clause can be set for the whole table and for each individual column. Table-level TTL can also specify logic of automatic move of data between disks and volumes.
+The `TTL` clause can be set for the whole table and for each individual column. Table-level `TTL` can also specify the logic of automatically moving data between disks and volumes, or recompressing parts where all the data has expired.

 Expressions must evaluate to [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md) data type.

-Example:
+**Syntax**
+
+Setting time-to-live for a column:

 ``` sql
 TTL time_column
 TTL time_column + interval
 ```

-To define `interval`, use [time interval](../../../sql-reference/operators/index.md#operators-datetime) operators.
+To define `interval`, use [time interval](../../../sql-reference/operators/index.md#operators-datetime) operators, for example:

 ``` sql
 TTL date_time + INTERVAL 1 MONTH
@ -440,9 +444,9 @@ When the values in the column expire, ClickHouse replaces them with the default

 The `TTL` clause can’t be used for key columns.

-Examples:
+**Examples**

-Creating a table with TTL
+Creating a table with `TTL`:

 ``` sql
 CREATE TABLE example_table
@ -475,11 +479,11 @@ ALTER TABLE example_table

 ### Table TTL {#mergetree-table-ttl}

-Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving feature, all rows of a part must satisfy the movement expression criteria.
+Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving or recompressing, all rows of a part must satisfy the `TTL` expression criteria.

 ``` sql
 TTL expr
-    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
+    [DELETE|RECOMPRESS codec_name1|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|RECOMPRESS codec_name2|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
    [WHERE conditions]
    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ]
 ```
@ -487,11 +491,12 @@ TTL expr
 Type of TTL rule may follow each TTL expression. It affects an action which is to be done once the expression is satisfied (reaches current time):

 -   `DELETE` - delete expired rows (default action);
+-   `RECOMPRESS codec_name` - recompress data part with the `codec_name`;
 -   `TO DISK 'aaa'` - move part to the disk `aaa`;
 -   `TO VOLUME 'bbb'` - move part to the volume `bbb`;
 -   `GROUP BY` - aggregate expired rows.

-With `WHERE` clause you may specify which of the expired rows to delete or aggregate (it cannot be applied to moves).
+With `WHERE` clause you may specify which of the expired rows to delete or aggregate (it cannot be applied to moves or recompression).

 `GROUP BY` expression must be a prefix of the table primary key.

@ -499,7 +504,7 @@ If a column is not part of the `GROUP BY` expression and is not set explicitly i

 **Examples**

-Creating a table with TTL:
+Creating a table with `TTL`:

 ``` sql
 CREATE TABLE example_table
@ -515,7 +520,7 @@ TTL d + INTERVAL 1 MONTH [DELETE],
    d + INTERVAL 2 WEEK TO DISK 'bbb';
 ```

-Altering TTL of the table:
+Altering `TTL` of the table:

 ``` sql
 ALTER TABLE example_table
@ -536,6 +541,21 @@ ORDER BY d
 TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1;
 ```

+Creating a table, where expired rows are recompressed: 
+
+```sql
+CREATE TABLE table_for_recompression
+(
+    d DateTime,
+    key UInt64,
+    value String
+) ENGINE MergeTree()
+ORDER BY tuple()
+PARTITION BY key
+TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10))
+SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0;
+```
+
 Creating a table, where expired rows are aggregated. In result rows `x` contains the maximum value across the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows.

 ``` sql
@ -552,14 +572,19 @@ ORDER BY (k1, k2)
 TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
 ```

-**Removing Data**
+### Removing Expired Data {#mergetree-removing-expired-data}

-Data with an expired TTL is removed when ClickHouse merges data parts.
+Data with an expired `TTL` is removed when ClickHouse merges data parts.

-When ClickHouse see that data is expired, it performs an off-schedule merge. To control the frequency of such merges, you can set `merge_with_ttl_timeout`. If the value is too low, it will perform many off-schedule merges that may consume a lot of resources.
+When ClickHouse detects that data is expired, it performs an off-schedule merge. To control the frequency of such merges, you can set `merge_with_ttl_timeout`. If the value is too low, it will perform many off-schedule merges that may consume a lot of resources.

 If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/optimize.md) query before `SELECT`.

+**See Also**
+
+- [ttl_only_drop_parts](../../../operations/settings/settings.md#ttl_only_drop_parts) setting
+
+
 ## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}

 ### Introduction {#introduction}
--- a/docs/en/engines/table-engines/special/file.md
+++ b/docs/en/engines/table-engines/special/file.md
@ -24,7 +24,7 @@ The `Format` parameter specifies one of the available file formats. To perform
 `INSERT` queries – for output. The available formats are listed in the
 [Formats](../../../interfaces/formats.md#formats) section.

-ClickHouse does not allow specifying filesystem path for`File`. It will use folder defined by [path](../../../operations/server-configuration-parameters/settings.md) setting in server configuration.
+ClickHouse does not allow specifying filesystem path for `File`. It will use folder defined by [path](../../../operations/server-configuration-parameters/settings.md) setting in server configuration.

 When creating table using `File(Format)` it creates empty subdirectory in that folder. When data is written to that table, it’s put into `data.Format` file in that subdirectory.

--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -2041,10 +2041,25 @@ Default value: 0.

 ## input_format_parallel_parsing {#input-format-parallel-parsing}

-   Type: bool
-   Default value: True
+Enables or disables order-preserving parallel parsing of data formats. Supported only for [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) and [JSONEachRow](../../interfaces/formats.md#jsoneachrow) formats.

-Enable order-preserving parallel parsing of data formats. Supported only for TSV, TKSV, CSV, and JSONEachRow formats.
+Possible values:
+
+-   1 — Enabled.
+-   0 — Disabled.
+
+Default value: `1`.
+
+## output_format_parallel_formatting {#output-format-parallel-formatting}
+
+Enables or disables parallel formatting of data formats. Supported only for [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) and [JSONEachRow](../../interfaces/formats.md#jsoneachrow) formats.
+
+Possible values:
+
+-   1 — Enabled.
+-   0 — Disabled.
+
+Default value: `0`.

 ## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}

--- a/docs/ru/development/architecture.md
+++ b/docs/ru/development/architecture.md
@ -36,7 +36,7 @@ ClickHouse - полноценная колоночная СУБД. Данные

 `IDataType` и `IColumn` слабо связаны друг с другом. Различные типы данных могут быть представлены в памяти с помощью одной реализации `IColumn`. Например, и `DataTypeUInt32`, и `DataTypeDateTime` в памяти представлены как `ColumnUInt32` или `ColumnConstUInt32`. В добавок к этому, один тип данных может быть представлен различными реализациями `IColumn`. Например, `DataTypeUInt8` может быть представлен как `ColumnUInt8` и `ColumnConstUInt8`.

-`IDataType` хранит только метаданные. Например, `DataTypeUInt8` не хранить ничего (кроме скрытого указателя `vptr`), а `DataTypeFixedString` хранит только `N` (фиксированный размер строки).
+`IDataType` хранит только метаданные. Например, `DataTypeUInt8` не хранит ничего (кроме скрытого указателя `vptr`), а `DataTypeFixedString` хранит только `N` (фиксированный размер строки).

 В `IDataType` есть вспомогательные методы для данных различного формата. Среди них методы сериализации значений, допускающих использование кавычек, сериализации значения в JSON или XML. Среди них нет прямого соответствия форматам данных. Например, различные форматы `Pretty` и `TabSeparated` могут использовать один вспомогательный метод `serializeTextEscaped` интерфейса `IDataType`.

@ -62,7 +62,7 @@ ClickHouse - полноценная колоночная СУБД. Данные

 > Потоки блоков используют «втягивающий» (pull) подход к управлению потоком выполнения: когда вы вытягиваете блок из первого потока, он, следовательно, вытягивает необходимые блоки из вложенных потоков, так и работает весь конвейер выполнения. Ни «pull» ни «push» не имеют явного преимущества, потому что поток управления неявный, и это ограничивает в реализации различных функций, таких как одновременное выполнение нескольких запросов (слияние нескольких конвейеров вместе). Это ограничение можно преодолеть с помощью сопрограмм (coroutines) или просто запуском дополнительных потоков, которые ждут друг друга. У нас может быть больше возможностей, если мы сделаем поток управления явным: если мы локализуем логику для передачи данных из одной расчетной единицы в другую вне этих расчетных единиц. Читайте эту [статью](http://journal.stuffwithstuff.com/2013/01/13/iteration-inside-and-out/) для углубленного изучения.

-Следует отметить, что конвейер выполнения запроса создает временные данные на каждом шаге. Мы стараемся сохранить размер блока достаточно маленьким, чтобы временные данные помещались в кэш процессора. При таком допущении запись и чтение временных данных практически бесплатны по сравнению с другими расчетами. Мы могли бы рассмотреть альтернативу, которая заключается в том, чтобы объединить многие операции в конвеере вместе. Это может сделать конвейер как можно короче и удалить большую часть временных данных, что может быть преимуществом, но у такого подхода также есть недостатки. Например, разделенный конвейер позволяет легко реализовать кэширование промежуточных данных, использование промежуточных данных из аналогичных запросов, выполняемых одновременно, и объединение конвейеров для аналогичных запросов.
+Следует отметить, что конвейер выполнения запроса создает временные данные на каждом шаге. Мы стараемся сохранить размер блока достаточно маленьким, чтобы временные данные помещались в кэш процессора. При таком допущении запись и чтение временных данных практически бесплатны по сравнению с другими расчетами. Мы могли бы рассмотреть альтернативу, которая заключается в том, чтобы объединить многие операции в конвейере вместе. Это может сделать конвейер как можно короче и удалить большую часть временных данных, что может быть преимуществом, но у такого подхода также есть недостатки. Например, разделенный конвейер позволяет легко реализовать кэширование промежуточных данных, использование промежуточных данных из аналогичных запросов, выполняемых одновременно, и объединение конвейеров для аналогичных запросов.

 ## Форматы {#formats}

@ -119,7 +119,7 @@ ClickHouse - полноценная колоночная СУБД. Данные

 Существуют обычные функции и агрегатные функции. Агрегатные функции смотрите в следующем разделе.

-Обычный функции не изменяют число строк и  работают так, как если бы обрабатывали каждую строку независимо. В действительности же, функции вызываются не к отдельным строкам, а блокам данных для реализации векторизованного выполнения запросов.
+Обычные функции не изменяют число строк и  работают так, как если бы обрабатывали каждую строку независимо. В действительности же, функции вызываются не к отдельным строкам, а блокам данных для реализации векторизованного выполнения запросов.

 Некоторые функции, такие как [blockSize](../sql-reference/functions/other-functions.md#function-blocksize), [rowNumberInBlock](../sql-reference/functions/other-functions.md#function-rownumberinblock), и [runningAccumulate](../sql-reference/functions/other-functions.md#runningaccumulate), эксплуатируют блочную обработку и нарушают независимость строк.

@ -162,7 +162,7 @@ ClickHouse имеет сильную типизацию, поэтому нет

 Сервера в кластере в основном независимы. Вы можете создать `Распределенную` (`Distributed`) таблицу на одном или всех серверах в кластере. Такая таблица сама по себе не хранит данные - она только предоставляет возможность "просмотра" всех локальных таблиц на нескольких узлах кластера. При выполнении `SELECT` распределенная таблица переписывает запрос, выбирает удаленные узлы в соответствии с настройками балансировки нагрузки и отправляет им запрос. Распределенная таблица просит удаленные сервера обработать запрос до той стадии, когда промежуточные результаты с разных серверов могут быть объединены. Затем он получает промежуточные результаты и объединяет их. Распределенная таблица пытается возложить как можно больше работы на удаленные серверы и сократить объем промежуточных данных, передаваемых по сети.

-Ситуация усложняется, при использовании подзапросов в случае `IN` или `JOIN`, когда каждый из них использует таблицу `Distributed`. Есть разные стратегии для выполнения таких запросов.
+Ситуация усложняется при использовании подзапросов в случае `IN` или `JOIN`, когда каждый из них использует таблицу `Distributed`. Есть разные стратегии для выполнения таких запросов.

 Глобального плана выполнения распределенных запросов не существует. Каждый узел имеет собственный локальный план для своей части работы. У нас есть простое однонаправленное выполнение распределенных запросов: мы отправляем запросы на удаленные узлы и затем объединяем результаты. Но это невозможно для сложных запросов `GROUP BY` высокой кардинальности или запросов с большим числом временных данных в `JOIN`: в таких случаях нам необходимо перераспределить («reshuffle») данные между серверами, что требует дополнительной координации. ClickHouse не поддерживает выполнение запросов такого рода, и нам нужно работать над этим.

--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@ -86,7 +86,9 @@ ORDER BY expr
    -   `enable_mixed_granularity_parts` — включает или выключает переход к ограничению размера гранул с помощью настройки `index_granularity_bytes`. Настройка `index_granularity_bytes` улучшает производительность ClickHouse при выборке данных из таблиц с большими (десятки и сотни мегабайтов) строками. Если у вас есть таблицы с большими строками, можно включить эту настройку, чтобы повысить эффективность запросов `SELECT`.
    -   `use_minimalistic_part_header_in_zookeeper` — Способ хранения заголовков кусков данных в ZooKeeper. Если  `use_minimalistic_part_header_in_zookeeper = 1`, то ZooKeeper хранит меньше данных. Подробнее читайте в [описании настройки](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) в разделе "Конфигурационные параметры сервера".
    -   `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse  использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов.
-    -   <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день).
+    -   `merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием для удаления данных с истекшим TTL. По умолчанию: `14400` секунд (4 часа).
+    -   `merge_with_recompression_ttl_timeout` — минимальное время в секундах перед повторным слиянием для повторного сжатия данных с истекшим TTL. По умолчанию: `14400` секунд (4 часа).
+    -   `try_fetch_recompressed_part_timeout` — время ожидания (в секундах) перед началом слияния с повторным сжатием. В течение этого времени ClickHouse пытается извлечь сжатую часть из реплики, которая назначила это слияние. Значение по умолчанию: `7200` секунд (2 часа).   
    -   `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её.
    -   `merge_max_block_size` — максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192.
    -   `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes).
@ -401,20 +403,22 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT

 ## TTL для столбцов и таблиц {#table_engine-mergetree-ttl}

-Определяет время жизни значений, а также правила перемещения данных на другой диск или том.
+Определяет время жизни значений.

-Секция `TTL` может быть установлена как для всей таблицы, так и для каждого отдельного столбца. Правила `TTL` для таблицы позволяют указать целевые диски или тома для фонового перемещения на них частей данных.
+Секция `TTL` может быть установлена как для всей таблицы, так и для каждого отдельного столбца. Для таблиц можно установить правила `TTL` для фонового перемещения кусков данных на целевые диски или тома, или правила повторного сжатия кусков данных.

 Выражения должны возвращать тип [Date](../../../engines/table-engines/mergetree-family/mergetree.md) или [DateTime](../../../engines/table-engines/mergetree-family/mergetree.md).

-Для задания времени жизни столбца, например:
+**Синтаксис**
+
+Для задания времени жизни столбца:

 ``` sql
 TTL time_column
 TTL time_column + interval
 ```

-Чтобы задать `interval`, используйте операторы [интервала времени](../../../engines/table-engines/mergetree-family/mergetree.md#operators-datetime).
+Чтобы задать `interval`, используйте операторы [интервала времени](../../../engines/table-engines/mergetree-family/mergetree.md#operators-datetime), например:

 ``` sql
 TTL date_time + INTERVAL 1 MONTH
@ -423,13 +427,13 @@ TTL date_time + INTERVAL 15 HOUR

 ### TTL столбца {#mergetree-column-ttl}

-Когда срок действия значений в столбце истечет, ClickHouse заменит их значениями по умолчанию для типа данных столбца. Если срок действия всех значений столбцов в части данных истек, ClickHouse удаляет столбец из куска данных в файловой системе.
+Когда срок действия значений в столбце истечёт, ClickHouse заменит их значениями по умолчанию для типа данных столбца. Если срок действия всех значений столбцов в части данных истек, ClickHouse удаляет столбец из куска данных в файловой системе.

 Секцию `TTL` нельзя использовать для ключевых столбцов.

-Примеры:
+**Примеры**

-Создание таблицы с TTL
+Создание таблицы с `TTL`:

 ``` sql
 CREATE TABLE example_table
@ -444,7 +448,7 @@ PARTITION BY toYYYYMM(d)
 ORDER BY d;
 ```

-Добавление TTL на колонку существующей таблицы
+Добавление `TTL` на колонку существующей таблицы:

 ``` sql
 ALTER TABLE example_table
@ -452,7 +456,7 @@ ALTER TABLE example_table
    c String TTL d + INTERVAL 1 DAY;
 ```

-Изменение TTL у колонки
+Изменение `TTL` у колонки:

 ``` sql
 ALTER TABLE example_table
@ -462,23 +466,24 @@ ALTER TABLE example_table

 ### TTL таблицы {#mergetree-table-ttl}

-Для таблицы можно задать одно выражение для устаревания данных, а также несколько выражений, по срабатывании которых данные переместятся на [некоторый диск или том](#table_engine-mergetree-multiple-volumes). Когда некоторые данные в таблице устаревают, ClickHouse удаляет все соответствующие строки.
+Для таблицы можно задать одно выражение для устаревания данных, а также несколько выражений, при срабатывании которых данные будут перемещены на [некоторый диск или том](#table_engine-mergetree-multiple-volumes). Когда некоторые данные в таблице устаревают, ClickHouse удаляет все соответствующие строки. Операции перемещения или повторного сжатия данных выполняются, только когда устаревают все данные в куске.

 ``` sql
 TTL expr
-    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
+    [DELETE|RECOMPRESS codec_name1|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|RECOMPRESS codec_name2|TO DISK 'aaa'|TO VOLUME 'bbb'] ...
    [WHERE conditions]
    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ]
 ```

-За каждым TTL выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату TTL выражения:
+За каждым `TTL` выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату `TTL` выражения:

 -   `DELETE` - удалить данные (действие по умолчанию);
+-   `RECOMPRESS codec_name` - повторно сжать данные с помощью кодека `codec_name`;   
 -   `TO DISK 'aaa'` - переместить данные на диск `aaa`;
 -   `TO VOLUME 'bbb'` - переместить данные на том `bbb`;
 -   `GROUP BY` -  агрегировать данные.

-В секции `WHERE` можно задать условие удаления или агрегирования устаревших строк (для перемещения условие `WHERE` не применимо).
+В секции `WHERE` можно задать условие удаления или агрегирования устаревших строк (для перемещения и сжатия условие `WHERE` не применимо).

 Колонки, по которым агрегируются данные в `GROUP BY`, должны являться префиксом первичного ключа таблицы.

@ -486,7 +491,7 @@ TTL expr

 **Примеры**

-Создание таблицы с TTL:
+Создание таблицы с `TTL`:

 ``` sql
 CREATE TABLE example_table
@ -502,7 +507,7 @@ TTL d + INTERVAL 1 MONTH [DELETE],
    d + INTERVAL 2 WEEK TO DISK 'bbb';
 ```

-Изменение TTL:
+Изменение `TTL`:

 ``` sql
 ALTER TABLE example_table
@ -523,6 +528,21 @@ ORDER BY d
 TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1;
 ```

+Создание таблицы, в которой куски с устаревшими данными повторно сжимаются:
+
+```sql
+CREATE TABLE table_for_recompression
+(
+    d DateTime,
+    key UInt64,
+    value String
+) ENGINE MergeTree()
+ORDER BY tuple()
+PARTITION BY key
+TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL 1 YEAR RECOMPRESS CODEC(LZ4HC(10))
+SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0;
+```
+
 Создание таблицы, где устаревшие строки агрегируются. В результирующих строках колонка `x` содержит максимальное значение по сгруппированным строкам, `y` — минимальное значение, а `d` — случайное значение из одной из сгуппированных строк.

 ``` sql
@ -539,14 +559,18 @@ ORDER BY (k1, k2)
 TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
 ```

-**Удаление данных**
+### Удаление устаревших данных {#mergetree-removing-expired-data}

-Данные с истекшим TTL удаляются, когда ClickHouse мёржит куски данных.
+Данные с истекшим `TTL` удаляются, когда ClickHouse мёржит куски данных.

 Когда ClickHouse видит, что некоторые данные устарели, он выполняет внеплановые мёржи. Для управление частотой подобных мёржей, можно задать настройку `merge_with_ttl_timeout`. Если её значение слишком низкое, придется выполнять много внеплановых мёржей, которые могут начать потреблять значительную долю ресурсов сервера.

 Если вы выполните запрос `SELECT` между слияниями вы можете получить устаревшие данные. Чтобы избежать этого используйте запрос [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) перед `SELECT`.

+**См. также**
+
+- настройку [ttl_only_drop_parts](../../../operations/settings/settings.md#ttl_only_drop_parts)
+
 ## Хранение данных таблицы на нескольких блочных устройствах {#table_engine-mergetree-multiple-volumes}

 ### Введение {#introduction}
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@ -1865,10 +1865,25 @@ ClickHouse генерирует исключение

 ## input_format_parallel_parsing {#input-format-parallel-parsing}

-   Тип: bool
-   Значение по умолчанию: True
+Включает или отключает режим, при котором входящие данные разбиваются на части, парсинг каждой из которых осуществляется параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow).

-Включает режим, при котором входящие данные парсятся параллельно, но с сохранением исходного порядка следования. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow.
+Возможные значения:
+
+-   1 — включен режим параллельного разбора.
+-   0 — отключен режим параллельного разбора.
+
+Значение по умолчанию: `1`.
+
+## output_format_parallel_formatting {#output-format-parallel-formatting}
+
+Включает или отключает режим, при котором исходящие данные форматируются параллельно с сохранением исходного порядка. Поддерживается только для форматов [TSV](../../interfaces/formats.md#tabseparated), [TSKV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) и [JSONEachRow](../../interfaces/formats.md#jsoneachrow).
+
+Возможные значения:
+
+-   1 — включен режим параллельного форматирования.
+-   0 — отключен режим параллельного форматирования.
+
+Значение по умолчанию: `1`.

 ## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}

--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@ -383,8 +383,8 @@ void LocalServer::processQueries()
        throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR);

    /// Authenticate and create a context to execute queries.
-    Session session{global_context, ClientInfo::Interface::TCP};
-    session.authenticate("default", "", Poco::Net::SocketAddress{});
+    Session session{global_context, ClientInfo::Interface::LOCAL};
+    session.authenticate("default", "", {});

    /// Use the same context for all queries.
    auto context = session.makeQueryContext();
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -737,6 +737,10 @@ if (ThreadFuzzer::instance().isEffective())
            setupTmpPath(log, disk->getPath());
    }

+    /// Storage keeping all the backups.
+    fs::create_directories(path / "backups");
+    global_context->setBackupsVolume(config().getString("backups_path", path / "backups"), config().getString("backups_policy", ""));
+
    /** Directory with 'flags': files indicating temporary settings for the server set by system administrator.
      * Flags may be cleared automatically after being applied by the server.
      * Examples: do repair of local data; clone all replicated tables from replica.
--- a/src/AggregateFunctions/AggregateFunctionIf.cpp
+++ b/src/AggregateFunctions/AggregateFunctionIf.cpp
@ -1,5 +1,5 @@
-#include <AggregateFunctions/AggregateFunctionIf.h>
 #include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
+#include <AggregateFunctions/AggregateFunctionIf.h>
 #include "AggregateFunctionNull.h"


@ -11,6 +11,7 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ILLEGAL_AGGREGATION;
 }

 class AggregateFunctionCombinatorIf final : public IAggregateFunctionCombinator
@ -37,6 +38,10 @@ public:
        const DataTypes & arguments,
        const Array & params) const override
    {
+        if (nested_function->getName().find(getName()) != String::npos)
+        {
+            throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, "nested function for {0}-combinator must not have {0}-combinator", getName());
+        }
        return std::make_shared<AggregateFunctionIf>(nested_function, arguments, params);
    }
 };
--- a/src/AggregateFunctions/AggregateFunctionQuantile.cpp
+++ b/src/AggregateFunctions/AggregateFunctionQuantile.cpp
@ -56,6 +56,8 @@ template <typename Value, bool float_return> using FuncQuantilesTDigestWeighted
 template <typename Value, bool float_return> using FuncQuantileBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16, false, std::conditional_t<float_return, Float64, void>, false>;
 template <typename Value, bool float_return> using FuncQuantilesBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16, false, std::conditional_t<float_return, Float64, void>, true>;

+/// Weighted counterparts of FuncQuantileBFloat16 / FuncQuantilesBFloat16: same QuantileBFloat16Histogram data,
+/// but with the *Weighted name tags and `true` instead of `false` as the fourth template argument.
+/// NOTE(review): that argument is presumably the "has a second (weight) argument" flag of AggregateFunctionQuantile - confirm in the header.
+template <typename Value, bool float_return> using FuncQuantileBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, false>;
+template <typename Value, bool float_return> using FuncQuantilesBFloat16Weighted = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16Weighted, true, std::conditional_t<float_return, Float64, void>, true>;

 template <template <typename, bool> class Function>
 static constexpr bool supportDecimal()
@ -167,6 +169,9 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
    factory.registerFunction(NameQuantileBFloat16::name, createAggregateFunctionQuantile<FuncQuantileBFloat16>);
    factory.registerFunction(NameQuantilesBFloat16::name, { createAggregateFunctionQuantile<FuncQuantilesBFloat16>, properties });

+    factory.registerFunction(NameQuantileBFloat16Weighted::name, createAggregateFunctionQuantile<FuncQuantileBFloat16Weighted>);
+    factory.registerFunction(NameQuantilesBFloat16Weighted::name, createAggregateFunctionQuantile<FuncQuantilesBFloat16Weighted>);
+
    /// 'median' is an alias for 'quantile'
    factory.registerAlias("median", NameQuantile::name);
    factory.registerAlias("medianDeterministic", NameQuantileDeterministic::name);
@ -179,6 +184,7 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
    factory.registerAlias("medianTDigest", NameQuantileTDigest::name);
    factory.registerAlias("medianTDigestWeighted", NameQuantileTDigestWeighted::name);
    factory.registerAlias("medianBFloat16", NameQuantileBFloat16::name);
+    factory.registerAlias("medianBFloat16Weighted", NameQuantileBFloat16Weighted::name);
 }

 }
--- a/src/AggregateFunctions/AggregateFunctionQuantile.h
+++ b/src/AggregateFunctions/AggregateFunctionQuantile.h
@ -237,5 +237,7 @@ struct NameQuantilesTDigestWeighted { static constexpr auto name = "quantilesTDi

 struct NameQuantileBFloat16 { static constexpr auto name = "quantileBFloat16"; };
 struct NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16"; };
+/// Name tag carrying the SQL-visible name "quantileBFloat16Weighted".
+struct NameQuantileBFloat16Weighted { static constexpr auto name = "quantileBFloat16Weighted"; };
+/// Name tag carrying the SQL-visible name "quantilesBFloat16Weighted".
+struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBFloat16Weighted"; };

 }
--- a/src/Backups/BackupEntryConcat.cpp
+++ b/src/Backups/BackupEntryConcat.cpp
@ -0,0 +1,28 @@
+#include <Backups/BackupEntryConcat.h>
+#include <IO/ConcatReadBuffer.h>
+
+
+namespace DB
+{
+/// Takes ownership of both source entries and stores the optional precomputed checksum.
+/// The total size is not computed here; it is filled in by the first getSize() call.
+BackupEntryConcat::BackupEntryConcat(
+    BackupEntryPtr first_source_,
+    BackupEntryPtr second_source_,
+    const std::optional<UInt128> & checksum_)
+    : first_source(std::move(first_source_))
+    , second_source(std::move(second_source_))
+    , checksum(checksum_)
+{
+}
+
+/// Returns the combined size of both sources. The sum is computed once and cached in `size`.
+UInt64 BackupEntryConcat::getSize() const
+{
+    /// Fast path: the sum was already computed by an earlier call.
+    if (size)
+        return *size;
+
+    const UInt64 total = first_source->getSize() + second_source->getSize();
+    size = total;
+    return total;
+}
+
+/// Opens a read buffer on each source and wraps them, first source first, into a single ConcatReadBuffer.
+std::unique_ptr<ReadBuffer> BackupEntryConcat::getReadBuffer() const
+{
+    auto first_buffer = first_source->getReadBuffer();
+    auto second_buffer = second_source->getReadBuffer();
+    return std::make_unique<ConcatReadBuffer>(std::move(first_buffer), std::move(second_buffer));
+}
+}
--- a/src/Backups/BackupEntryConcat.h
+++ b/src/Backups/BackupEntryConcat.h
@ -0,0 +1,30 @@
+#pragma once
+
+#include <Backups/IBackupEntry.h>
+
+
+namespace DB
+{
+
+/// Concatenates data of two backup entries.
+/// The size is the sum of the sizes of both sources, and the read buffer yields the first source followed by the second.
+class BackupEntryConcat : public IBackupEntry
+{
+public:
+    /// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
+    BackupEntryConcat(
+        BackupEntryPtr first_source_,
+        BackupEntryPtr second_source_,
+        const std::optional<UInt128> & checksum_ = {});
+
+    /// Sum of the sizes of both sources; computed on the first call and cached.
+    UInt64 getSize() const override;
+    /// Returns the checksum passed to the constructor, or an empty optional if none was given.
+    std::optional<UInt128> getChecksum() const override { return checksum; }
+    /// Creates a new buffer over the concatenated data on every call.
+    std::unique_ptr<ReadBuffer> getReadBuffer() const override;
+
+private:
+    BackupEntryPtr first_source;
+    BackupEntryPtr second_source;
+    /// Lazily filled cache for getSize(), hence `mutable`.
+    /// NOTE(review): written from a const method without a lock, unlike BackupEntryFromImmutableFile::file_size
+    /// which is guarded by a mutex - confirm that one entry is never queried from several threads.
+    mutable std::optional<UInt64> size;
+    std::optional<UInt128> checksum;
+};
+
+}
--- a/src/Backups/BackupEntryFromAppendOnlyFile.cpp
+++ b/src/Backups/BackupEntryFromAppendOnlyFile.cpp
@ -0,0 +1,35 @@
+#include <Backups/BackupEntryFromAppendOnlyFile.h>
+#include <IO/LimitReadBuffer.h>
+
+
+namespace DB
+{
+
+/// Constructor for a file addressed by a plain path (no disk).
+/// `limit` is fixed here to the size reported by the base class, i.e. `file_size_` if given, otherwise the current file size.
+BackupEntryFromAppendOnlyFile::BackupEntryFromAppendOnlyFile(
+    const String & file_path_,
+    const std::optional<UInt64> & file_size_,
+    const std::optional<UInt128> & checksum_,
+    const std::shared_ptr<Poco::TemporaryFile> & temporary_file_)
+    : BackupEntryFromImmutableFile(file_path_, file_size_, checksum_, temporary_file_)
+    /// The call is qualified on purpose: this class's own getSize() returns `limit`, which is not initialized yet.
+    , limit(BackupEntryFromImmutableFile::getSize())
+{
+}
+
+/// Constructor for a file located on `disk_`.
+/// `limit` is fixed here to the size reported by the base class, i.e. `file_size_` if given, otherwise the current file size.
+BackupEntryFromAppendOnlyFile::BackupEntryFromAppendOnlyFile(
+    const DiskPtr & disk_,
+    const String & file_path_,
+    const std::optional<UInt64> & file_size_,
+    const std::optional<UInt128> & checksum_,
+    const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
+    : BackupEntryFromImmutableFile(disk_, file_path_, file_size_, checksum_, temporary_file_)
+    /// The call is qualified on purpose: this class's own getSize() returns `limit`, which is not initialized yet.
+    , limit(BackupEntryFromImmutableFile::getSize())
+{
+}
+
+/// Returns a buffer exposing only the first `limit` bytes of the file, i.e. the part that existed when this
+/// entry was constructed. Data appended to the file afterwards is not included in the backup.
+std::unique_ptr<ReadBuffer> BackupEntryFromAppendOnlyFile::getReadBuffer() const
+{
+    auto buf = BackupEntryFromImmutableFile::getReadBuffer();
+    /// The third argument of LimitReadBuffer is `throw_exception`. It must be false here, so that reaching
+    /// `limit` is reported as a normal end of data. With `true` the buffer throws once a reader asks for more
+    /// data after `limit` bytes, which is what any read-until-EOF of this entry does.
+    return std::make_unique<LimitReadBuffer>(std::move(buf), limit, false);
+}
+
+}
--- a/src/Backups/BackupEntryFromAppendOnlyFile.h
+++ b/src/Backups/BackupEntryFromAppendOnlyFile.h
@ -0,0 +1,35 @@
+#pragma once
+
+#include <Backups/BackupEntryFromImmutableFile.h>
+
+
+namespace DB
+{
+
+/// Represents a file prepared to be included in a backup, assuming that until this backup entry is destroyed
+/// the file can be appended with new data, but the bytes which are already in the file won't be changed.
+/// The size is captured once at construction, and reads are limited to that many bytes.
+class BackupEntryFromAppendOnlyFile : public BackupEntryFromImmutableFile
+{
+public:
+    /// The constructor is allowed to not set `file_size_` or `checksum_`, in that case it will be calculated from the data.
+    BackupEntryFromAppendOnlyFile(
+        const String & file_path_,
+        const std::optional<UInt64> & file_size_ = {},
+        const std::optional<UInt128> & checksum_ = {},
+        const std::shared_ptr<Poco::TemporaryFile> & temporary_file_ = {});
+
+    /// Same as above, but the file is read through `disk_`.
+    BackupEntryFromAppendOnlyFile(
+        const DiskPtr & disk_,
+        const String & file_path_,
+        const std::optional<UInt64> & file_size_ = {},
+        const std::optional<UInt128> & checksum_ = {},
+        const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
+
+    /// Returns the size captured at construction; later appends to the file do not change it.
+    UInt64 getSize() const override { return limit; }
+    /// Returns a buffer over the file wrapped into a LimitReadBuffer with `limit`.
+    std::unique_ptr<ReadBuffer> getReadBuffer() const override;
+
+private:
+    /// Number of bytes of the file that belong to this entry; set once in the constructor.
+    const UInt64 limit;
+};
+
+}
--- a/src/Backups/BackupEntryFromImmutableFile.cpp
+++ b/src/Backups/BackupEntryFromImmutableFile.cpp
@ -0,0 +1,47 @@
+#include <Backups/BackupEntryFromImmutableFile.h>
+#include <Disks/IDisk.h>
+#include <IO/createReadBufferFromFileBase.h>
+#include <Poco/File.h>
+
+
+namespace DB
+{
+
+/// Constructor for a file addressed by a plain path: `disk` stays null, so getSize() and getReadBuffer()
+/// take their non-disk branches (Poco::File and createReadBufferFromFileBase).
+BackupEntryFromImmutableFile::BackupEntryFromImmutableFile(
+    const String & file_path_,
+    const std::optional<UInt64> & file_size_,
+    const std::optional<UInt128> & checksum_,
+    const std::shared_ptr<Poco::TemporaryFile> & temporary_file_)
+    : file_path(file_path_), file_size(file_size_), checksum(checksum_), temporary_file(temporary_file_)
+{
+}
+
+/// Constructor for a file located on `disk_`: size queries and reads go through the disk interface.
+BackupEntryFromImmutableFile::BackupEntryFromImmutableFile(
+    const DiskPtr & disk_,
+    const String & file_path_,
+    const std::optional<UInt64> & file_size_,
+    const std::optional<UInt128> & checksum_,
+    const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_)
+    : disk(disk_), file_path(file_path_), file_size(file_size_), checksum(checksum_), temporary_file_on_disk(temporary_file_)
+{
+}
+
+/// Defaulted here rather than in the header, which only declares the destructor.
+BackupEntryFromImmutableFile::~BackupEntryFromImmutableFile() = default;
+
+/// Returns the file size. If it was not passed to the constructor, it is queried once
+/// (from the disk if there is one, otherwise via Poco::File) and cached under `get_file_size_mutex`.
+UInt64 BackupEntryFromImmutableFile::getSize() const
+{
+    std::lock_guard lock{get_file_size_mutex};
+
+    if (file_size)
+        return *file_size;
+
+    if (disk)
+        file_size = disk->getFileSize(file_path);
+    else
+        file_size = Poco::File(file_path).getSize();
+
+    return *file_size;
+}
+
+/// Opens the file for reading: through the disk if one was given, otherwise directly by path.
+std::unique_ptr<ReadBuffer> BackupEntryFromImmutableFile::getReadBuffer() const
+{
+    /// No disk: the path refers to the local filesystem.
+    if (!disk)
+        return createReadBufferFromFileBase(file_path, 0, 0, 0, nullptr);
+
+    return disk->readFile(file_path);
+}
+
+}
--- a/src/Backups/BackupEntryFromImmutableFile.h
+++ b/src/Backups/BackupEntryFromImmutableFile.h
@ -0,0 +1,51 @@
+#pragma once
+
+#include <Backups/IBackupEntry.h>
+#include <mutex>
+
+namespace Poco { class TemporaryFile; }
+
+namespace DB
+{
+class TemporaryFileOnDisk;
+class IDisk;
+using DiskPtr = std::shared_ptr<IDisk>;
+
+/// Represents a file prepared to be included in a backup, assuming that until this backup entry is destroyed the file won't be changed.
+/// The file is addressed either by a plain path or by a path on a disk, depending on the constructor used.
+class BackupEntryFromImmutableFile : public IBackupEntry
+{
+public:
+    /// The constructor is allowed to not set `file_size_` or `checksum_`, in that case it will be calculated from the data.
+    BackupEntryFromImmutableFile(
+        const String & file_path_,
+        const std::optional<UInt64> & file_size_ = {},
+        const std::optional<UInt128> & checksum_ = {},
+        const std::shared_ptr<Poco::TemporaryFile> & temporary_file_ = {});
+
+    /// Same as above, but the file is accessed through `disk_`.
+    BackupEntryFromImmutableFile(
+        const DiskPtr & disk_,
+        const String & file_path_,
+        const std::optional<UInt64> & file_size_ = {},
+        const std::optional<UInt128> & checksum_ = {},
+        const std::shared_ptr<TemporaryFileOnDisk> & temporary_file_ = {});
+
+    ~BackupEntryFromImmutableFile() override;
+
+    /// Returns the size passed to the constructor, or queries and caches the size of the file on the first call.
+    UInt64 getSize() const override;
+    /// Returns the checksum passed to the constructor, or an empty optional if none was given.
+    std::optional<UInt128> getChecksum() const override { return checksum; }
+    /// Opens the file for reading; a new buffer is created on every call.
+    std::unique_ptr<ReadBuffer> getReadBuffer() const override;
+
+    String getFilePath() const { return file_path; }
+    /// Null if the entry was constructed from a plain path.
+    DiskPtr getDisk() const { return disk; }
+
+private:
+    const DiskPtr disk;
+    const String file_path;
+    /// Lazily filled by getSize() (hence `mutable`); access is serialized by `get_file_size_mutex`.
+    mutable std::optional<UInt64> file_size;
+    mutable std::mutex get_file_size_mutex;
+    const std::optional<UInt128> checksum;
+    /// These two are only stored, never read by this class.
+    /// NOTE(review): presumably held to keep the temporary file alive as long as the entry exists - confirm.
+    const std::shared_ptr<Poco::TemporaryFile> temporary_file;
+    const std::shared_ptr<TemporaryFileOnDisk> temporary_file_on_disk;
+};
+
+}
--- a/src/Backups/BackupEntryFromMemory.cpp
+++ b/src/Backups/BackupEntryFromMemory.cpp
@ -0,0 +1,23 @@
+#include <Backups/BackupEntryFromMemory.h>
+#include <IO/ReadBufferFromString.h>
+
+
+namespace DB
+{
+
+/// Copies `size_` bytes starting at `data_` into a String and delegates to the String-taking constructor.
+BackupEntryFromMemory::BackupEntryFromMemory(const void * data_, size_t size_, const std::optional<UInt128> & checksum_)
+    : BackupEntryFromMemory(String(static_cast<const char *>(data_), size_), checksum_)
+{
+}
+
+/// Takes `data_` by value and moves it into the entry, so the entry owns its own copy of the bytes.
+BackupEntryFromMemory::BackupEntryFromMemory(String data_, const std::optional<UInt128> & checksum_)
+    : data(std::move(data_)), checksum(checksum_)
+{
+}
+
+/// Creates a new ReadBufferFromString over the stored `data` on every call.
+/// NOTE(review): the buffer presumably refers to `data` without copying it, so it must not outlive this entry - confirm.
+std::unique_ptr<ReadBuffer> BackupEntryFromMemory::getReadBuffer() const
+{
+    return std::make_unique<ReadBufferFromString>(data);
+}
+
+}
--- a/src/Backups/BackupEntryFromMemory.h
+++ b/src/Backups/BackupEntryFromMemory.h
@ -0,0 +1,27 @@
+#pragma once
+
+#include <Backups/IBackupEntry.h>
+#include <IO/ReadBufferFromString.h>
+
+
+namespace DB
+{
+
+/// Represents small preloaded data to be included in a backup.
+/// The bytes are stored inside the entry itself as a String.
+class BackupEntryFromMemory : public IBackupEntry
+{
+public:
+    /// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
+    /// The first overload copies `size_` bytes from `data_`; the second takes the String by value.
+    BackupEntryFromMemory(const void * data_, size_t size_, const std::optional<UInt128> & checksum_ = {});
+    BackupEntryFromMemory(String data_, const std::optional<UInt128> & checksum_ = {});
+
+    /// Number of stored bytes.
+    UInt64 getSize() const override { return data.size(); }
+    /// Returns the checksum passed to the constructor, or an empty optional if none was given.
+    std::optional<UInt128> getChecksum() const override { return checksum; }
+    /// Creates a new buffer over the stored bytes on every call.
+    std::unique_ptr<ReadBuffer> getReadBuffer() const override;
+
+private:
+    const String data;
+    const std::optional<UInt128> checksum;
+};
+
+}
--- a/src/Backups/BackupEntryFromSmallFile.cpp
+++ b/src/Backups/BackupEntryFromSmallFile.cpp
@ -0,0 +1,39 @@
+#include <Backups/BackupEntryFromSmallFile.h>
+#include <Disks/IDisk.h>
+#include <IO/createReadBufferFromFileBase.h>
+#include <IO/ReadHelpers.h>
+
+
+namespace DB
+{
+namespace
+{
+    /// Reads the whole file at `file_path` (plain path, no disk) into a string.
+    String readFile(const String & file_path)
+    {
+        String contents;
+        auto in = createReadBufferFromFileBase(file_path, 0, 0, 0, nullptr);
+        readStringUntilEOF(contents, *in);
+        return contents;
+    }
+
+    /// Reads the whole file `file_path` from `disk` into a string.
+    String readFile(const DiskPtr & disk, const String & file_path)
+    {
+        String contents;
+        auto in = disk->readFile(file_path);
+        readStringUntilEOF(contents, *in);
+        return contents;
+    }
+}
+
+
+/// Reads the whole file into memory right away, in the constructor; the entry does not touch the file afterwards.
+/// `disk` is left null because the file is addressed by a plain path.
+BackupEntryFromSmallFile::BackupEntryFromSmallFile(const String & file_path_, const std::optional<UInt128> & checksum_)
+    : BackupEntryFromMemory(readFile(file_path_), checksum_), file_path(file_path_)
+{
+}
+
+/// Reads the whole file from `disk_` into memory right away, in the constructor;
+/// the disk and the path are remembered only for getDisk() / getFilePath().
+BackupEntryFromSmallFile::BackupEntryFromSmallFile(
+    const DiskPtr & disk_, const String & file_path_, const std::optional<UInt128> & checksum_)
+    : BackupEntryFromMemory(readFile(disk_, file_path_), checksum_), disk(disk_), file_path(file_path_)
+{
+}
+}
--- a/src/Backups/BackupEntryFromSmallFile.h
+++ b/src/Backups/BackupEntryFromSmallFile.h
@ -0,0 +1,34 @@
+#pragma once
+
+#include <Backups/BackupEntryFromMemory.h>
+
+
+namespace DB
+{
+class IDisk;
+using DiskPtr = std::shared_ptr<IDisk>;
+
+/// Represents a file prepared to be included in a backup,
+/// assuming that the file is small and can be easily loaded into memory.
+/// The contents are read in the constructor and then served by BackupEntryFromMemory.
+class BackupEntryFromSmallFile : public BackupEntryFromMemory
+{
+public:
+    /// The constructor is allowed to not set `checksum_`, in that case it will be calculated from the data.
+    BackupEntryFromSmallFile(
+        const String & file_path_,
+        const std::optional<UInt128> & checksum_ = {});
+
+    /// Same as above, but the file is read through `disk_`.
+    BackupEntryFromSmallFile(
+        const DiskPtr & disk_,
+        const String & file_path_,
+        const std::optional<UInt128> & checksum_ = {});
+
+    String getFilePath() const { return file_path; }
+    /// Null if the entry was constructed from a plain path.
+    DiskPtr getDisk() const { return disk; }
+
+private:
+    const DiskPtr disk;
+    const String file_path;
+};
+
+}
--- a/src/Backups/BackupFactory.cpp
+++ b/src/Backups/BackupFactory.cpp
@ -0,0 +1,65 @@
+#include <Backups/BackupFactory.h>
+#include <Backups/BackupInDirectory.h>
+#include <Interpreters/Context.h>
+#include <Disks/IVolume.h>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int BACKUP_NOT_FOUND;
+    extern const int BACKUP_ALREADY_EXISTS;
+    extern const int NOT_ENOUGH_SPACE;
+    extern const int LOGICAL_ERROR;
+}
+
+
+/// Returns the process-wide factory, constructed on first use.
+BackupFactory & BackupFactory::instance()
+{
+    static BackupFactory factory;
+    return factory;
+}
+
+/// Sets the volume on which backups are created and looked up.
+/// Until this is called, createBackup() and openBackup() throw LOGICAL_ERROR ("No backups volume").
+void BackupFactory::setBackupsVolume(VolumePtr backups_volume_)
+{
+    backups_volume = backups_volume_;
+}
+
+/// Creates a new backup named `backup_name` on the backups volume and opens it for writing.
+/// Throws LOGICAL_ERROR if no volume was set, BACKUP_ALREADY_EXISTS if any disk of the volume already
+/// contains `backup_name`, and NOT_ENOUGH_SPACE if `estimated_backup_size` bytes cannot be reserved.
+BackupMutablePtr BackupFactory::createBackup(const String & backup_name, UInt64 estimated_backup_size, const BackupPtr & base_backup) const
+{
+    if (!backups_volume)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "No backups volume");
+
+    /// The name must be unused on every disk of the volume, not only on the disk chosen below.
+    for (const auto & candidate_disk : backups_volume->getDisks())
+        if (candidate_disk->exists(backup_name))
+            throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", quoteString(backup_name));
+
+    /// The reservation decides which disk of the volume will hold the new backup.
+    if (auto space = backups_volume->reserve(estimated_backup_size))
+        return std::make_shared<BackupInDirectory>(IBackup::OpenMode::WRITE, space->getDisk(), backup_name, base_backup);
+
+    throw Exception(
+        ErrorCodes::NOT_ENOUGH_SPACE,
+        "Couldn't reserve {} bytes of free space for new backup {}",
+        estimated_backup_size,
+        quoteString(backup_name));
+}
+
+/// Opens the existing backup `backup_name` for reading from the first disk of the volume that contains it.
+/// Throws LOGICAL_ERROR if no volume was set and BACKUP_NOT_FOUND if no disk has such a backup.
+BackupPtr BackupFactory::openBackup(const String & backup_name, const BackupPtr & base_backup) const
+{
+    if (!backups_volume)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "No backups volume");
+
+    for (const auto & candidate_disk : backups_volume->getDisks())
+    {
+        if (!candidate_disk->exists(backup_name))
+            continue;
+        return std::make_shared<BackupInDirectory>(IBackup::OpenMode::READ, candidate_disk, backup_name, base_backup);
+    }
+
+    throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", quoteString(backup_name));
+}
+
+}
--- a/src/Backups/BackupFactory.h
+++ b/src/Backups/BackupFactory.h
@ -0,0 +1,38 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <boost/noncopyable.hpp>
+#include <memory>
+
+
+namespace DB
+{
+class IBackup;
+using BackupPtr = std::shared_ptr<const IBackup>;
+using BackupMutablePtr = std::shared_ptr<IBackup>;
+class Context;
+using ContextMutablePtr = std::shared_ptr<Context>;
+class IVolume;
+using VolumePtr = std::shared_ptr<IVolume>;
+
+
+/// Factory for implementations of the IBackup interface.
+/// A single instance is shared by the whole process, see instance().
+class BackupFactory : boost::noncopyable
+{
+public:
+    static BackupFactory & instance();
+
+    /// Must be called to initialize the backup factory.
+    /// Without a volume, createBackup() and openBackup() throw.
+    void setBackupsVolume(VolumePtr backups_volume_);
+
+    /// Creates a new backup and opens it for writing.
+    /// `estimated_backup_size` is the number of bytes reserved on the backups volume for the new backup.
+    /// Throws if a backup with this name already exists or the space cannot be reserved.
+    BackupMutablePtr createBackup(const String & backup_name, UInt64 estimated_backup_size, const BackupPtr & base_backup = {}) const;
+
+    /// Opens an existing backup for reading.
+    /// Throws if no disk of the backups volume contains a backup with this name.
+    BackupPtr openBackup(const String & backup_name, const BackupPtr & base_backup = {}) const;
+
+private:
+    /// Volume whose disks store the backups; null until setBackupsVolume() is called.
+    VolumePtr backups_volume;
+};
+
+}
--- a/src/Backups/BackupInDirectory.cpp
+++ b/src/Backups/BackupInDirectory.cpp
@ -0,0 +1,454 @@
+#include <Backups/BackupInDirectory.h>
+#include <Backups/BackupFactory.h>
+#include <Backups/BackupEntryConcat.h>
+#include <Backups/BackupEntryFromImmutableFile.h>
+#include <Backups/BackupEntryFromMemory.h>
+#include <Backups/IBackupEntry.h>
+#include <Common/StringUtils/StringUtils.h>
+#include <Common/typeid_cast.h>
+#include <Common/quoteString.h>
+#include <Disks/DiskSelector.h>
+#include <Disks/IDisk.h>
+#include <IO/HashingReadBuffer.h>
+#include <IO/ReadBufferFromFileBase.h>
+#include <IO/ReadHelpers.h>
+#include <IO/SeekableReadBuffer.h>
+#include <IO/WriteBufferFromFileBase.h>
+#include <IO/WriteHelpers.h>
+#include <IO/copyData.h>
+#include <boost/range/adaptor/map.hpp>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int BACKUP_NOT_FOUND;
+    extern const int BACKUP_ALREADY_EXISTS;
+    extern const int BACKUP_VERSION_NOT_SUPPORTED;
+    extern const int BACKUP_DAMAGED;
+    extern const int NO_BASE_BACKUP;
+    extern const int WRONG_BASE_BACKUP;
+    extern const int BACKUP_ENTRY_ALREADY_EXISTS;
+    extern const int BACKUP_ENTRY_NOT_FOUND;
+    extern const int BAD_ARGUMENTS;
+    extern const int LOGICAL_ERROR;
+}
+
+namespace
+{
+    /// Format version stored at the beginning of the ".contents" file
+    /// and verified when that file is read back.
+    constexpr UInt64 BACKUP_VERSION = 1;
+}
+
+/// Opens (OpenMode::READ) or creates (OpenMode::WRITE) a backup located in the directory `path_` on `disk_`.
+/// `path_with_sep` always ends with '/', while trailing '/' characters are trimmed from `path`.
+/// Any exception thrown by open() is propagated to the caller.
+BackupInDirectory::BackupInDirectory(OpenMode open_mode_, const DiskPtr & disk_, const String & path_, const std::shared_ptr<const IBackup> & base_backup_)
+    : open_mode(open_mode_), disk(disk_), path(path_), path_with_sep(path_), base_backup(base_backup_)
+{
+    if (!path_with_sep.ends_with('/'))
+        path_with_sep += '/';
+    trimRight(path, '/');
+
+    try
+    {
+        open();
+    }
+    catch (...)
+    {
+        /// The destructor is not invoked for an object whose constructor throws,
+        /// so a directory created by open() has to be removed here, otherwise it would stay on the disk
+        /// and the next attempt to create a backup with the same name would fail with BACKUP_ALREADY_EXISTS.
+        try
+        {
+            close();
+        }
+        catch (...)
+        {
+            /// Best-effort cleanup: the original error from open() is the one worth reporting.
+        }
+        throw;
+    }
+}
+
+/// Closes the backup; a backup opened for writing which was never finalized is removed from the disk by close().
+/// NOTE(review): close() calls disk->removeRecursive(), which may throw - confirm that is acceptable in a destructor.
+BackupInDirectory::~BackupInDirectory()
+{
+    this->close();
+}
+
+/// Prepares the backup according to `open_mode`.
+/// WRITE: the directory must not exist yet; it is created and the path to the base backup is stored in it.
+/// READ: the directory must exist; the list of entries is loaded and then the path to the base backup.
+void BackupInDirectory::open()
+{
+    if (open_mode == OpenMode::WRITE)
+    {
+        const bool already_exists = disk->exists(path);
+        if (already_exists)
+            throw Exception(ErrorCodes::BACKUP_ALREADY_EXISTS, "Backup {} already exists", quoteString(path));
+
+        disk->createDirectories(path);
+        /// From this point close() removes the directory unless the backup gets finalized.
+        directory_was_created = true;
+        writePathToBaseBackup();
+        return;
+    }
+
+    if (open_mode != OpenMode::READ)
+        return;
+
+    const bool is_directory = disk->isDirectory(path);
+    if (!is_directory)
+        throw Exception(ErrorCodes::BACKUP_NOT_FOUND, "Backup {} not found", quoteString(path));
+
+    readContents();
+    readPathToBaseBackup();
+}
+
+/// Removes the backup's directory if it was created by this object for writing
+/// but finalizeWriting() has not completed. Does nothing otherwise.
+void BackupInDirectory::close()
+{
+    const bool writing_was_abandoned = (open_mode == OpenMode::WRITE) && !finalized && directory_was_created;
+    if (!writing_was_abandoned)
+        return;
+
+    /// The backup wasn't written completely, so it's unusable
+    /// and there is no reason to keep its files.
+    disk->removeRecursive(path);
+}
+
+/// Stores the path of the base backup in the ".base_backup" file of this backup.
+/// If there is no base backup the file is removed (if it exists) instead.
+void BackupInDirectory::writePathToBaseBackup()
+{
+    const String base_backup_file = path_with_sep + ".base_backup";
+    if (base_backup)
+    {
+        auto out = disk->writeFile(base_backup_file);
+        writeString(base_backup->getPath(), *out);
+    }
+    else
+    {
+        disk->removeFileIfExists(base_backup_file);
+    }
+}
+
+/// Opens the base backup whose path is stored in the ".base_backup" file.
+/// Nothing is done if a base backup is already set, or if the file is missing or empty.
+void BackupInDirectory::readPathToBaseBackup()
+{
+    /// An already assigned base backup takes precedence over the stored path.
+    if (base_backup)
+        return;
+
+    const String base_backup_file = path_with_sep + ".base_backup";
+    const bool file_exists = disk->exists(base_backup_file);
+    if (!file_exists)
+        return;
+
+    auto in = disk->readFile(base_backup_file);
+    String stored_path;
+    readStringUntilEOF(stored_path, *in);
+    if (!stored_path.empty())
+        base_backup = BackupFactory::instance().openBackup(stored_path);
+}
+
+/// Serializes `infos` to the ".contents" file: the format version, the number of entries, and then for each entry
+/// its name and size. For non-empty entries the checksum and the base size follow; the base checksum is written
+/// only if the base size is neither zero nor equal to the entry's size.
+void BackupInDirectory::writeContents()
+{
+    auto out = disk->writeFile(path_with_sep + ".contents");
+    auto & buf = *out;
+
+    writeVarUInt(BACKUP_VERSION, buf);
+    writeVarUInt(infos.size(), buf);
+
+    for (const auto & name_and_info : infos)
+    {
+        const auto & entry_name = name_and_info.first;
+        const auto & entry_info = name_and_info.second;
+
+        writeBinary(entry_name, buf);
+        writeVarUInt(entry_info.size, buf);
+        if (!entry_info.size)
+            continue; /// Nothing else is stored for an empty entry.
+
+        writeBinary(entry_info.checksum, buf);
+        writeVarUInt(entry_info.base_size, buf);
+
+        const bool partly_from_base = entry_info.base_size && (entry_info.base_size != entry_info.size);
+        if (partly_from_base)
+            writeBinary(entry_info.base_checksum, buf);
+    }
+}
+
+/// Loads `infos` from the ".contents" file (the counterpart of writeContents()).
+/// Throws BACKUP_VERSION_NOT_SUPPORTED if the stored version differs from BACKUP_VERSION.
+void BackupInDirectory::readContents()
+{
+    auto in = disk->readFile(path_with_sep + ".contents");
+    auto & buf = *in;
+
+    UInt64 version;
+    readVarUInt(version, buf);
+    if (version != BACKUP_VERSION)
+        throw Exception(ErrorCodes::BACKUP_VERSION_NOT_SUPPORTED, "Backup {}: Version {} is not supported", quoteString(path), version);
+
+    size_t num_infos;
+    readVarUInt(num_infos, buf);
+    infos.clear();
+    for (size_t remaining = num_infos; remaining != 0; --remaining)
+    {
+        String entry_name;
+        readBinary(entry_name, buf);
+
+        EntryInfo entry_info;
+        readVarUInt(entry_info.size, buf);
+        if (entry_info.size)
+        {
+            readBinary(entry_info.checksum, buf);
+            readVarUInt(entry_info.base_size, buf);
+            if (entry_info.base_size == entry_info.size)
+            {
+                /// The whole entry comes from the base backup, so the base checksum isn't stored separately.
+                entry_info.base_checksum = entry_info.checksum;
+            }
+            else if (entry_info.base_size)
+            {
+                readBinary(entry_info.base_checksum, buf);
+            }
+        }
+        infos.emplace(entry_name, entry_info);
+    }
+}
+
+/// Returns the mode passed to the constructor.
+IBackup::OpenMode BackupInDirectory::getOpenMode() const
+{
+    return this->open_mode;
+}
+
+/// Returns the path of the backup's directory (the form kept in `path`, i.e. after trimming trailing '/').
+String BackupInDirectory::getPath() const
+{
+    return this->path;
+}
+
+/// Returns the names of entries starting with `prefix`, with the prefix removed.
+/// If `terminator` isn't empty each name is cut at the first occurrence of the terminator
+/// and consecutive equal results are returned only once.
+/// `prefix` must be empty or end with '/', otherwise BAD_ARGUMENTS is thrown.
+Strings BackupInDirectory::list(const String & prefix, const String & terminator) const
+{
+    const bool prefix_is_valid = prefix.empty() || prefix.ends_with('/');
+    if (!prefix_is_valid)
+        throw Exception("prefix should end with '/'", ErrorCodes::BAD_ARGUMENTS);
+
+    std::lock_guard lock{mutex};
+    const size_t prefix_length = prefix.length();
+    Strings result;
+
+    /// `infos` is ordered, so all the names sharing the prefix follow each other starting at lower_bound().
+    for (auto it = infos.lower_bound(prefix); (it != infos.end()) && it->first.starts_with(prefix); ++it)
+    {
+        std::string_view element{it->first};
+        element.remove_prefix(prefix_length);
+        if (!terminator.empty())
+            element = element.substr(0, element.find(terminator));
+
+        if (result.empty() || !(result.back() == element))
+            result.emplace_back(element);
+    }
+    return result;
+}
+
+/// Checks whether the backup has an entry called `name`.
+bool BackupInDirectory::exists(const String & name) const
+{
+    std::lock_guard lock{mutex};
+    const auto found = infos.find(name);
+    return found != infos.end();
+}
+
+/// Returns the full size of the entry `name`.
+/// Throws BACKUP_ENTRY_NOT_FOUND if there is no such entry.
+size_t BackupInDirectory::getSize(const String & name) const
+{
+    std::lock_guard lock{mutex};
+    const auto found = infos.find(name);
+    if (found != infos.end())
+        return found->second.size;
+
+    throw Exception(
+        ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", quoteString(path), quoteString(name));
+}
+
+/// Returns the checksum stored for the entry `name`.
+/// Throws BACKUP_ENTRY_NOT_FOUND if there is no such entry.
+UInt128 BackupInDirectory::getChecksum(const String & name) const
+{
+    std::lock_guard lock{mutex};
+    const auto found = infos.find(name);
+    if (found != infos.end())
+        return found->second.checksum;
+
+    throw Exception(
+        ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", quoteString(path), quoteString(name));
+}
+
+
+/// Returns the entry `name` of this backup.
+/// Depending on the stored info the data is empty, comes only from this backup, comes only from the base backup,
+/// or is a concatenation: the beginning comes from the base backup and the rest comes from this backup.
+/// Throws:
+///   BACKUP_ENTRY_NOT_FOUND - there is no entry `name` in this backup;
+///   NO_BASE_BACKUP - the entry needs a base backup but none is set;
+///   BACKUP_DAMAGED - the stored size is less than the stored base size;
+///   WRONG_BASE_BACKUP - the base backup has no such entry, or its size or checksum is unexpected.
+BackupEntryPtr BackupInDirectory::read(const String & name) const
+{
+    std::lock_guard lock{mutex};
+    auto it = infos.find(name);
+    if (it == infos.end())
+        throw Exception(
+            ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Backup {}: Entry {} not found in the backup", quoteString(path), quoteString(name));
+
+    const auto & info = it->second;
+    if (!info.size)
+    {
+        /// Entry's data is empty.
+        return std::make_unique<BackupEntryFromMemory>(nullptr, 0, UInt128{0, 0});
+    }
+
+    if (!info.base_size)
+    {
+        /// Data goes completely from this backup, the base backup isn't used.
+        return std::make_unique<BackupEntryFromImmutableFile>(disk, path_with_sep + name, info.size, info.checksum);
+    }
+
+    /// This check must go before any use of `base_backup` (the messages below call base_backup->getPath()).
+    if (!base_backup)
+    {
+        throw Exception(
+            ErrorCodes::NO_BASE_BACKUP,
+            "Backup {}: Entry {} is marked to be read from a base backup, but there is no base backup specified",
+            quoteString(path), quoteString(name));
+    }
+
+    if (info.size < info.base_size)
+    {
+        throw Exception(
+            ErrorCodes::BACKUP_DAMAGED,
+            "Backup {}: Entry {} has its data size less than in the base backup {}: {} < {}",
+            quoteString(path), quoteString(name), quoteString(base_backup->getPath()), info.size, info.base_size);
+    }
+
+    if (!base_backup->exists(name))
+    {
+        throw Exception(
+            ErrorCodes::WRONG_BASE_BACKUP,
+            "Backup {}: Entry {} is marked to be read from a base backup, but doesn't exist there",
+            quoteString(path), quoteString(name));
+    }
+
+    auto base_entry = base_backup->read(name);
+    auto base_size = base_entry->getSize();
+    if (base_size != info.base_size)
+    {
+        throw Exception(
+            ErrorCodes::WRONG_BASE_BACKUP,
+            "Backup {}: Entry {} has unexpected size in the base backup {}: {} (expected size: {})",
+            quoteString(path), quoteString(name), quoteString(base_backup->getPath()), base_size, info.base_size);
+    }
+
+    /// The base entry may not provide a checksum; in that case only the size has been verified.
+    auto base_checksum = base_entry->getChecksum();
+    if (base_checksum && (*base_checksum != info.base_checksum))
+    {
+        throw Exception(
+            ErrorCodes::WRONG_BASE_BACKUP,
+            "Backup {}: Entry {} has unexpected checksum in the base backup {}",
+            quoteString(path), quoteString(name), quoteString(base_backup->getPath()));
+    }
+
+    if (info.size == info.base_size)
+    {
+        /// Data goes completely from the base backup (nothing goes from this backup).
+        return base_entry;
+    }
+
+    /// The beginning of the data goes from the base backup,
+    /// and the ending goes from this backup.
+    return std::make_unique<BackupEntryConcat>(
+        std::move(base_entry),
+        std::make_unique<BackupEntryFromImmutableFile>(disk, path_with_sep + name, info.size - info.base_size),
+        info.checksum);
+}
+
+
+/// Adds a new entry `name` to a backup opened for writing.
+/// If the base backup has an entry with the same name and its data matches the whole new data or a beginning of it,
+/// only the difference is copied to this backup and the info of the entry refers to the base backup.
+/// Otherwise the whole data is copied to the file `path_with_sep + name`.
+/// Throws LOGICAL_ERROR if the backup isn't opened for writing,
+/// and BACKUP_ENTRY_ALREADY_EXISTS if an entry with this name was already added.
+void BackupInDirectory::write(const String & name, BackupEntryPtr entry)
+{
+    std::lock_guard lock{mutex};
+    if (open_mode != OpenMode::WRITE)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal operation: Cannot write to a backup opened for reading");
+
+    if (infos.contains(name))
+        throw Exception(
+            ErrorCodes::BACKUP_ENTRY_ALREADY_EXISTS, "Backup {}: Entry {} already exists", quoteString(path), quoteString(name));
+
+    UInt64 size = entry->getSize();
+    /// The entry may not know its checksum; then it's calculated below while reading the data.
+    std::optional<UInt128> checksum = entry->getChecksum();
+
+    /// Check if the entry's data is empty.
+    if (!size)
+    {
+        /// No file is written for an empty entry, only a default (all zero) info is registered.
+        infos.emplace(name, EntryInfo{});
+        return;
+    }
+
+    /// Check if an entry with such name exists in the base backup.
+    bool base_exists = (base_backup && base_backup->exists(name));
+    UInt64 base_size = 0;
+    UInt128 base_checksum{0, 0};
+    if (base_exists)
+    {
+        base_size = base_backup->getSize(name);
+        base_checksum = base_backup->getChecksum(name);
+    }
+
+    std::unique_ptr<ReadBuffer> read_buffer; /// We'll set that later.
+    UInt64 read_pos = 0; /// Current position in read_buffer.
+
+    /// Determine whether it's possible to receive this entry's data from the base backup completely or partly.
+    bool use_base = false;
+    if (base_exists && base_size)
+    {
+        if (size == base_size)
+        {
+            /// The size is the same, we need to compare checksums to find out
+            /// if the entry's data has not been changed since the base backup.
+            if (!checksum)
+            {
+                read_buffer = entry->getReadBuffer();
+                HashingReadBuffer hashing_read_buffer{*read_buffer};
+                hashing_read_buffer.ignore(size);
+                read_pos = size;
+                checksum = hashing_read_buffer.getHash();
+            }
+            if (checksum == base_checksum)
+                use_base = true; /// The data has not been changed.
+        }
+        else if (size > base_size)
+        {
+            /// The size has been increased, we need to calculate a partial checksum to find out
+            /// if the entry's data has been only appended since the base backup.
+            /// NOTE(review): this assumes that getHash() called after ignore(base_size) covers exactly
+            /// the first `base_size` bytes, and that `read_buffer` is left positioned at `read_pos` - confirm
+            /// against the implementation of HashingReadBuffer.
+            read_buffer = entry->getReadBuffer();
+            HashingReadBuffer hashing_read_buffer{*read_buffer};
+            hashing_read_buffer.ignore(base_size);
+            UInt128 partial_checksum = hashing_read_buffer.getHash();
+            read_pos = base_size;
+            if (!checksum)
+            {
+                hashing_read_buffer.ignore(size - base_size);
+                checksum = hashing_read_buffer.getHash();
+                read_pos = size;
+            }
+            if (partial_checksum == base_checksum)
+                use_base = true; /// The data has been appended.
+        }
+        /// If the size has decreased the base backup cannot be used, `use_base` stays false.
+    }
+
+    if (use_base && (size == base_size))
+    {
+        /// The entry's data has not been changed since the base backup.
+        EntryInfo info;
+        info.size = base_size;
+        info.checksum = base_checksum;
+        info.base_size = base_size;
+        info.base_checksum = base_checksum;
+        infos.emplace(name, info);
+        return;
+    }
+
+    {
+        /// Either the entry didn't exist in the base backup (or its data differs)
+        /// or the entry has data appended to the end of the data from the base backup.
+        /// In both those cases we have to copy data to this backup.
+
+        /// Find out where the start position to copy data is.
+        auto copy_pos = use_base ? base_size : 0;
+
+        /// Move the current read position to the start position to copy data.
+        /// If `read_buffer` is seekable it's easier, otherwise we can use ignore().
+        /// A buffer which is not seekable cannot go back, so it's dropped here and reopened below.
+        if ((read_pos > copy_pos) && !typeid_cast<SeekableReadBuffer *>(read_buffer.get()))
+        {
+            read_buffer.reset();
+            read_pos = 0;
+        }
+
+        if (!read_buffer)
+            read_buffer = entry->getReadBuffer();
+
+        if (read_pos != copy_pos)
+        {
+            if (auto * seekable_buffer = typeid_cast<SeekableReadBuffer *>(read_buffer.get()))
+                seekable_buffer->seek(copy_pos, SEEK_SET);
+            else if (copy_pos)
+                read_buffer->ignore(copy_pos - read_pos);
+        }
+
+        /// If we haven't received or calculated a checksum yet, calculate it now.
+        /// The checksum can be unknown here only if `copy_pos` is zero (the branches above which set `use_base`
+        /// always calculate it), so the hashing buffer sees the whole data.
+        ReadBuffer * maybe_hashing_read_buffer = read_buffer.get();
+        std::optional<HashingReadBuffer> hashing_read_buffer;
+        if (!checksum)
+            maybe_hashing_read_buffer = &hashing_read_buffer.emplace(*read_buffer);
+
+        /// Copy the entry's data after `copy_pos`.
+        String out_file_path = path_with_sep + name;
+        disk->createDirectories(directoryPath(out_file_path));
+        auto out = disk->writeFile(out_file_path);
+
+        copyData(*maybe_hashing_read_buffer, *out, size - copy_pos);
+
+        if (hashing_read_buffer)
+            checksum = hashing_read_buffer->getHash();
+
+        /// Done!
+        EntryInfo info;
+        info.size = size;
+        info.checksum = *checksum;
+        if (use_base)
+        {
+            info.base_size = base_size;
+            info.base_checksum = base_checksum;
+        }
+        infos.emplace(name, info);
+    }
+}
+
+/// Completes the writing of the backup: stores the list of entries in the ".contents" file
+/// and marks the backup as finalized, so its directory is not removed when the backup is closed.
+/// Throws LOGICAL_ERROR if the backup isn't opened for writing.
+void BackupInDirectory::finalizeWriting()
+{
+    /// `infos` is read by writeContents(), so it's protected by the same mutex as in write()
+    /// to avoid serializing the map while another thread is adding an entry.
+    std::lock_guard lock{mutex};
+    if (open_mode != OpenMode::WRITE)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal operation: Cannot write to a backup opened for reading");
+    writeContents();
+    finalized = true;
+}
+
+}
--- a/src/Backups/BackupInDirectory.h
+++ b/src/Backups/BackupInDirectory.h
@ -0,0 +1,66 @@
+#pragma once
+
+#include <Backups/IBackup.h>
+#include <map>
+#include <mutex>
+
+
+namespace DB
+{
+class IDisk;
+using DiskPtr = std::shared_ptr<IDisk>;
+
+/// Represents a backup stored on a disk.
+/// A backup is stored as a directory, the data of each entry is stored as a file in that directory
+/// (except for the data which is taken from the base backup).
+/// Also two system files are stored:
+/// 1) ".base_backup" is a text file containing the path to the base backup (written only if there is a base backup).
+/// 2) ".contents" is a binary file containing a list of all entries along with their sizes
+/// and checksums and information whether the base backup should be used for each entry.
+/// If the writing of a backup isn't completed by finalizeWriting() the directory is removed when the backup is closed.
+class BackupInDirectory : public IBackup
+{
+public:
+    /// Opens an existing backup (OpenMode::READ) or creates a new one (OpenMode::WRITE) in the directory `path_` on `disk_`.
+    BackupInDirectory(OpenMode open_mode_, const DiskPtr & disk_, const String & path_, const std::shared_ptr<const IBackup> & base_backup_ = {});
+    ~BackupInDirectory() override;
+
+    OpenMode getOpenMode() const override;
+    String getPath() const override;
+    /// Lists entries starting with `prefix` (which must be empty or end with '/'); names are cut at `terminator` if it's not empty.
+    Strings list(const String & prefix, const String & terminator) const override;
+    bool exists(const String & name) const override;
+    /// getSize(), getChecksum() and read() throw BACKUP_ENTRY_NOT_FOUND if there is no entry `name`.
+    size_t getSize(const String & name) const override;
+    UInt128 getChecksum(const String & name) const override;
+    BackupEntryPtr read(const String & name) const override;
+    /// Adds a new entry; allowed only for a backup opened for writing.
+    void write(const String & name, BackupEntryPtr entry) override;
+    /// Writes the ".contents" file and marks the backup as completed.
+    void finalizeWriting() override;
+
+private:
+    void open();
+    void close();
+    void writePathToBaseBackup();
+    void readPathToBaseBackup();
+    void writeContents();
+    void readContents();
+
+    /// Information stored in ".contents" for each entry.
+    struct EntryInfo
+    {
+        /// Full size and checksum of the entry's data.
+        UInt64 size = 0;
+        UInt128 checksum{0, 0};
+
+        /// for incremental backups:
+        /// the size and the checksum of the beginning of the data which is taken from the base backup
+        /// (`base_size` is zero if the base backup isn't used for this entry).
+        UInt64 base_size = 0;
+        UInt128 base_checksum{0, 0};
+    };
+
+    const OpenMode open_mode;
+    const DiskPtr disk;
+    String path;          /// Path to the backup's directory, trailing '/' trimmed.
+    String path_with_sep; /// The same path, always ending with '/'.
+    std::shared_ptr<const IBackup> base_backup;
+    std::map<String, EntryInfo> infos; /// Entries by name; accessed under `mutex` by the public methods.
+    bool directory_was_created = false; /// Set after open() has created the directory in the WRITE mode.
+    bool finalized = false;             /// Set by finalizeWriting().
+    mutable std::mutex mutex;
+};
+
+}
--- a/src/Backups/BackupRenamingConfig.cpp
+++ b/src/Backups/BackupRenamingConfig.cpp
@ -0,0 +1,89 @@
+#include <Backups/BackupRenamingConfig.h>
+#include <Parsers/ASTBackupQuery.h>
+
+
+namespace DB
+{
+using Kind = ASTBackupQuery::Kind;
+using ElementType = ASTBackupQuery::ElementType;
+
+/// Registers that the table `old_table_name` is renamed to `new_table_name`; a previously registered renaming is replaced.
+void BackupRenamingConfig::setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name)
+{
+    old_to_new_table_names.insert_or_assign(old_table_name, new_table_name);
+}
+
+/// Registers that the database `old_database_name` is renamed to `new_database_name`; a previously registered renaming is replaced.
+void BackupRenamingConfig::setNewDatabaseName(const String & old_database_name, const String & new_database_name)
+{
+    old_to_new_database_names.insert_or_assign(old_database_name, new_database_name);
+}
+
+/// Registers that the temporary table `old_temporary_table_name` is renamed to `new_temporary_table_name`;
+/// a previously registered renaming is replaced.
+void BackupRenamingConfig::setNewTemporaryTableName(const String & old_temporary_table_name, const String & new_temporary_table_name)
+{
+    old_to_new_temporary_table_names.insert_or_assign(old_temporary_table_name, new_temporary_table_name);
+}
+
+/// Registers the renamings specified by the elements of `backup_query`.
+void BackupRenamingConfig::setFromBackupQuery(const ASTBackupQuery & backup_query)
+{
+    const auto & query_elements = backup_query.elements;
+    setFromBackupQueryElements(query_elements);
+}
+
+/// Registers a renaming for each element of type TABLE, DICTIONARY, DATABASE or TEMPORARY_TABLE.
+/// An element without a new name is registered as renamed to itself.
+/// Elements of other types don't carry a renaming and are skipped.
+void BackupRenamingConfig::setFromBackupQueryElements(const ASTBackupQuery::Elements & backup_query_elements)
+{
+    for (const auto & element : backup_query_elements)
+    {
+        const auto & old_name = element.name;
+        const auto & requested_name = element.new_name;
+
+        switch (element.type)
+        {
+            case ElementType::TABLE: [[fallthrough]];
+            case ElementType::DICTIONARY:
+            {
+                /// The new name is considered unset if its table part is empty.
+                setNewTableName(old_name, requested_name.second.empty() ? old_name : requested_name);
+                break;
+            }
+
+            case ElementType::DATABASE:
+            {
+                /// Only the database part (`first`) is used.
+                setNewDatabaseName(old_name.first, requested_name.first.empty() ? old_name.first : requested_name.first);
+                break;
+            }
+
+            case ElementType::TEMPORARY_TABLE:
+            {
+                /// Only the table part (`second`) is used.
+                setNewTemporaryTableName(old_name.second, requested_name.second.empty() ? old_name.second : requested_name.second);
+                break;
+            }
+
+            case ElementType::ALL_DATABASES: [[fallthrough]];
+            case ElementType::ALL_TEMPORARY_TABLES: [[fallthrough]];
+            case ElementType::EVERYTHING:
+                break;
+        }
+    }
+}
+
+/// Returns the new name of a table. If no renaming is registered for the table itself
+/// the renaming of its database (if any) is applied and the table's own name is kept.
+DatabaseAndTableName BackupRenamingConfig::getNewTableName(const DatabaseAndTableName & old_table_name) const
+{
+    const auto found = old_to_new_table_names.find(old_table_name);
+    if (found == old_to_new_table_names.end())
+        return {getNewDatabaseName(old_table_name.first), old_table_name.second};
+    return found->second;
+}
+
+/// Returns the new name of a database, or `old_database_name` itself if no renaming is registered.
+/// In the latter case the result refers to the argument, so it must not outlive it.
+const String & BackupRenamingConfig::getNewDatabaseName(const String & old_database_name) const
+{
+    const auto found = old_to_new_database_names.find(old_database_name);
+    const bool is_renamed = (found != old_to_new_database_names.end());
+    return is_renamed ? found->second : old_database_name;
+}
+
+/// Returns the new name of a temporary table, or `old_temporary_table_name` itself if no renaming is registered.
+/// In the latter case the result refers to the argument, so it must not outlive it.
+const String & BackupRenamingConfig::getNewTemporaryTableName(const String & old_temporary_table_name) const
+{
+    const auto found = old_to_new_temporary_table_names.find(old_temporary_table_name);
+    const bool is_renamed = (found != old_to_new_temporary_table_names.end());
+    return is_renamed ? found->second : old_temporary_table_name;
+}
+
+}
--- a/src/Backups/BackupRenamingConfig.h
+++ b/src/Backups/BackupRenamingConfig.h
@ -0,0 +1,39 @@
+#pragma once
+
+#include <Parsers/ASTBackupQuery.h>
+#include <Core/Types.h>
+#include <map>
+#include <unordered_map>
+
+
+namespace DB
+{
+using DatabaseAndTableName = std::pair<String, String>;
+
+/// Keeps information about renamings of databases or tables being processed
+/// while we're making a backup or while we're restoring from a backup.
+/// Renamings of tables, databases and temporary tables are kept in three separate maps.
+class BackupRenamingConfig
+{
+public:
+    BackupRenamingConfig() = default;
+
+    /// Register a renaming; a renaming registered earlier for the same old name is replaced.
+    void setNewTableName(const DatabaseAndTableName & old_table_name, const DatabaseAndTableName & new_table_name);
+    void setNewDatabaseName(const String & old_database_name, const String & new_database_name);
+    void setNewTemporaryTableName(const String & old_temporary_table_name, const String & new_temporary_table_name);
+
+    /// Register the renamings specified in a BACKUP/RESTORE query.
+    /// Elements without a new name are registered as renamed to themselves.
+    void setFromBackupQuery(const ASTBackupQuery & backup_query);
+    void setFromBackupQueryElements(const ASTBackupQuery::Elements & backup_query_elements);
+
+    /// Changes names according to the renaming.
+    /// getNewTableName() falls back to the renaming of the table's database if the table itself isn't renamed.
+    /// The functions returning a reference return their argument if no renaming is registered,
+    /// so the result must not outlive the argument.
+    DatabaseAndTableName getNewTableName(const DatabaseAndTableName & old_table_name) const;
+    const String & getNewDatabaseName(const String & old_database_name) const;
+    const String & getNewTemporaryTableName(const String & old_temporary_table_name) const;
+
+private:
+    /// Old name -> new name.
+    std::map<DatabaseAndTableName, DatabaseAndTableName> old_to_new_table_names;
+    std::unordered_map<String, String> old_to_new_database_names;
+    std::unordered_map<String, String> old_to_new_temporary_table_names;
+};
+
+using BackupRenamingConfigPtr = std::shared_ptr<const BackupRenamingConfig>;
+
+}
--- a/src/Backups/BackupSettings.cpp
+++ b/src/Backups/BackupSettings.cpp
@ -0,0 +1,6 @@
+#include <Backups/BackupSettings.h>
+
+namespace DB
+{
+/// Presumably expands to the definitions of BackupSettingsTraits for the settings
+/// listed in LIST_OF_BACKUP_SETTINGS - see Core/BaseSettings.h to confirm.
+IMPLEMENT_SETTINGS_TRAITS(BackupSettingsTraits, LIST_OF_BACKUP_SETTINGS)
+}
--- a/src/Backups/BackupSettings.h
+++ b/src/Backups/BackupSettings.h
@ -0,0 +1,16 @@
+#pragma once
+
+#include <Core/BaseSettings.h>
+
+
+namespace DB
+{
+
+/// List of the settings of a backup. Each item is passed to the macro `M`;
+/// the arguments look like (type, name, default value, description, flags) - TODO confirm against Core/BaseSettings.h.
+#define LIST_OF_BACKUP_SETTINGS(M) \
+    M(String, base_backup, "", "Name of the base backup. Only differences made after the base backup will be included in a newly created backup, so this option allows to make an incremental backup.", 0) \
+
+DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(BackupSettingsTraits, LIST_OF_BACKUP_SETTINGS)
+
+/// Settings of a backup; all the members come from BaseSettings<BackupSettingsTraits>.
+struct BackupSettings : public BaseSettings<BackupSettingsTraits> {};
+
+}
--- a/src/Backups/BackupUtils.cpp
+++ b/src/Backups/BackupUtils.cpp
@ -0,0 +1,830 @@
+#include <Backups/BackupUtils.h>
+#include <Backups/BackupEntryFromMemory.h>
+#include <Backups/BackupRenamingConfig.h>
+#include <Backups/IBackup.h>
+#include <Backups/hasCompatibleDataToRestoreTable.h>
+#include <Backups/renameInCreateQuery.h>
+#include <Common/escapeForFileName.h>
+#include <Databases/IDatabase.h>
+#include <IO/ReadHelpers.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/InterpreterCreateQuery.h>
+#include <Parsers/ASTCreateQuery.h>
+#include <Parsers/ParserCreateQuery.h>
+#include <Parsers/parseQuery.h>
+#include <Parsers/formatAST.h>
+#include <Storages/IStorage.h>
+#include <common/insertAtEnd.h>
+#include <boost/range/adaptor/reversed.hpp>
+#include <filesystem>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int BACKUP_ELEMENT_DUPLICATE;
+    extern const int BACKUP_IS_EMPTY;
+    extern const int LOGICAL_ERROR;
+    extern const int TABLE_ALREADY_EXISTS;
+    extern const int CANNOT_RESTORE_TABLE;
+}
+
+namespace
+{
+    using Kind = ASTBackupQuery::Kind;
+    using Element = ASTBackupQuery::Element;
+    using Elements = ASTBackupQuery::Elements;
+    using ElementType = ASTBackupQuery::ElementType;
+
+    /// Rewrites elements of types DICTIONARY or EVERYTHING using other element types:
+    /// DICTIONARY becomes TABLE, EVERYTHING becomes ALL_DATABASES plus an appended ALL_TEMPORARY_TABLES element.
+    void replaceElementTypesWithBaseElementTypes(Elements & elements)
+    {
+        /// The size is re-read on each iteration because elements can be appended inside the loop.
+        for (size_t pos = 0; pos != elements.size(); ++pos)
+        {
+            const auto type = elements[pos].type;
+            if (type == ElementType::DICTIONARY)
+            {
+                elements[pos].type = ElementType::TABLE;
+            }
+            else if (type == ElementType::EVERYTHING)
+            {
+                /// The existing element is updated before appending a new one.
+                elements[pos].type = ElementType::ALL_DATABASES;
+                elements.emplace_back().type = ElementType::ALL_TEMPORARY_TABLES;
+            }
+        }
+    }
+
+    /// For elements of type TABLE sets the database part of `name` and `new_name` to `current_database`
+    /// if that part is empty while the table part is specified.
+    void replaceEmptyDatabaseWithCurrentDatabase(Elements & elements, const String & current_database)
+    {
+        auto fill_database_if_empty = [&current_database](auto & db_and_table)
+        {
+            if (db_and_table.first.empty() && !db_and_table.second.empty())
+                db_and_table.first = current_database;
+        };
+
+        for (auto & element : elements)
+        {
+            if (element.type != ElementType::TABLE)
+                continue;
+            fill_database_if_empty(element.name);
+            fill_database_if_empty(element.new_name);
+        }
+    }
+
+    /// Converts elements of type TEMPORARY_TABLE to TABLE and elements of type ALL_TEMPORARY_TABLES to DATABASE,
+    /// using DatabaseCatalog::TEMPORARY_DATABASE as the name of the database.
+    void replaceTemporaryTablesWithTemporaryDatabase(Elements & elements)
+    {
+        for (auto & element : elements)
+        {
+            if (element.type == ElementType::TEMPORARY_TABLE)
+            {
+                element.type = ElementType::TABLE;
+                element.name.first = DatabaseCatalog::TEMPORARY_DATABASE;
+
+                /// A new name which has only the table part refers to the temporary database too.
+                const bool new_name_lacks_database = element.new_name.first.empty() && !element.new_name.second.empty();
+                if (new_name_lacks_database)
+                    element.new_name.first = DatabaseCatalog::TEMPORARY_DATABASE;
+            }
+            else if (element.type == ElementType::ALL_TEMPORARY_TABLES)
+            {
+                element.type = ElementType::DATABASE;
+                element.name.first = DatabaseCatalog::TEMPORARY_DATABASE;
+            }
+        }
+    }
+
+    /// Copies `name` to `new_name` for those elements of type TABLE or DATABASE which have no new name
+    /// (for a table the table part of `new_name` is checked, for a database - the database part).
+    void setNewNamesIfNotSet(Elements & elements)
+    {
+        for (auto & element : elements)
+        {
+            bool new_name_is_missing = false;
+            if (element.type == ElementType::TABLE)
+                new_name_is_missing = element.new_name.second.empty();
+            else if (element.type == ElementType::DATABASE)
+                new_name_is_missing = element.new_name.first.empty();
+
+            if (new_name_is_missing)
+                element.new_name = element.name;
+        }
+    }
+
+    /// Removes duplications in the elements of a backup query by removing some excessive elements and by updating except_lists.
+    /// This function helps deduplicate elements in queries like "BACKUP ALL DATABASES, DATABASE xxx USING NAME yyy"
+    /// (we need a deduplication for that query because `ALL DATABASES` includes `xxx` however we don't want
+    /// to backup/restore the same database twice while executing the same query).
+    /// Also this function slightly reorders elements: it puts databases before tables and dictionaries they contain.
+    void deduplicateAndReorderElements(Elements & elements)
+    {
+        std::set<size_t> skip_indices; /// Indices of elements which should be removed in the end of this function.
+        size_t index_all_databases = static_cast<size_t>(-1); /// Index of the first element of type ALL_DATABASES or -1 if not found.
+
+        struct DatabaseInfo
+        {
+            size_t index = static_cast<size_t>(-1);
+            std::unordered_map<std::string_view, size_t> tables;
+        };
+        std::unordered_map<std::string_view, DatabaseInfo> databases; /// Found databases and tables.
+
+        for (size_t i = 0; i != elements.size(); ++i)
+        {
+            auto & element = elements[i];
+            switch (element.type)
+            {
+                case ElementType::TABLE:
+                {
+                    auto & tables = databases.emplace(element.name.first, DatabaseInfo{}).first->second.tables;
+                    auto it = tables.find(element.name.second);
+                    if (it == tables.end())
+                    {
+                        tables.emplace(element.name.second, i);
+                    }
+                    else
+                    {
+                        size_t prev_index = it->second;
+                        if ((elements[i].new_name == elements[prev_index].new_name)
+                            && (elements[i].partitions.empty() == elements[prev_index].partitions.empty()))
+                        {
+                            insertAtEnd(elements[prev_index].partitions, elements[i].partitions);
+                            skip_indices.emplace(i);
+                        }
+                        else
+                        {
+                            throw Exception(
+                                "Table " + backQuote(element.name.first) + "." + backQuote(element.name.second) + " was specified twice",
+                                ErrorCodes::BACKUP_ELEMENT_DUPLICATE);
+                        }
+                    }
+                    break;
+                }
+
+                case ElementType::DATABASE:
+                {
+                    auto it = databases.find(element.name.first);
+                    if (it == databases.end())
+                    {
+                        DatabaseInfo new_db_info;
+                        new_db_info.index = i;
+                        databases.emplace(element.name.first, new_db_info);
+                    }
+                    else if (it->second.index == static_cast<size_t>(-1))
+                    {
+                        it->second.index = i;
+                    }
+                    else
+                    {
+                        size_t prev_index = it->second.index;
+                        if ((elements[i].new_name == elements[prev_index].new_name)
+                            && (elements[i].except_list == elements[prev_index].except_list))
+                        {
+                            skip_indices.emplace(i);
+                        }
+                        else
+                        {
+                            throw Exception("Database " + backQuote(element.name.first) + " was specified twice", ErrorCodes::BACKUP_ELEMENT_DUPLICATE);
+                        }
+
+                    }
+                    break;
+                }
+
+                case ElementType::ALL_DATABASES:
+                {
+                    if (index_all_databases == static_cast<size_t>(-1))
+                    {
+                        index_all_databases = i;
+                    }
+                    else
+                    {
+                        size_t prev_index = index_all_databases;
+                        if (elements[i].except_list == elements[prev_index].except_list)
+                            skip_indices.emplace(i);
+                        else
+                            throw Exception("The tag ALL DATABASES was specified twice", ErrorCodes::BACKUP_ELEMENT_DUPLICATE);
+                    }
+                    break;
+                }
+
+                default:
+                    /// replaceElementTypesWithBaseElementTypes() and replaceTemporaryTablesWithTemporaryDatabase() should have removed all other element types.
+                    throw Exception("Unexpected element type: " + std::to_string(static_cast<int>(element.type)), ErrorCodes::LOGICAL_ERROR);
+            }
+        }
+
+        if (index_all_databases != static_cast<size_t>(-1))
+        {
+            for (auto & [database_name, database] : databases)
+            {
+                elements[index_all_databases].except_list.emplace(database_name);
+                if (database.index == static_cast<size_t>(-1))
+                {
+                    auto & new_element = elements.emplace_back();
+                    new_element.type = ElementType::DATABASE;
+                    new_element.name.first = database_name;
+                    new_element.new_name = new_element.name;
+                    database.index = elements.size() - 1;
+                }
+            }
+        }
+
+        for (auto & [database_name, database] : databases)
+        {
+            if (database.index == static_cast<size_t>(-1))
+                continue;
+            for (const auto & [table_name, table_index] : database.tables)
+                elements[database.index].except_list.emplace(table_name);
+        }
+
+        /// Reorder the elements: databases should be before tables and dictionaries they contain.
+        for (auto & [database_name, database] : databases)
+        {
+            if (database.index == static_cast<size_t>(-1))
+                continue;
+            size_t min_index = std::numeric_limits<size_t>::max();
+            auto min_index_it = database.tables.end();
+            for (auto it = database.tables.begin(); it != database.tables.end(); ++it)
+            {
+                if (min_index > it->second)
+                {
+                    min_index = it->second;
+                    min_index_it = it;
+                }
+            }
+            if (database.index > min_index)
+            {
+                std::swap(elements[database.index], elements[min_index]);
+                std::swap(database.index, min_index_it->second);
+            }
+        }
+
+        for (auto skip_index : skip_indices | boost::adaptors::reversed)
+            elements.erase(elements.begin() + skip_index);
+    }
+
+    /// Returns a normalized copy of `elements`; the passed elements themselves are left untouched.
+    /// The normalization passes are applied to the copy one after another in a fixed order.
+    Elements adjustElements(const Elements & elements, const String & current_database)
+    {
+        Elements adjusted = elements;
+
+        /// Bring element types and database names to their base form first.
+        replaceElementTypesWithBaseElementTypes(adjusted);
+        replaceEmptyDatabaseWithCurrentDatabase(adjusted, current_database);
+        replaceTemporaryTablesWithTemporaryDatabase(adjusted);
+
+        /// Then fill in the new names and finally deduplicate and reorder the result.
+        setNewNamesIfNotSet(adjusted);
+        deduplicateAndReorderElements(adjusted);
+
+        return adjusted;
+    }
+
+    /// Returns the directory inside a backup where the data of the specified table is stored,
+    /// i.e. "data/<escaped database name>/<escaped table name>/".
+    /// Throws LOGICAL_ERROR if either the database name or the table name is empty.
+    String getDataPathInBackup(const DatabaseAndTableName & table_name)
+    {
+        /// This check is always active (in release builds too), so no additional assert() is needed after it.
+        if (table_name.first.empty() || table_name.second.empty())
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name and table name must not be empty");
+        return String{"data/"} + escapeForFileName(table_name.first) + "/" + escapeForFileName(table_name.second) + "/";
+    }
+
+    /// Returns the data path inside a backup for the table described by a create query.
+    /// A create query without a table name yields an empty path;
+    /// a temporary table is placed under the temporary database.
+    String getDataPathInBackup(const IAST & create_query)
+    {
+        const auto & ast = create_query.as<const ASTCreateQuery &>();
+        if (ast.table.empty())
+            return {};
+
+        DatabaseAndTableName name_in_backup;
+        name_in_backup.first = ast.temporary ? String{DatabaseCatalog::TEMPORARY_DATABASE} : ast.database;
+        name_in_backup.second = ast.table;
+        return getDataPathInBackup(name_in_backup);
+    }
+
+    /// Builds the path of the file inside a backup which holds the create query of a table:
+    /// "metadata/<escaped database name>/<escaped table name>.sql". Both names must be non-empty.
+    String getMetadataPathInBackup(const DatabaseAndTableName & table_name)
+    {
+        const auto & [database, table] = table_name;
+        const bool has_both_names = !database.empty() && !table.empty();
+        if (!has_both_names)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name and table name must not be empty");
+
+        String path{"metadata/"};
+        path += escapeForFileName(database);
+        path += "/";
+        path += escapeForFileName(table);
+        path += ".sql";
+        return path;
+    }
+
+    /// Builds the path of the file inside a backup which holds the create query of a database:
+    /// "metadata/<escaped database name>.sql". The database name must be non-empty.
+    String getMetadataPathInBackup(const String & database_name)
+    {
+        if (!database_name.empty())
+            return String{"metadata/"} + escapeForFileName(database_name) + ".sql";
+
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name must not be empty");
+    }
+
+    /// Returns the metadata path inside a backup for the object described by a create query.
+    /// Without a table name the path is built from the database name alone;
+    /// a temporary table is placed under the temporary database.
+    String getMetadataPathInBackup(const IAST & create_query)
+    {
+        const auto & ast = create_query.as<const ASTCreateQuery &>();
+        if (ast.table.empty())
+            return getMetadataPathInBackup(ast.database);
+
+        DatabaseAndTableName name_in_backup;
+        name_in_backup.first = ast.temporary ? String{DatabaseCatalog::TEMPORARY_DATABASE} : ast.database;
+        name_in_backup.second = ast.table;
+        return getMetadataPathInBackup(name_in_backup);
+    }
+
+    /// Appends an in-memory backup entry with the serialized create query;
+    /// the entry is named by the metadata path derived from the same query.
+    void backupCreateQuery(const IAST & create_query, BackupEntries & backup_entries)
+    {
+        String serialized_query = serializeAST(create_query);
+        auto entry = std::make_unique<BackupEntryFromMemory>(std::move(serialized_query));
+        backup_entries.emplace_back(getMetadataPathInBackup(create_query), std::move(entry));
+    }
+
+    /// Adds the backup entries of a single table: first its (renamed) create query, then its data if there is any.
+    /// SELECT access to the table is checked before anything is added.
+    void backupTable(
+        const DatabaseAndTable & database_and_table,
+        const String & table_name,
+        const ASTs & partitions,
+        const ContextPtr & context,
+        const BackupRenamingConfigPtr & renaming_config,
+        BackupEntries & backup_entries)
+    {
+        const auto & [database, storage] = database_and_table;
+        context->checkAccess(AccessType::SELECT, database->getDatabaseName(), table_name);
+
+        /// Metadata: the create query with names replaced according to the renaming config.
+        auto original_create_query = database->getCreateTableQuery(table_name, context);
+        ASTPtr renamed_create_query = renameInCreateQuery(original_create_query, renaming_config, context);
+        backupCreateQuery(*renamed_create_query, backup_entries);
+
+        /// Data: the paths returned by the storage are appended to the table's data path in the backup.
+        auto table_data = storage->backup(partitions, context);
+        if (table_data.empty())
+            return;
+
+        const String data_dir = getDataPathInBackup(*renamed_create_query);
+        for (auto & data_entry : table_data)
+            backup_entries.emplace_back(data_dir + data_entry.first, std::move(data_entry.second));
+    }
+
+    /// Adds the backup entries of a database: its (renamed) create query followed by
+    /// every table provided by the database's iterator for backup, except those named in `except_list`.
+    /// SHOW TABLES access to the database is checked first.
+    void backupDatabase(
+        const DatabasePtr & database,
+        const std::set<String> & except_list,
+        const ContextPtr & context,
+        const BackupRenamingConfigPtr & renaming_config,
+        BackupEntries & backup_entries)
+    {
+        context->checkAccess(AccessType::SHOW_TABLES, database->getDatabaseName());
+
+        auto original_create_query = database->getCreateDatabaseQuery();
+        ASTPtr renamed_create_query = renameInCreateQuery(original_create_query, renaming_config, context);
+        backupCreateQuery(*renamed_create_query, backup_entries);
+
+        /// Tables are always backed up as a whole here (no partition filter).
+        auto tables_it = database->getTablesIteratorForBackup(context);
+        while (tables_it->isValid())
+        {
+            if (!except_list.contains(tables_it->name()))
+                backupTable({database, tables_it->table()}, tables_it->name(), {}, context, renaming_config, backup_entries);
+            tables_it->next();
+        }
+    }
+
+    /// Adds the backup entries of every database in the catalog, skipping the databases named
+    /// in `except_list` as well as the system database and the database for temporary tables.
+    void backupAllDatabases(
+        const std::set<String> & except_list,
+        const ContextPtr & context,
+        const BackupRenamingConfigPtr & renaming_config,
+        BackupEntries & backup_entries)
+    {
+        for (const auto & name_and_database : DatabaseCatalog::instance().getDatabases())
+        {
+            const auto & name = name_and_database.first;
+            const bool is_internal = (name == DatabaseCatalog::SYSTEM_DATABASE) || (name == DatabaseCatalog::TEMPORARY_DATABASE);
+            if (is_internal || except_list.contains(name))
+                continue;
+
+            /// No tables are excluded inside the databases which are backed up.
+            backupDatabase(name_and_database.second, {}, context, renaming_config, backup_entries);
+        }
+    }
+
+    /// Makes sure a database with the specified name exists: if the catalog doesn't know it yet
+    /// a `CREATE DATABASE IF NOT EXISTS` query is built for that name and executed.
+    void makeDatabaseIfNotExists(const String & database_name, ContextMutablePtr context)
+    {
+        const bool already_exists = DatabaseCatalog::instance().isDatabaseExist(database_name);
+        if (!already_exists)
+        {
+            auto ast = std::make_shared<ASTCreateQuery>();
+            ast->database = database_name;
+            ast->if_not_exists = true;
+
+            InterpreterCreateQuery interpreter{ast, context};
+            interpreter.execute();
+        }
+    }
+
+    /// Reads the create query of a table from its metadata file in the backup and parses it.
+    ASTPtr readCreateQueryFromBackup(const DatabaseAndTableName & table_name, const BackupPtr & backup)
+    {
+        String query_text;
+        {
+            /// The read buffer is released as soon as the whole file has been read.
+            auto in = backup->read(getMetadataPathInBackup(table_name))->getReadBuffer();
+            readStringUntilEOF(query_text, *in);
+        }
+
+        ParserCreateQuery parser;
+        return parseQuery(parser, query_text, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
+    }
+
+    /// Reads the create query of a database from its metadata file in the backup and parses it.
+    ASTPtr readCreateQueryFromBackup(const String & database_name, const BackupPtr & backup)
+    {
+        String query_text;
+        {
+            /// The read buffer is released as soon as the whole file has been read.
+            auto in = backup->read(getMetadataPathInBackup(database_name))->getReadBuffer();
+            readStringUntilEOF(query_text, *in);
+        }
+
+        ParserCreateQuery parser;
+        return parseQuery(parser, query_text, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
+    }
+
+    /// Schedules restoring of a single table from `backup`.
+    /// The table's create query is read from the backup and renamed right away; creating the table and
+    /// preparing the tasks which restore its data are deferred to the task appended to `restore_tasks`.
+    /// `table_name` is the name of the table inside the backup, `partitions` is passed to the storage as is.
+    void restoreTable(
+        const DatabaseAndTableName & table_name,
+        const ASTs & partitions,
+        ContextMutablePtr context,
+        const BackupPtr & backup,
+        const BackupRenamingConfigPtr & renaming_config,
+        RestoreObjectsTasks & restore_tasks)
+    {
+        ASTPtr create_query = readCreateQueryFromBackup(table_name, backup);
+        auto new_create_query = typeid_cast<std::shared_ptr<ASTCreateQuery>>(renameInCreateQuery(create_query, renaming_config, context));
+
+        restore_tasks.emplace_back([table_name, new_create_query, partitions, context, backup]() -> RestoreDataTasks
+        {
+            /// The name of the table after renaming; temporary tables belong to the temporary database.
+            DatabaseAndTableName new_table_name{new_create_query->database, new_create_query->table};
+            if (new_create_query->temporary)
+                new_table_name.first = DatabaseCatalog::TEMPORARY_DATABASE;
+
+            context->checkAccess(AccessType::INSERT, new_table_name.first, new_table_name.second);
+
+            /// Up to 10 attempts. Each attempt either picks up an already existing compatible table
+            /// or tries to create the table; a table created by one attempt is picked up by
+            /// the existence check of the next attempt.
+            /// NOTE(review): if the table gets created only by the last attempt, `storage` stays unset
+            /// and the "Could not create ..." error below is thrown.
+            StoragePtr storage;
+            for (size_t try_index = 0; try_index != 10; ++try_index)
+            {
+                if (DatabaseCatalog::instance().isTableExist({new_table_name.first, new_table_name.second}, context))
+                {
+                    DatabasePtr existing_database;
+                    StoragePtr existing_storage;
+                    std::tie(existing_database, existing_storage) = DatabaseCatalog::instance().tryGetDatabaseAndTable({new_table_name.first, new_table_name.second}, context);
+                    if (existing_storage)
+                    {
+                        if (auto existing_table_create_query = existing_database->tryGetCreateTableQuery(new_table_name.second, context))
+                        {
+                            /// An existing table is reused only if its definition is compatible with the one from the backup.
+                            if (hasCompatibleDataToRestoreTable(*new_create_query, existing_table_create_query->as<ASTCreateQuery &>()))
+                            {
+                                storage = existing_storage;
+                                break;
+                            }
+                            else
+                            {
+                                String error_message = (new_table_name.first == DatabaseCatalog::TEMPORARY_DATABASE)
+                                    ? ("Temporary table " + backQuoteIfNeed(new_table_name.second) + " already exists")
+                                    : ("Table " + backQuoteIfNeed(new_table_name.first) + "." + backQuoteIfNeed(new_table_name.second)
+                                       + " already exists");
+                                throw Exception(error_message, ErrorCodes::CANNOT_RESTORE_TABLE);
+                            }
+                        }
+                    }
+                }
+
+                makeDatabaseIfNotExists(new_table_name.first, context);
+
+                try
+                {
+                    InterpreterCreateQuery create_interpreter{new_create_query, context};
+                    create_interpreter.execute();
+                }
+                catch (Exception & e)
+                {
+                    /// TABLE_ALREADY_EXISTS is swallowed so that the next attempt re-checks the existing table;
+                    /// any other error is propagated.
+                    if (e.code() != ErrorCodes::TABLE_ALREADY_EXISTS)
+                        throw;
+                }
+            }
+
+            if (!storage)
+            {
+                String error_message = (new_table_name.first == DatabaseCatalog::TEMPORARY_DATABASE)
+                    ? ("Could not create temporary table " + backQuoteIfNeed(new_table_name.second) + " for restoring")
+                    : ("Could not create table " + backQuoteIfNeed(new_table_name.first) + "." + backQuoteIfNeed(new_table_name.second)
+                       + " for restoring");
+                throw Exception(error_message, ErrorCodes::CANNOT_RESTORE_TABLE);
+            }
+
+            /// The data is located in the backup by the original (not renamed) table name.
+            String data_path_in_backup = getDataPathInBackup(table_name);
+            RestoreDataTasks restore_data_tasks = storage->restoreFromBackup(backup, data_path_in_backup, partitions, context);
+
+            /// Keep `storage` alive while we're executing `restore_data_tasks`.
+            for (auto & restore_data_task : restore_data_tasks)
+                restore_data_task = [restore_data_task, storage]() { restore_data_task(); };
+
+            return restore_data_tasks;
+        });
+    }
+
+    /// Schedules restoring of a database together with its tables (except the ones named in `except_list`).
+    /// The database's create query is read from the backup and renamed right away; everything else
+    /// happens inside the task appended to `restore_tasks`.
+    void restoreDatabase(
+        const String & database_name,
+        const std::set<String> & except_list,
+        ContextMutablePtr context,
+        const BackupPtr & backup,
+        const BackupRenamingConfigPtr & renaming_config,
+        RestoreObjectsTasks & restore_tasks)
+    {
+        ASTPtr query_from_backup = readCreateQueryFromBackup(database_name, backup);
+        auto renamed_query = typeid_cast<std::shared_ptr<ASTCreateQuery>>(renameInCreateQuery(query_from_backup, renaming_config, context));
+
+        auto restore_database = [database_name, renamed_query, except_list, context, backup, renaming_config]() -> RestoreDataTasks
+        {
+            const String & target_database = renamed_query->database;
+            context->checkAccess(AccessType::SHOW_TABLES, target_database);
+
+            /// Create the database under its new name unless it's already there.
+            const bool database_exists = DatabaseCatalog::instance().isDatabaseExist(target_database);
+            if (!database_exists)
+            {
+                renamed_query->if_not_exists = true;
+                InterpreterCreateQuery interpreter{renamed_query, context};
+                interpreter.execute();
+            }
+
+            /// The tables are looked up in the backup by the original name of the database.
+            RestoreObjectsTasks table_tasks;
+            for (const String & table_name : backup->list("metadata/" + escapeForFileName(database_name) + "/", "/"))
+            {
+                if (!except_list.contains(table_name))
+                    restoreTable({database_name, table_name}, {}, context, backup, renaming_config, table_tasks);
+            }
+
+            /// Run the table tasks now and gather the data tasks they produce.
+            RestoreDataTasks data_tasks;
+            for (auto & table_task : table_tasks)
+                insertAtEnd(data_tasks, std::move(table_task)());
+            return data_tasks;
+        };
+
+        restore_tasks.emplace_back(std::move(restore_database));
+    }
+
+    /// Schedules restoring of all the databases found in the backup, except the ones named in `except_list`.
+    /// The backup is listed only when the task appended to `restore_tasks` is executed.
+    void restoreAllDatabases(
+        const std::set<String> & except_list,
+        ContextMutablePtr context,
+        const BackupPtr & backup,
+        const BackupRenamingConfigPtr & renaming_config,
+        RestoreObjectsTasks & restore_tasks)
+    {
+        auto restore_databases = [except_list, context, backup, renaming_config]() -> RestoreDataTasks
+        {
+            /// No tables are excluded inside the databases which are restored.
+            RestoreObjectsTasks database_tasks;
+            for (const String & database_name : backup->list("metadata/", "/"))
+            {
+                if (!except_list.contains(database_name))
+                    restoreDatabase(database_name, {}, context, backup, renaming_config, database_tasks);
+            }
+
+            /// Run the database tasks now and gather the data tasks they produce.
+            RestoreDataTasks data_tasks;
+            for (auto & database_task : database_tasks)
+                insertAtEnd(data_tasks, std::move(database_task)());
+            return data_tasks;
+        };
+
+        restore_tasks.emplace_back(std::move(restore_databases));
+    }
+}
+
+
+/// Prepares the entries (metadata and data) which should be written to a backup
+/// for the specified elements of a BACKUP query.
+/// The elements are normalized first (see adjustElements()), then every table, database or
+/// "all databases" element contributes its entries.
+/// Throws BACKUP_IS_EMPTY if no entries were produced and BACKUP_ELEMENT_DUPLICATE
+/// if two entries have the same name. The returned entries are sorted by name.
+BackupEntries makeBackupEntries(const Elements & elements, const ContextPtr & context)
+{
+    BackupEntries backup_entries;
+
+    auto elements2 = adjustElements(elements, context->getCurrentDatabase());
+    auto renaming_config = std::make_shared<BackupRenamingConfig>();
+    renaming_config->setFromBackupQueryElements(elements2);
+
+    for (const auto & element : elements2)
+    {
+        switch (element.type)
+        {
+            case ElementType::TABLE:
+            {
+                const String & database_name = element.name.first;
+                const String & table_name = element.name.second;
+                auto [database, storage] = DatabaseCatalog::instance().getDatabaseAndTable({database_name, table_name}, context);
+                backupTable({database, storage}, table_name, element.partitions, context, renaming_config, backup_entries);
+                break;
+            }
+
+            case ElementType::DATABASE:
+            {
+                const String & database_name = element.name.first;
+                auto database = DatabaseCatalog::instance().getDatabase(database_name, context);
+                backupDatabase(database, element.except_list, context, renaming_config, backup_entries);
+                break;
+            }
+
+            case ElementType::ALL_DATABASES:
+            {
+                backupAllDatabases(element.except_list, context, renaming_config, backup_entries);
+                break;
+            }
+
+            default:
+                throw Exception("Unexpected element type", ErrorCodes::LOGICAL_ERROR); /// other element types have been removed in adjustElements()
+        }
+    }
+
+    /// A backup cannot be empty.
+    if (backup_entries.empty())
+        throw Exception("Backup must not be empty", ErrorCodes::BACKUP_IS_EMPTY);
+
+    /// Check that all backup entries are unique.
+    std::sort(
+        backup_entries.begin(),
+        backup_entries.end(),
+        [](const std::pair<String, std::unique_ptr<IBackupEntry>> & lhs, const std::pair<String, std::unique_ptr<IBackupEntry>> & rhs)
+        {
+            return lhs.first < rhs.first;
+        });
+
+    /// Only the names must be compared here: the default comparison of pairs would also compare
+    /// the unique_ptrs, which never compare equal for two distinct entries, so duplicates would go unnoticed.
+    auto adjacent = std::adjacent_find(
+        backup_entries.begin(),
+        backup_entries.end(),
+        [](const std::pair<String, std::unique_ptr<IBackupEntry>> & lhs, const std::pair<String, std::unique_ptr<IBackupEntry>> & rhs)
+        {
+            return lhs.first == rhs.first;
+        });
+    if (adjacent != backup_entries.end())
+        throw Exception("Cannot write multiple entries with the same name " + quoteString(adjacent->first), ErrorCodes::BACKUP_ELEMENT_DUPLICATE);
+
+    return backup_entries;
+}
+
+/// Sums up the sizes of the entries which would actually be written to a backup.
+/// If `base_backup` is set, an entry which exists there under the same name with the same size
+/// and the same checksum is not counted. An entry without a precalculated checksum is always counted.
+UInt64 estimateBackupSize(const BackupEntries & backup_entries, const BackupPtr & base_backup)
+{
+    /// Whether the base backup already contains identical data for the entry.
+    auto is_same_as_in_base = [&base_backup](const String & name, const IBackupEntry & entry, UInt64 size)
+    {
+        if (!base_backup || !base_backup->exists(name))
+            return false;
+        if (size != base_backup->getSize(name))
+            return false;
+        auto checksum = entry.getChecksum();
+        return checksum && (*checksum == base_backup->getChecksum(name));
+    };
+
+    UInt64 estimated_size = 0;
+    for (const auto & name_and_entry : backup_entries)
+    {
+        const UInt64 entry_size = name_and_entry.second->getSize();
+        if (!is_same_as_in_base(name_and_entry.first, *name_and_entry.second, entry_size))
+            estimated_size += entry_size;
+    }
+    return estimated_size;
+}
+
+/// Writes all the entries to `backup`, running at most `num_threads` writes at the same time
+/// (zero is treated as one), and finalizes the backup afterwards.
+/// If any write throws, no more writes are started, the first caught exception is rethrown
+/// after all the started threads have been joined, and finalizeWriting() is not called.
+/// `backup_entries` is cleared in either case.
+void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, size_t num_threads)
+{
+    if (!num_threads)
+        num_threads = 1;
+    std::vector<ThreadFromGlobalPool> threads;
+    size_t num_active_threads = 0;
+    std::mutex mutex;
+    std::condition_variable cond;
+    /// The first exception caught in a writing thread; guarded by `mutex`.
+    std::exception_ptr exception;
+
+    for (auto & name_and_entry : backup_entries)
+    {
+        auto & name = name_and_entry.first;
+        auto & entry = name_and_entry.second;
+
+        {
+            /// Wait for a free slot; stop starting new writes as soon as a failure has been recorded.
+            std::unique_lock lock{mutex};
+            if (exception)
+                break;
+            cond.wait(lock, [&] { return num_active_threads < num_threads; });
+            if (exception)
+                break;
+            ++num_active_threads;
+        }
+
+        /// `name` and `entry` are captured by reference: they refer to elements of `backup_entries`,
+        /// which is cleared only after all the threads have been joined.
+        threads.emplace_back([backup, &name, &entry, &mutex, &cond, &num_active_threads, &exception]()
+        {
+            try
+            {
+                backup->write(name, std::move(entry));
+            }
+            catch (...)
+            {
+                std::lock_guard lock{mutex};
+                if (!exception)
+                    exception = std::current_exception();
+            }
+
+            {
+                /// Free the slot and wake up the scheduling loop.
+                std::lock_guard lock{mutex};
+                --num_active_threads;
+                cond.notify_all();
+            }
+        });
+    }
+
+    for (auto & thread : threads)
+        thread.join();
+
+    backup_entries.clear();
+
+    if (exception)
+    {
+        /// We don't call finalizeWriting() if an error occurs.
+        /// And IBackup's implementation should remove the backup in its destructor if finalizeWriting() hasn't been called before.
+        std::rethrow_exception(exception);
+    }
+
+    backup->finalizeWriting();
+}
+
+
+/// Prepares the tasks which restore the specified elements of a RESTORE query from `backup`.
+/// The elements are normalized first (see adjustElements()), then every table, database or
+/// "all databases" element contributes its task to the result.
+/// No tables or databases are created here: that happens when the returned tasks are executed.
+RestoreObjectsTasks makeRestoreTasks(const Elements & elements, ContextMutablePtr context, const BackupPtr & backup)
+{
+    RestoreObjectsTasks restore_tasks;
+
+    auto elements2 = adjustElements(elements, context->getCurrentDatabase());
+    auto renaming_config = std::make_shared<BackupRenamingConfig>();
+    renaming_config->setFromBackupQueryElements(elements2);
+
+    for (const auto & element : elements2)
+    {
+        switch (element.type)
+        {
+            case ElementType::TABLE:
+            {
+                const String & database_name = element.name.first;
+                const String & table_name = element.name.second;
+                restoreTable({database_name, table_name}, element.partitions, context, backup, renaming_config, restore_tasks);
+                break;
+            }
+
+            case ElementType::DATABASE:
+            {
+                /// The database isn't looked up in the catalog here: `database_name` is its name inside the backup,
+                /// and restoreDatabase() creates the (possibly renamed) database itself if it doesn't exist yet.
+                const String & database_name = element.name.first;
+                restoreDatabase(database_name, element.except_list, context, backup, renaming_config, restore_tasks);
+                break;
+            }
+
+            case ElementType::ALL_DATABASES:
+            {
+                restoreAllDatabases(element.except_list, context, backup, renaming_config, restore_tasks);
+                break;
+            }
+
+            default:
+                throw Exception("Unexpected element type", ErrorCodes::LOGICAL_ERROR); /// other element types have been removed in adjustElements()
+        }
+    }
+
+    return restore_tasks;
+}
+
+
+/// Executes restore tasks in two stages.
+/// First every object task is run one after another in the calling thread, which yields the data tasks.
+/// Then the data tasks are run with at most `num_threads` of them running at the same time
+/// (zero is treated as one). If a data task throws, no more data tasks are started and
+/// the first caught exception is rethrown after all the started threads have been joined.
+void executeRestoreTasks(RestoreObjectsTasks && restore_tasks, size_t num_threads)
+{
+    if (!num_threads)
+        num_threads = 1;
+
+    /// Stage 1: run the object tasks sequentially and collect the data tasks they return.
+    RestoreDataTasks restore_data_tasks;
+    for (auto & restore_object_task : restore_tasks)
+        insertAtEnd(restore_data_tasks, std::move(restore_object_task)());
+    restore_tasks.clear();
+
+    /// Stage 2: run the data tasks in threads.
+    std::vector<ThreadFromGlobalPool> threads;
+    size_t num_active_threads = 0;
+    std::mutex mutex;
+    std::condition_variable cond;
+    /// The first exception caught in a restoring thread; guarded by `mutex`.
+    std::exception_ptr exception;
+
+    for (auto & restore_data_task : restore_data_tasks)
+    {
+        {
+            /// Wait for a free slot; stop starting new tasks as soon as a failure has been recorded.
+            std::unique_lock lock{mutex};
+            if (exception)
+                break;
+            cond.wait(lock, [&] { return num_active_threads < num_threads; });
+            if (exception)
+                break;
+            ++num_active_threads;
+        }
+
+        /// The task is captured by reference: it's an element of `restore_data_tasks`,
+        /// which is cleared only after all the threads have been joined.
+        threads.emplace_back([&restore_data_task, &mutex, &cond, &num_active_threads, &exception]() mutable
+        {
+            try
+            {
+                restore_data_task();
+                /// Release whatever the finished task has captured.
+                restore_data_task = {};
+            }
+            catch (...)
+            {
+                std::lock_guard lock{mutex};
+                if (!exception)
+                    exception = std::current_exception();
+            }
+
+            {
+                /// Free the slot and wake up the scheduling loop.
+                std::lock_guard lock{mutex};
+                --num_active_threads;
+                cond.notify_all();
+            }
+        });
+    }
+
+    for (auto & thread : threads)
+        thread.join();
+
+    restore_data_tasks.clear();
+
+    if (exception)
+        std::rethrow_exception(exception);
+}
+
+}
--- a/src/Backups/BackupUtils.h
+++ b/src/Backups/BackupUtils.h
@ -0,0 +1,39 @@
+#pragma once
+
+#include <Parsers/ASTBackupQuery.h>
+
+
+namespace DB
+{
+
+class IBackup;
+using BackupPtr = std::shared_ptr<const IBackup>;
+using BackupMutablePtr = std::shared_ptr<IBackup>;
+class IBackupEntry;
+using BackupEntryPtr = std::unique_ptr<IBackupEntry>;
+using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
+using RestoreDataTask = std::function<void()>;
+using RestoreDataTasks = std::vector<RestoreDataTask>;
+using RestoreObjectTask = std::function<RestoreDataTasks()>;
+using RestoreObjectsTasks = std::vector<RestoreObjectTask>;
+class Context;
+using ContextPtr = std::shared_ptr<const Context>;
+using ContextMutablePtr = std::shared_ptr<Context>;
+
+
+/// Prepares the backup entries (metadata and data) for the specified elements of a BACKUP query.
+/// Throws an exception if no entries were produced. The returned entries are sorted by name.
+BackupEntries makeBackupEntries(const ASTBackupQuery::Elements & elements, const ContextPtr & context);
+
+/// Estimates the total size of the backup which would be written from the specified entries.
+/// If `base_backup` is set, the entries which exist in it with the same size and the same checksum are not counted.
+UInt64 estimateBackupSize(const BackupEntries & backup_entries, const BackupPtr & base_backup);
+
+/// Writes the backup entries to an opened backup using up to `num_threads` threads (zero is treated as one)
+/// and finalizes the backup. If writing fails the first exception is rethrown and the backup isn't finalized.
+void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, size_t num_threads);
+
+/// Prepares restore tasks for the specified elements of a RESTORE query.
+/// Tables and databases are created only when the returned tasks are executed.
+RestoreObjectsTasks makeRestoreTasks(const ASTBackupQuery::Elements & elements, ContextMutablePtr context, const BackupPtr & backup);
+
+/// Executes restore tasks: the object tasks are run one by one in the calling thread,
+/// then the data tasks they return are run using up to `num_threads` threads (zero is treated as one).
+void executeRestoreTasks(RestoreObjectsTasks && restore_tasks, size_t num_threads);
+
+}
--- a/src/Backups/CMakeLists.txt
+++ b/src/Backups/CMakeLists.txt
--- a/src/Backups/IBackup.h
+++ b/src/Backups/IBackup.h
@ -0,0 +1,65 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <memory>
+
+
+namespace DB
+{
+class IBackupEntry;
+using BackupEntryPtr = std::unique_ptr<IBackupEntry>;
+
+/// Represents a backup, i.e. a storage of BackupEntries which can be accessed by their names.
+/// A backup can be either incremental or non-incremental. An incremental backup doesn't store
+/// the data of the entries which are not changed compared to its base backup.
+class IBackup
+{
+public:
+    virtual ~IBackup() = default;
+
+    enum class OpenMode
+    {
+        READ,
+        WRITE,
+    };
+
+    /// A backup can be open either in READ or WRITE mode.
+    virtual OpenMode getOpenMode() const = 0;
+
+    /// Returns the path to the backup.
+    virtual String getPath() const = 0;
+
+    /// Returns names of entries stored in the backup.
+    /// If `prefix` isn't empty the function will return only the names starting with
+    /// the prefix (but without the prefix itself).
+    /// If `terminator` isn't empty the function will return only the parts of the names
+    /// before the terminator. For example, list("", "") returns the names of all the entries
+    /// in the backup; and list("data/", "/") returns a kind of a list of folders and
+    /// files stored in the "data/" directory inside the backup.
+    virtual Strings list(const String & prefix = "", const String & terminator = "/") const = 0;
+
+    /// Checks if an entry with a specified name exists.
+    virtual bool exists(const String & name) const = 0;
+
+    /// Returns the size of the entry's data.
+    /// This function does the same as `read(name)->getSize()` but faster.
+    virtual size_t getSize(const String & name) const = 0;
+
+    /// Returns the checksum of the entry's data.
+    /// This function does the same as `read(name)->getChecksum()` but faster.
+    virtual UInt128 getChecksum(const String & name) const = 0;
+
+    /// Reads an entry from the backup.
+    virtual BackupEntryPtr read(const String & name) const = 0;
+
+    /// Puts a new entry to the backup. The backup takes ownership of the entry.
+    virtual void write(const String & name, BackupEntryPtr entry) = 0;
+
+    /// Finalizes writing the backup, should be called after all entries have been successfully written.
+    virtual void finalizeWriting() = 0;
+};
+
+using BackupPtr = std::shared_ptr<const IBackup>;
+using BackupMutablePtr = std::shared_ptr<IBackup>;
+
+}
--- a/src/Backups/IBackupEntry.h
+++ b/src/Backups/IBackupEntry.h
@ -0,0 +1,32 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <memory>
+#include <optional>
+#include <vector>
+
+namespace DB
+{
+class ReadBuffer;
+
+/// A backup entry represents some data which should be written to the backup or has been read from the backup.
+/// Implementations must provide the size of the data and a way to read it; a precalculated checksum is optional.
+class IBackupEntry
+{
+public:
+    virtual ~IBackupEntry() = default;
+
+    /// Returns the size of the data.
+    virtual UInt64 getSize() const = 0;
+
+    /// Returns the checksum of the data if it's precalculated.
+    /// Can return nullopt which means the checksum should be calculated from the read buffer.
+    /// The default implementation returns nullopt.
+    virtual std::optional<UInt128> getChecksum() const { return {}; }
+
+    /// Returns a read buffer for reading the data. The caller owns the returned buffer.
+    virtual std::unique_ptr<ReadBuffer> getReadBuffer() const = 0;
+};
+
+using BackupEntryPtr = std::unique_ptr<IBackupEntry>;
+using BackupEntries = std::vector<std::pair<String, BackupEntryPtr>>;
+
+}
--- a/src/Backups/hasCompatibleDataToRestoreTable.cpp
+++ b/src/Backups/hasCompatibleDataToRestoreTable.cpp
@ -0,0 +1,22 @@
+#include <Backups/hasCompatibleDataToRestoreTable.h>
+#include <Parsers/ASTCreateQuery.h>
+#include <Parsers/formatAST.h>
+
+
+namespace DB
+{
+
+/// Two create queries are currently considered compatible only if they are serialized
+/// to the same text once their UUIDs have been reset. The passed queries aren't modified.
+bool hasCompatibleDataToRestoreTable(const ASTCreateQuery & query1, const ASTCreateQuery & query2)
+{
+    /// TODO: Write more subtle condition here.
+    auto serialize_without_uuid = [](const ASTCreateQuery & query)
+    {
+        /// Work on a copy so the UUID of the original query stays intact.
+        auto copy = typeid_cast<std::shared_ptr<ASTCreateQuery>>(query.clone());
+        copy->uuid = UUIDHelpers::Nil;
+        return serializeAST(*copy);
+    };
+
+    return serialize_without_uuid(query1) == serialize_without_uuid(query2);
+}
+
+}
--- a/src/Backups/hasCompatibleDataToRestoreTable.h
+++ b/src/Backups/hasCompatibleDataToRestoreTable.h
@ -0,0 +1,11 @@
+#pragma once
+
+
+namespace DB
+{
+class ASTCreateQuery;
+
+/// Whether the data of the first table can be inserted to the second table.
+/// Currently this holds only if both create queries are serialized to the same text after their UUIDs are reset.
+bool hasCompatibleDataToRestoreTable(const ASTCreateQuery & query1, const ASTCreateQuery & query2);
+
+}
--- a/src/Backups/renameInCreateQuery.cpp
+++ b/src/Backups/renameInCreateQuery.cpp
@ -0,0 +1,276 @@
+#include <Backups/renameInCreateQuery.h>
+#include <Backups/BackupRenamingConfig.h>
+#include <Parsers/ASTCreateQuery.h>
+#include <Parsers/ASTFunction.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTTablesInSelectQuery.h>
+#include <TableFunctions/TableFunctionFactory.h>
+#include <Interpreters/InDepthNodeVisitor.h>
+#include <Interpreters/evaluateConstantExpression.h>
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+namespace
+{
+    /// Matcher for InDepthNodeVisitor which rewrites names of databases and tables found in a CREATE query
+    /// according to a BackupRenamingConfig.
+    class RenameInCreateQueryTransformMatcher
+    {
+    public:
+        /// State shared by all the visit functions.
+        struct Data
+        {
+            /// Provides the mapping from old names to new names.
+            BackupRenamingConfigPtr renaming_config;
+            /// Passed to evaluateConstantExpressionForDatabaseName() when arguments of functions are evaluated.
+            ContextPtr context;
+        };
+
+        /// No subtree is skipped: every child is visited.
+        static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
+
+        /// Dispatches a node to the handler for its type; nodes of any other type are left unchanged.
+        static void visit(ASTPtr & ast, const Data & data)
+        {
+            if (auto * create = ast->as<ASTCreateQuery>())
+                visitCreateQuery(*create, data);
+            else if (auto * expr = ast->as<ASTTableExpression>())
+                visitTableExpression(*expr, data);
+            else if (auto * function = ast->as<ASTFunction>())
+                visitFunction(*function, data);
+            else if (auto * dictionary = ast->as<ASTDictionary>())
+                visitDictionary(*dictionary, data);
+        }
+
+    private:
+        /// Replaces names of tables and databases used in a CREATE query, which can be either CREATE TABLE or
+        /// CREATE DICTIONARY or CREATE VIEW or CREATE TEMPORARY TABLE or CREATE DATABASE query.
+        /// Throws LOGICAL_ERROR if a name required by the kind of the query is empty.
+        static void visitCreateQuery(ASTCreateQuery & create, const Data & data)
+        {
+            if (create.temporary)
+            {
+                /// CREATE TEMPORARY TABLE: only the table's name is renamed.
+                if (create.table.empty())
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Table name specified in the CREATE TEMPORARY TABLE query must not be empty");
+                create.table = data.renaming_config->getNewTemporaryTableName(create.table);
+            }
+            else if (create.table.empty())
+            {
+                /// No table's name means it's a CREATE DATABASE query.
+                if (create.database.empty())
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name specified in the CREATE DATABASE query must not be empty");
+                create.database = data.renaming_config->getNewDatabaseName(create.database);
+            }
+            else
+            {
+                if (create.database.empty())
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Database name specified in the CREATE TABLE query must not be empty");
+                std::tie(create.database, create.table) = data.renaming_config->getNewTableName({create.database, create.table});
+            }
+
+            /// The UUID is always reset.
+            create.uuid = UUIDHelpers::Nil;
+
+            /// The `as_database`.`as_table` pair is renamed only if both names are set.
+            if (!create.as_table.empty() && !create.as_database.empty())
+                std::tie(create.as_database, create.as_table) = data.renaming_config->getNewTableName({create.as_database, create.as_table});
+
+            /// The same goes for `to_table_id`: it's renamed only if both its database and table names are set.
+            if (!create.to_table_id.table_name.empty() && !create.to_table_id.database_name.empty())
+            {
+                auto to_table = data.renaming_config->getNewTableName({create.to_table_id.database_name, create.to_table_id.table_name});
+                create.to_table_id = StorageID{to_table.first, to_table.second};
+            }
+        }
+
+        /// Replaces names of a database and a table in an expression like `db`.`table`.
+        /// Does nothing if the expression has no such identifier or if either of the two names is empty.
+        static void visitTableExpression(ASTTableExpression & expr, const Data & data)
+        {
+            if (!expr.database_and_table_name)
+                return;
+
+            ASTIdentifier * id = expr.database_and_table_name->as<ASTIdentifier>();
+            if (!id)
+                return;
+
+            auto table_id = id->createTable();
+            if (!table_id)
+                return;
+
+            const String & db_name = table_id->getDatabaseName();
+            const String & table_name = table_id->shortName();
+            if (db_name.empty() || table_name.empty())
+                return;
+
+            String new_db_name, new_table_name;
+            std::tie(new_db_name, new_table_name) = data.renaming_config->getNewTableName({db_name, table_name});
+            if ((new_db_name == db_name) && (new_table_name == table_name))
+                return;
+
+            /// NOTE(review): the new identifier is appended to `children`, nothing is removed from there.
+            /// If the previous identifier was registered in `children` it stays there - confirm that it's harmless.
+            expr.database_and_table_name = std::make_shared<ASTIdentifier>(Strings{new_db_name, new_table_name});
+            expr.children.push_back(expr.database_and_table_name);
+        }
+
+        /// Replaces names of tables and databases used in arguments of a table function or a table engine.
+        /// Functions with other names are left unchanged.
+        static void visitFunction(ASTFunction & function, const Data & data)
+        {
+            if ((function.name == "merge") || (function.name == "Merge"))
+            {
+                visitFunctionMerge(function, data);
+            }
+            else if ((function.name == "remote") || (function.name == "remoteSecure") || (function.name == "cluster") ||
+                     (function.name == "clusterAllReplicas") || (function.name == "Distributed"))
+            {
+                visitFunctionRemote(function, data);
+            }
+        }
+
+        /// Replaces a database's name passed via an argument of the function merge() or the table engine Merge.
+        static void visitFunctionMerge(ASTFunction & function, const Data & data)
+        {
+            if (!function.arguments)
+                return;
+
+            /// The first argument is a database's name and we can rename it.
+            /// The second argument is a regular expression and we can do nothing about it.
+            auto & args = function.arguments->as<ASTExpressionList &>().children;
+            size_t db_name_arg_index = 0;
+            if (args.size() <= db_name_arg_index)
+                return;
+
+            String db_name = evaluateConstantExpressionForDatabaseName(args[db_name_arg_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
+            if (db_name.empty())
+                return;
+
+            String new_db_name = data.renaming_config->getNewDatabaseName(db_name);
+            if (new_db_name == db_name)
+                return;
+            args[db_name_arg_index] = std::make_shared<ASTLiteral>(new_db_name);
+        }
+
+        /// Replaces names of a table and a database passed via arguments of the function remote() or cluster() or the table engine Distributed.
+        static void visitFunctionRemote(ASTFunction & function, const Data & data)
+        {
+            if (!function.arguments)
+                return;
+
+            /// The first argument is an address or cluster's name, so we skip it.
+            /// The second argument can be either 'db.name' or just 'db' followed by the third argument 'table'.
+            auto & args = function.arguments->as<ASTExpressionList &>().children;
+
+            /// The number of arguments must be checked before the second argument is accessed,
+            /// otherwise a function with fewer than two arguments would be indexed out of bounds.
+            size_t db_name_index = 1;
+            if (args.size() <= db_name_index)
+                return;
+
+            /// If the second argument is a table function itself there are no names to rename at this level.
+            const auto * second_arg_as_function = args[db_name_index]->as<ASTFunction>();
+            if (second_arg_as_function && TableFunctionFactory::instance().isTableFunctionName(second_arg_as_function->name))
+                return;
+
+            String db_name = evaluateConstantExpressionForDatabaseName(args[db_name_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
+
+            String table_name;
+            /// static_cast<size_t>(-1) means the table's name has no argument of its own (the 'db.name' form).
+            size_t table_name_index = static_cast<size_t>(-1);
+            size_t dot = String::npos;
+            /// The 'db.name' form is not looked for in arguments of the table engine Distributed.
+            if (function.name != "Distributed")
+                dot = db_name.find('.');
+            if (dot != String::npos)
+            {
+                table_name = db_name.substr(dot + 1);
+                db_name.resize(dot);
+            }
+            else
+            {
+                table_name_index = 2;
+                if (args.size() <= table_name_index)
+                    return;
+                table_name = evaluateConstantExpressionForDatabaseName(args[table_name_index], data.context)->as<ASTLiteral &>().value.safeGet<String>();
+            }
+
+            if (db_name.empty() || table_name.empty())
+                return;
+
+            String new_db_name, new_table_name;
+            std::tie(new_db_name, new_table_name) = data.renaming_config->getNewTableName({db_name, table_name});
+            if ((new_db_name == db_name) && (new_table_name == table_name))
+                return;
+
+            if (table_name_index != static_cast<size_t>(-1))
+            {
+                /// Separate arguments: replace only those which have actually changed.
+                if (new_db_name != db_name)
+                    args[db_name_index] = std::make_shared<ASTLiteral>(new_db_name);
+                if (new_table_name != table_name)
+                    args[table_name_index] = std::make_shared<ASTLiteral>(new_table_name);
+            }
+            else
+            {
+                /// The single argument 'db.name' is split into two arguments: 'db' followed by 'name'.
+                args[db_name_index] = std::make_shared<ASTLiteral>(new_db_name);
+                args.insert(args.begin() + db_name_index + 1, std::make_shared<ASTLiteral>(new_table_name));
+            }
+        }
+
+        /// Replaces names of a table and a database used in source parameters of a dictionary.
+        /// Only the source named "clickhouse" is handled.
+        static void visitDictionary(ASTDictionary & dictionary, const Data & data)
+        {
+            if (!dictionary.source || dictionary.source->name != "clickhouse" || !dictionary.source->elements)
+                return;
+
+            auto & elements = dictionary.source->elements->as<ASTExpressionList &>().children;
+            String db_name, table_name;
+            /// static_cast<size_t>(-1) means that the parameter has not been found yet.
+            size_t db_name_index = static_cast<size_t>(-1);
+            size_t table_name_index = static_cast<size_t>(-1);
+
+            for (size_t i = 0; i != elements.size(); ++i)
+            {
+                auto & pair = elements[i]->as<ASTPair &>();
+                if (pair.first == "db")
+                {
+                    /// A repeated parameter: give up and change nothing.
+                    if (db_name_index != static_cast<size_t>(-1))
+                        return;
+                    db_name = pair.second->as<ASTLiteral &>().value.safeGet<String>();
+                    db_name_index = i;
+                }
+                else if (pair.first == "table")
+                {
+                    /// A repeated parameter: give up and change nothing.
+                    if (table_name_index != static_cast<size_t>(-1))
+                        return;
+                    table_name = pair.second->as<ASTLiteral &>().value.safeGet<String>();
+                    table_name_index = i;
+                }
+            }
+
+            /// Both names are non-empty only if both parameters were found, so both indices are valid below.
+            if (db_name.empty() || table_name.empty())
+                return;
+
+            String new_db_name, new_table_name;
+            std::tie(new_db_name, new_table_name) = data.renaming_config->getNewTableName({db_name, table_name});
+            if ((new_db_name == db_name) && (new_table_name == table_name))
+                return;
+
+            if (new_db_name != db_name)
+            {
+                auto & pair = elements[db_name_index]->as<ASTPair &>();
+                pair.replace(pair.second, std::make_shared<ASTLiteral>(new_db_name));
+            }
+            if (new_table_name != table_name)
+            {
+                auto & pair = elements[table_name_index]->as<ASTPair &>();
+                pair.replace(pair.second, std::make_shared<ASTLiteral>(new_table_name));
+            }
+        }
+    };
+
+    using RenameInCreateQueryTransformVisitor = InDepthNodeVisitor<RenameInCreateQueryTransformMatcher, false>;
+}
+
+
+/// Returns a copy of `ast` with names changed according to `renaming_config`.
+/// If the renaming throws, the exception is logged and the original `ast` is returned instead.
+ASTPtr renameInCreateQuery(const ASTPtr & ast, const BackupRenamingConfigPtr & renaming_config, const ContextPtr & context)
+{
+    /// The visitor changes the tree it's given, so it works on a clone and `ast` stays as it was.
+    auto renamed_ast = ast->clone();
+
+    try
+    {
+        RenameInCreateQueryTransformVisitor::Data visitor_data{renaming_config, context};
+        RenameInCreateQueryTransformVisitor visitor{visitor_data};
+        visitor.visit(renamed_ast);
+    }
+    catch (...)
+    {
+        tryLogCurrentException("Backup", "Error while renaming in AST");
+        return ast;
+    }
+
+    return renamed_ast;
+}
+
+}
--- a/src/Backups/renameInCreateQuery.h
+++ b/src/Backups/renameInCreateQuery.h
@ -0,0 +1,16 @@
+#pragma once
+
+#include <memory>
+
+namespace DB
+{
+class IAST;
+using ASTPtr = std::shared_ptr<IAST>;
+class Context;
+using ContextPtr = std::shared_ptr<const Context>;
+class BackupRenamingConfig;
+using BackupRenamingConfigPtr = std::shared_ptr<const BackupRenamingConfig>;
+
+/// Changes names in AST according to the renaming settings.
+ASTPtr renameInCreateQuery(const ASTPtr & ast, const BackupRenamingConfigPtr & renaming_config, const ContextPtr & context);
+}
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -45,6 +45,7 @@ if (COMPILER_GCC)
 endif ()

 add_subdirectory (Access)
+add_subdirectory (Backups)
 add_subdirectory (Columns)
 add_subdirectory (Common)
 add_subdirectory (Core)
@ -180,6 +181,7 @@ macro(add_object_library name common_path)
 endmacro()

 add_object_library(clickhouse_access Access)
+add_object_library(clickhouse_backups Backups)
 add_object_library(clickhouse_core Core)
 add_object_library(clickhouse_core_mysql Core/MySQL)
 add_object_library(clickhouse_compression Compression)
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@ -566,6 +566,17 @@
    M(595, BZIP2_STREAM_ENCODER_FAILED) \
    M(596, INTERSECT_OR_EXCEPT_RESULT_STRUCTURES_MISMATCH) \
    M(597, NO_SUCH_ERROR_CODE) \
+    M(598, BACKUP_ALREADY_EXISTS) \
+    M(599, BACKUP_NOT_FOUND) \
+    M(600, BACKUP_VERSION_NOT_SUPPORTED) \
+    M(601, BACKUP_DAMAGED) \
+    M(602, NO_BASE_BACKUP) \
+    M(603, WRONG_BASE_BACKUP) \
+    M(604, BACKUP_ENTRY_ALREADY_EXISTS) \
+    M(605, BACKUP_ENTRY_NOT_FOUND) \
+    M(606, BACKUP_IS_EMPTY) \
+    M(607, BACKUP_ELEMENT_DUPLICATE) \
+    M(608, CANNOT_RESTORE_TABLE) \
    \
    M(598, FUNCTION_ALREADY_EXISTS) \
    M(599, CANNOT_DROP_SYSTEM_FUNCTION) \
--- a/src/Common/StringUtils/StringUtils.h
+++ b/src/Common/StringUtils/StringUtils.h
@ -276,14 +276,30 @@ inline void trimLeft(std::string_view & str, char c = ' ')
        str.remove_prefix(1);
 }

+/// Removes all leading characters equal to `c` from `str` in place.
+/// A string which consists only of `c` becomes empty.
+inline void trimLeft(std::string & str, char c = ' ')
+{
+    /// npos (nothing to keep) makes erase() remove everything up to the end of the string.
+    const auto kept_from = str.find_first_not_of(c);
+    str.erase(0, kept_from);
+}
+
 inline void trimRight(std::string_view & str, char c = ' ')
 {
    while (str.ends_with(c))
        str.remove_suffix(1);
 }

+/// Removes all trailing characters equal to `c` from `str` in place.
+/// A string which consists only of `c` becomes empty.
+inline void trimRight(std::string & str, char c = ' ')
+{
+    /// find_last_not_of() returns npos when there is nothing to keep; npos + 1 wraps around to 0,
+    /// so in that case the whole string is erased.
+    const auto kept_until = str.find_last_not_of(c) + 1;
+    str.erase(kept_until);
+}
+
 inline void trim(std::string_view & str, char c = ' ')
 {
    trimLeft(str, c);
    trimRight(str, c);
 }
+
+/// Removes all leading and trailing characters equal to `c` from `str` in place.
+inline void trim(std::string & str, char c = ' ')
+{
+    /// The tail is cut first, then the head.
+    trimRight(str, c);
+    trimLeft(str, c);
+}
--- a/src/Common/isValidUTF8.cpp
+++ b/src/Common/isValidUTF8.cpp
@ -0,0 +1,131 @@
+#include <Common/isValidUTF8.h>
+#include <cstring>
+
+/// inspired by https://github.com/cyb70289/utf8/
+
+/*
+MIT License
+
+Copyright (c) 2019 Yibo Cai
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+/*
+* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
+*
+* Table 3-7. Well-Formed UTF-8 Byte Sequences
+*
+* +--------------------+------------+-------------+------------+-------------+
+* | Code Points        | First Byte | Second Byte | Third Byte | Fourth Byte |
+* +--------------------+------------+-------------+------------+-------------+
+* | U+0000..U+007F     | 00..7F     |             |            |             |
+* +--------------------+------------+-------------+------------+-------------+
+* | U+0080..U+07FF     | C2..DF     | 80..BF      |            |             |
+* +--------------------+------------+-------------+------------+-------------+
+* | U+0800..U+0FFF     | E0         | A0..BF      | 80..BF     |             |
+* +--------------------+------------+-------------+------------+-------------+
+* | U+1000..U+CFFF     | E1..EC     | 80..BF      | 80..BF     |             |
+* +--------------------+------------+-------------+------------+-------------+
+* | U+D000..U+D7FF     | ED         | 80..9F      | 80..BF     |             |
+* +--------------------+------------+-------------+------------+-------------+
+* | U+E000..U+FFFF     | EE..EF     | 80..BF      | 80..BF     |             |
+* +--------------------+------------+-------------+------------+-------------+
+* | U+10000..U+3FFFF   | F0         | 90..BF      | 80..BF     | 80..BF      |
+* +--------------------+------------+-------------+------------+-------------+
+* | U+40000..U+FFFFF   | F1..F3     | 80..BF      | 80..BF     | 80..BF      |
+* +--------------------+------------+-------------+------------+-------------+
+* | U+100000..U+10FFFF | F4         | 80..8F      | 80..BF     | 80..BF      |
+* +--------------------+------------+-------------+------------+-------------+
+*/
+namespace DB
+{
+
+namespace UTF8
+{
+
+/// Checks whether the `len` bytes starting at `data` form a well-formed UTF-8 byte sequence
+/// according to the table above. Returns true if they do (an empty input is well-formed too),
+/// and false as soon as an ill-formed or truncated sequence is met.
+///
+/// Continuation bytes (80..BF) are tested with a signed comparison:
+/// assuming Int8 is a signed 8-bit type, 80..BF are exactly the values which are <= 0xBF when reinterpreted as Int8.
+UInt8 isValidUTF8(const UInt8 * data, UInt64 len)
+{
+    while (len)
+    {
+        /// Length of the sequence which starts at the current position.
+        int bytes;
+        const UInt8 byte1 = data[0];
+        /* 00..7F */
+        if (byte1 <= 0x7F)
+        {
+            bytes = 1;
+        }
+        /* C2..DF, 80..BF */
+        else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF && static_cast<Int8>(data[1]) <= static_cast<Int8>(0xBF))
+        {
+            bytes = 2;
+        }
+        else if (len >= 3)
+        {
+            const UInt8 byte2 = data[1];
+            bool byte2_ok = static_cast<Int8>(byte2) <= static_cast<Int8>(0xBF);
+            bool byte3_ok = static_cast<Int8>(data[2]) <= static_cast<Int8>(0xBF);
+
+            if (byte2_ok && byte3_ok &&
+                /* E0, A0..BF, 80..BF */
+                ((byte1 == 0xE0 && byte2 >= 0xA0) ||
+                 /* E1..EC, 80..BF, 80..BF */
+                 (byte1 >= 0xE1 && byte1 <= 0xEC) ||
+                 /* ED, 80..9F, 80..BF */
+                 (byte1 == 0xED && byte2 <= 0x9F) ||
+                 /* EE..EF, 80..BF, 80..BF */
+                 (byte1 >= 0xEE && byte1 <= 0xEF)))
+            {
+                bytes = 3;
+            }
+            /// Not a three-byte sequence: the only remaining possibility is a four-byte one.
+            else if (len >= 4)
+            {
+                bool byte4_ok = static_cast<Int8>(data[3]) <= static_cast<Int8>(0xBF);
+                if (byte2_ok && byte3_ok && byte4_ok &&
+                    /* F0, 90..BF, 80..BF, 80..BF */
+                    ((byte1 == 0xF0 && byte2 >= 0x90) ||
+                     /* F1..F3, 80..BF, 80..BF, 80..BF */
+                     (byte1 >= 0xF1 && byte1 <= 0xF3) ||
+                     /* F4, 80..8F, 80..BF, 80..BF */
+                     (byte1 == 0xF4 && byte2 <= 0x8F)))
+                {
+                    bytes = 4;
+                }
+                else
+                {
+                    return false;
+                }
+            }
+            else
+            {
+                return false;
+            }
+        }
+        else
+        {
+            /// Fewer than three bytes are left and they form neither a one-byte nor a two-byte sequence.
+            return false;
+        }
+        /// Skip the sequence which has just been validated.
+        len -= bytes;
+        data += bytes;
+    }
+    return true;
+}
+
+}
+}
--- a/src/Common/isValidUTF8.h
+++ b/src/Common/isValidUTF8.h
@ -0,0 +1,10 @@
+#pragma once
+
+#include <common/types.h>
+
+namespace DB::UTF8
+{
+
+UInt8 isValidUTF8(const UInt8 * data, UInt64 len);
+
+}
--- a/src/Common/ya.make
+++ b/src/Common/ya.make
@ -116,6 +116,7 @@ SRCS(
    hasLinuxCapability.cpp
    hex.cpp
    isLocalAddress.cpp
+    isValidUTF8.cpp
    malloc.cpp
    memory.cpp
    new_delete.cpp
--- a/src/Core/Field.h
+++ b/src/Core/Field.h
@ -28,12 +28,6 @@ namespace ErrorCodes
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
 }

-template <typename T, typename SFINAE = void>
-struct NearestFieldTypeImpl;
-
-template <typename T>
-using NearestFieldType = typename NearestFieldTypeImpl<T>::Type;
-
 class Field;
 using FieldVector = std::vector<Field, AllocatorWithMemoryTracking<Field>>;

@ -168,6 +162,12 @@ template <> constexpr inline bool is_decimal_field<DecimalField<Decimal64>> = tr
 template <> constexpr inline bool is_decimal_field<DecimalField<Decimal128>> = true;
 template <> constexpr inline bool is_decimal_field<DecimalField<Decimal256>> = true;

+template <typename T, typename SFINAE = void>
+struct NearestFieldTypeImpl;
+
+template <typename T>
+using NearestFieldType = typename NearestFieldTypeImpl<T>::Type;
+
 /// char may be signed or unsigned, and behave identically to signed char or unsigned char,
 ///  but they are always three different types.
 /// signedness of char is different in Linux on x86 and Linux on ARM.
@ -230,6 +230,16 @@ struct NearestFieldTypeImpl<T, std::enable_if_t<std::is_enum_v<T>>>
    using Type = NearestFieldType<std::underlying_type_t<T>>;
 };

+/// Converts `x` to NearestFieldType of its decayed type.
+/// If the decayed type of `x` already is that type then `x` is forwarded as is,
+/// otherwise a new value of that type is constructed from `x`.
+template <typename T>
+decltype(auto) castToNearestFieldType(T && x)
+{
+    using U = NearestFieldType<std::decay_t<T>>;
+    if constexpr (std::is_same_v<std::decay_t<T>, U>)
+        return std::forward<T>(x);
+    else
+        return U(x);
+}
+
 /** 32 is enough. Round number is used for alignment and for better arithmetic inside std::vector.
  * NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is 40 bytes.
  */
@ -322,9 +332,10 @@ public:

    /// Templates to avoid ambiguity.
    template <typename T, typename Z = void *>
-    using enable_if_not_field_or_stringlike_t = std::enable_if_t<
-        !std::is_same_v<std::decay_t<T>, Field>
-        && !std::is_same_v<NearestFieldType<std::decay_t<T>>, String>, Z>;
+    using enable_if_not_field_or_bool_or_stringlike_t = std::enable_if_t<
+        !std::is_same_v<std::decay_t<T>, Field> &&
+        !std::is_same_v<std::decay_t<T>, bool> &&
+        !std::is_same_v<NearestFieldType<std::decay_t<T>>, String>, Z>;

    Field() //-V730
        : which(Types::Null)
@ -345,7 +356,9 @@ public:
    }

    template <typename T>
-    Field(T && rhs, enable_if_not_field_or_stringlike_t<T> = nullptr);
+    Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t<T> = nullptr);
+
+    /// bool is excluded from the template constructor above, so it has its own constructor:
+    /// the value is converted with castToNearestFieldType() and passed to another constructor.
+    Field(bool rhs) : Field(castToNearestFieldType(rhs)) {}

    /// Create a string inplace.
    Field(const std::string_view & str) { create(str.data(), str.size()); }
@ -395,9 +408,11 @@ public:
    /// 1. float <--> int needs explicit cast
    /// 2. customized types needs explicit cast
    template <typename T>
-    enable_if_not_field_or_stringlike_t<T, Field> &
+    enable_if_not_field_or_bool_or_stringlike_t<T, Field> &
    operator=(T && rhs);

+    /// bool is excluded from the template operator= above, so it has its own overload:
+    /// the value is converted with castToNearestFieldType() and then assigned.
+    Field & operator= (bool rhs) { return *this = castToNearestFieldType(rhs); }
+
    Field & operator= (const std::string_view & str);
    Field & operator= (const String & str) { return *this = std::string_view{str}; }
    Field & operator= (String && str);
@ -876,24 +891,14 @@ template <> inline constexpr const char * TypeName<AggregateFunctionStateData> =


 template <typename T>
-decltype(auto) castToNearestFieldType(T && x)
-{
-    using U = NearestFieldType<std::decay_t<T>>;
-    if constexpr (std::is_same_v<std::decay_t<T>, U>)
-        return std::forward<T>(x);
-    else
-        return U(x);
-}
-
-template <typename T>
-Field::Field(T && rhs, enable_if_not_field_or_stringlike_t<T>) //-V730
+Field::Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t<T>) //-V730
 {
    auto && val = castToNearestFieldType(std::forward<T>(rhs));
    createConcrete(std::forward<decltype(val)>(val));
 }

 template <typename T>
-Field::enable_if_not_field_or_stringlike_t<T, Field> &
+Field::enable_if_not_field_or_bool_or_stringlike_t<T, Field> &
 Field::operator=(T && rhs)
 {
    auto && val = castToNearestFieldType(std::forward<T>(rhs));
@ -908,7 +913,6 @@ Field::operator=(T && rhs)
    return *this;
 }

-
 inline Field & Field::operator=(const std::string_view & str)
 {
    if (which != Types::String)
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -120,7 +120,7 @@ class IColumn;
    M(UInt64, parallel_replicas_count, 0, "", 0) \
    M(UInt64, parallel_replica_offset, 0, "", 0) \
    \
-    M(Bool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
+    M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
    \
    M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
    M(UInt64, distributed_group_by_no_merge, 0, "If 1, Do not merge aggregation states from different servers for distributed queries (shards will process query up to the Complete stage, initiator just proxies the data from the shards). If 2 the initiator will apply ORDER BY and LIMIT stages (it is not in case when shard process query up to the Complete stage)", 0) \
@ -157,8 +157,8 @@ class IColumn;
    M(UInt64, min_bytes_to_use_mmap_io, 0, "The minimum number of bytes for reading the data with mmap option during SELECT queries execution. 0 - disabled.", 0) \
    M(Bool, checksum_on_read, true, "Validate checksums on reading. It is enabled by default and should be always enabled in production. Please do not expect any benefits in disabling this setting. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network.", 0) \
    \
-    M(Bool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
-    M(Bool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
+    M(Bool, force_index_by_date, false, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
+    M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
    M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \
    \
    M(Float, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \
@ -171,8 +171,8 @@ class IColumn;
    M(UInt64, priority, 0, "Priority of the query. 1 - the highest, higher value - lower priority; 0 - do not use priorities.", 0) \
    M(Int64, os_thread_priority, 0, "If non zero - set corresponding 'nice' value for query processing threads. Can be used to adjust query priority for OS scheduler.", 0) \
    \
-    M(Bool, log_queries, 1, "Log requests and write the log to the system table.", 0) \
-    M(Bool, log_formatted_queries, 0, "Log formatted queries and write the log to the system table.", 0) \
+    M(Bool, log_queries, true, "Log requests and write the log to the system table.", 0) \
+    M(Bool, log_formatted_queries, false, "Log formatted queries and write the log to the system table.", 0) \
    M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "Minimal type in query_log to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \
    M(Milliseconds, log_queries_min_query_duration_ms, 0, "Minimal time for the query to run, to get to the query_log/query_thread_log/query_views_log.", 0) \
    M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \
@ -198,10 +198,10 @@ class IColumn;
    \
    M(Float, memory_tracker_fault_probability, 0., "For testing of `exception safety` - throw an exception every time you allocate memory with the specified probability.", 0) \
    \
-    M(Bool, enable_http_compression, 0, "Compress the result if the client over HTTP said that it understands data compressed by gzip or deflate.", 0) \
+    M(Bool, enable_http_compression, false, "Compress the result if the client over HTTP said that it understands data compressed by gzip or deflate.", 0) \
    M(Int64, http_zlib_compression_level, 3, "Compression level - used if the client on HTTP said that it understands data compressed by gzip or deflate.", 0) \
    \
-    M(Bool, http_native_compression_disable_checksumming_on_decompress, 0, "If you uncompress the POST data from the client compressed by the native format, do not check the checksum.", 0) \
+    M(Bool, http_native_compression_disable_checksumming_on_decompress, false, "If you uncompress the POST data from the client compressed by the native format, do not check the checksum.", 0) \
    \
    M(String, count_distinct_implementation, "uniqExact", "What aggregate function to use for implementation of count(DISTINCT ...)", 0) \
    \
@ -215,9 +215,9 @@ class IColumn;
    \
    M(UInt64, http_headers_progress_interval_ms, 100, "Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval.", 0) \
    \
-    M(Bool, fsync_metadata, 1, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
+    M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
    \
-    M(Bool, join_use_nulls, 0, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \
+    M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \
    \
    M(JoinStrictness, join_default_strictness, JoinStrictness::ALL, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \
    M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \
@ -225,7 +225,7 @@ class IColumn;
    M(UInt64, preferred_block_size_bytes, 1000000, "", 0) \
    \
    M(UInt64, max_replica_delay_for_distributed_queries, 300, "If set, distributed queries of Replicated tables will choose servers with replication delay in seconds less than the specified value (not inclusive). Zero means do not take delay into account.", 0) \
-    M(Bool, fallback_to_stale_replicas_for_distributed_queries, 1, "Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. If this setting is enabled, the query will be performed anyway, otherwise the error will be reported.", 0) \
+    M(Bool, fallback_to_stale_replicas_for_distributed_queries, true, "Suppose max_replica_delay_for_distributed_queries is set and all replicas for the queried table are stale. If this setting is enabled, the query will be performed anyway, otherwise the error will be reported.", 0) \
    M(UInt64, preferred_max_column_in_block_size_bytes, 0, "Limit on max column size in block while reading. Helps to decrease cache misses count. Should be close to L2 cache size.", 0) \
    \
    M(Bool, insert_distributed_sync, false, "If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster.", 0) \
@ -241,7 +241,7 @@ class IColumn;
    /** Settings for testing connection collector */ \
    M(Milliseconds, sleep_in_receive_cancel_ms, 0, "Time to sleep in receiving cancel in TCPHandler", 0) \
    \
-    M(Bool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.", 0) \
+    M(Bool, insert_allow_materialized_columns, false, "If setting is enabled, Allow materialized columns in INSERT.", 0) \
    M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \
    M(Seconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP send timeout", 0) \
    M(Seconds, http_receive_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP receive timeout", 0) \
@ -354,24 +354,26 @@ class IColumn;
    M(UInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running user queries. Zero means unlimited.", 0)\
    M(UInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.", 0) \
    \
+    M(UInt64, max_backup_threads, 0, "The maximum number of threads to execute a BACKUP or RESTORE request. By default, it is determined automatically.", 0) \
+    \
    M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \
    M(Bool, log_query_settings, true, "Log query settings into the query_log.", 0) \
    M(Bool, log_query_threads, true, "Log query threads into system.query_thread_log table. This setting have effect only when 'log_queries' is true.", 0) \
    M(Bool, log_query_views, true, "Log query dependent views into system.query_views_log table. This setting have effect only when 'log_queries' is true.", 0) \
    M(String, log_comment, "", "Log comment into system.query_log table and server log. It can be set to arbitrary string no longer than max_query_size.", 0) \
    M(LogsLevel, send_logs_level, LogsLevel::fatal, "Send server text logs with specified minimum level to client. Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \
-    M(Bool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.", 0) \
-    M(Bool, enable_optimize_predicate_expression_to_final_subquery, 1, "Allow push predicate to final subquery.", 0) \
-    M(Bool, allow_push_predicate_when_subquery_contains_with, 1, "Allows push predicate when subquery contains WITH clause", 0) \
+    M(Bool, enable_optimize_predicate_expression, true, "If it is set to true, optimize predicates to subqueries.", 0) \
+    M(Bool, enable_optimize_predicate_expression_to_final_subquery, true, "Allow push predicate to final subquery.", 0) \
+    M(Bool, allow_push_predicate_when_subquery_contains_with, true, "Allows push predicate when subquery contains WITH clause", 0) \
    \
    M(UInt64, low_cardinality_max_dictionary_size, 8192, "Maximum size (in rows) of shared global dictionary for LowCardinality type.", 0) \
    M(Bool, low_cardinality_use_single_dictionary_for_part, false, "LowCardinality type serialization setting. If is true, than will use additional keys when global dictionary overflows. Otherwise, will create several shared dictionaries.", 0) \
    M(Bool, decimal_check_overflow, true, "Check overflow of decimal arithmetic/comparison operations", 0) \
    \
-    M(Bool, prefer_localhost_replica, 1, "1 - always send query to local replica, if it exists. 0 - choose replica to send query between local and remote ones according to load_balancing", 0) \
+    M(Bool, prefer_localhost_replica, true, "If it's true then queries will be always sent to local replica (if it exists). If it's false then replica to send a query will be chosen between local and remote ones according to load_balancing", 0) \
    M(UInt64, max_fetch_partition_retries_count, 5, "Amount of retries while fetching partition from another host.", 0) \
    M(UInt64, http_max_multipart_form_data_size, 1024 * 1024 * 1024, "Limit on size of multipart/form-data content. This setting cannot be parsed from URL parameters and should be set in user profile. Note that content is parsed and external tables are created in memory before start of query execution. And this is the only limit that has effect on that stage (limits on max memory usage and max execution time have no effect while reading HTTP form data).", 0) \
-    M(Bool, calculate_text_stack_trace, 1, "Calculate text stack trace in case of exceptions during query execution. This is the default. It requires symbol lookups that may slow down fuzzing tests when huge amount of wrong queries are executed. In normal cases you should not disable this option.", 0) \
+    M(Bool, calculate_text_stack_trace, true, "Calculate text stack trace in case of exceptions during query execution. This is the default. It requires symbol lookups that may slow down fuzzing tests when huge amount of wrong queries are executed. In normal cases you should not disable this option.", 0) \
    M(Bool, allow_ddl, true, "If it is set to true, then a user is allowed to executed DDL queries.", 0) \
    M(Bool, parallel_view_processing, false, "Enables pushing to attached views concurrently instead of sequentially.", 0) \
    M(Bool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one.", 0) \
@ -518,8 +520,8 @@ class IColumn;

 #define FORMAT_FACTORY_SETTINGS(M) \
    M(Char, format_csv_delimiter, ',', "The character to be considered as a delimiter in CSV data. If setting with a string, a string has to have a length of 1.", 0) \
-    M(Bool, format_csv_allow_single_quotes, 1, "If it is set to true, allow strings in single quotes.", 0) \
-    M(Bool, format_csv_allow_double_quotes, 1, "If it is set to true, allow strings in double quotes.", 0) \
+    M(Bool, format_csv_allow_single_quotes, true, "If it is set to true, allow strings in single quotes.", 0) \
+    M(Bool, format_csv_allow_double_quotes, true, "If it is set to true, allow strings in double quotes.", 0) \
    M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \
    M(Bool, input_format_csv_unquoted_null_literal_as_null, false, "Consider unquoted NULL literal as \\N", 0) \
    M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices \\N", 0) \
--- a/src/Core/tests/gtest_move_field.cpp
+++ b/src/Core/tests/gtest_move_field.cpp
@ -3,6 +3,40 @@

 using namespace DB;

+GTEST_TEST(Field, FromBool)
+{   /// A Field made from a bool has to hold UInt64 0/1 and stay readable through get<bool>().
+    {   /// Constructed from `false`.
+        Field f{false};
+        ASSERT_EQ(f.getType(), Field::Types::UInt64);
+        ASSERT_EQ(f.get<UInt64>(), 0);
+        ASSERT_EQ(f.get<bool>(), false);
+    }
+
+    {   /// Constructed from `true`.
+        Field f{true};
+        ASSERT_EQ(f.getType(), Field::Types::UInt64);
+        ASSERT_EQ(f.get<UInt64>(), 1);
+        ASSERT_EQ(f.get<bool>(), true);
+    }
+
+    {   /// `false` assigned to a default-constructed Field.
+        Field f;
+        f = false;
+        ASSERT_EQ(f.getType(), Field::Types::UInt64);
+        ASSERT_EQ(f.get<UInt64>(), 0);
+        ASSERT_EQ(f.get<bool>(), false);
+    }
+
+    {   /// `true` assigned to a default-constructed Field.
+        Field f;
+        f = true;
+        ASSERT_EQ(f.getType(), Field::Types::UInt64);
+        ASSERT_EQ(f.get<UInt64>(), 1);
+        ASSERT_EQ(f.get<bool>(), true);
+    }
+}
+
+
 GTEST_TEST(Field, Move)
 {
    Field f;
--- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp
+++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp
@ -70,7 +70,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(

        // Do not deduplicate insertions into MV if the main insertion is Ok
        if (disable_deduplication_for_children)
-            insert_context->setSetting("insert_deduplicate", Field{false});
+            insert_context->setSetting("insert_deduplicate", false);

        // Separate min_insert_block_size_rows/min_insert_block_size_bytes for children
        if (insert_settings.min_insert_block_size_rows_for_materialized_views)
--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@ -403,7 +403,7 @@ void DatabaseAtomic::assertCanBeDetached(bool cleanup)
 }

 DatabaseTablesIteratorPtr
-DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name)
+DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name) const
 {
    auto base_iter = DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name);
    return std::make_unique<AtomicDatabaseTablesSnapshotIterator>(std::move(typeid_cast<DatabaseTablesSnapshotIterator &>(*base_iter)));
--- a/src/Databases/DatabaseAtomic.h
+++ b/src/Databases/DatabaseAtomic.h
@ -45,7 +45,7 @@ public:

    void drop(ContextPtr /*context*/) override;

-    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) override;
+    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

    void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach) override;

--- a/src/Databases/DatabaseDictionary.cpp
+++ b/src/Databases/DatabaseDictionary.cpp
@ -52,7 +52,7 @@ DatabaseDictionary::DatabaseDictionary(const String & name_, ContextPtr context_
 {
 }

-Tables DatabaseDictionary::listTables(const FilterByNameFunction & filter_by_name)
+Tables DatabaseDictionary::listTables(const FilterByNameFunction & filter_by_name) const
 {
    Tables tables;
    auto load_results = getContext()->getExternalDictionariesLoader().getLoadResults(filter_by_name);
@ -77,7 +77,7 @@ StoragePtr DatabaseDictionary::tryGetTable(const String & table_name, ContextPtr
    return createStorageDictionary(getDatabaseName(), load_result, getContext());
 }

-DatabaseTablesIteratorPtr DatabaseDictionary::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name)
+DatabaseTablesIteratorPtr DatabaseDictionary::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name) const
 {
    return std::make_unique<DatabaseTablesSnapshotIterator>(listTables(filter_by_table_name), getDatabaseName());
 }
--- a/src/Databases/DatabaseDictionary.h
+++ b/src/Databases/DatabaseDictionary.h
@ -34,7 +34,7 @@ public:

    StoragePtr tryGetTable(const String & table_name, ContextPtr context) const override;

-    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) override;
+    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

    bool empty() const override;

@ -50,7 +50,7 @@ protected:
 private:
    Poco::Logger * log;

-    Tables listTables(const FilterByNameFunction & filter_by_name);
+    Tables listTables(const FilterByNameFunction & filter_by_name) const;
 };

 }
--- a/src/Databases/DatabaseLazy.cpp
+++ b/src/Databases/DatabaseLazy.cpp
@ -143,7 +143,7 @@ StoragePtr DatabaseLazy::tryGetTable(const String & table_name) const
    return loadTable(table_name);
 }

-DatabaseTablesIteratorPtr DatabaseLazy::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name)
+DatabaseTablesIteratorPtr DatabaseLazy::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name) const
 {
    std::lock_guard lock(mutex);
    Strings filtered_tables;
@ -304,7 +304,7 @@ void DatabaseLazy::clearExpiredTables() const
 }


-DatabaseLazyIterator::DatabaseLazyIterator(DatabaseLazy & database_, Strings && table_names_)
+DatabaseLazyIterator::DatabaseLazyIterator(const DatabaseLazy & database_, Strings && table_names_)
    : IDatabaseTablesIterator(database_.database_name)
    , database(database_)
    , table_names(std::move(table_names_))
--- a/src/Databases/DatabaseLazy.h
+++ b/src/Databases/DatabaseLazy.h
@ -64,7 +64,7 @@ public:

    bool empty() const override;

-    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) override;
+    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

    void attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path) override;

@ -119,7 +119,7 @@ class DatabaseLazyIterator final : public IDatabaseTablesIterator
 {
 public:
    DatabaseLazyIterator(
-        DatabaseLazy & database_,
+        const DatabaseLazy & database_,
        Strings && table_names_);

    void next() override;
--- a/src/Databases/DatabaseOnDisk.h
+++ b/src/Databases/DatabaseOnDisk.h
@ -11,7 +11,6 @@ namespace DB
 {

 class Context;
-
 std::pair<String, StoragePtr> createTableFromAST(
    ASTCreateQuery ast_create_query,
    const String & database_name,
--- a/src/Databases/DatabasesCommon.cpp
+++ b/src/Databases/DatabasesCommon.cpp
@ -1,6 +1,7 @@
 #include <Databases/DatabasesCommon.h>
 #include <Interpreters/InterpreterCreateQuery.h>
 #include <Interpreters/Context.h>
+#include <Parsers/ASTCreateQuery.h>
 #include <Parsers/ParserCreateQuery.h>
 #include <Parsers/formatAST.h>
 #include <Storages/StorageDictionary.h>
@ -40,7 +41,7 @@ StoragePtr DatabaseWithOwnTablesBase::tryGetTable(const String & table_name, Con
    return {};
 }

-DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name)
+DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name) const
 {
    std::lock_guard lock(mutex);
    if (!filter_by_table_name)
--- a/src/Databases/DatabasesCommon.h
+++ b/src/Databases/DatabasesCommon.h
@ -29,7 +29,7 @@ public:

    StoragePtr detachTable(const String & table_name) override;

-    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) override;
+    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

    void shutdown() override;

--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@ -137,7 +137,7 @@ public:

    /// Get an iterator that allows you to pass through all the tables.
    /// It is possible to have "hidden" tables that are not visible when passing through, but are visible if you get them by name using the functions above.
-    virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}) = 0;
+    virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}) const = 0;

    /// Is the database empty.
    virtual bool empty() const = 0;
@ -240,6 +240,12 @@ public:
        throw Exception(getEngineName() + ": RENAME DATABASE is not supported", ErrorCodes::NOT_IMPLEMENTED);
    }

+    /// Returns an iterator over the tables of this database that should be written to a backup.
+    virtual DatabaseTablesIteratorPtr getTablesIteratorForBackup(ContextPtr context) const
+    {
+        return getTablesIterator(context); /// By default we backup each table.
+    }
+
    /// Returns path for persistent data storage if the database supports it, empty string otherwise
    virtual String getDataPath() const { return {}; }

--- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp
+++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp
@ -186,7 +186,7 @@ StoragePtr DatabaseMaterializedMySQL<Base>::tryGetTable(const String & name, Con

 template <typename Base>
 DatabaseTablesIteratorPtr
-DatabaseMaterializedMySQL<Base>::getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name)
+DatabaseMaterializedMySQL<Base>::getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const
 {
    if (!MaterializedMySQLSyncThread::isMySQLSyncThread())
    {
--- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h
+++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h
@ -61,7 +61,7 @@ public:

    StoragePtr tryGetTable(const String & name, ContextPtr context_) const override;

-    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) override;
+    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override;

    void assertCalledFromSyncThreadOrDrop(const char * method) const;

--- a/src/Databases/MySQL/DatabaseMySQL.cpp
+++ b/src/Databases/MySQL/DatabaseMySQL.cpp
@ -84,7 +84,7 @@ bool DatabaseMySQL::empty() const
    return true;
 }

-DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & filter_by_table_name)
+DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & filter_by_table_name) const
 {
    Tables tables;
    std::lock_guard<std::mutex> lock(mutex);
--- a/src/Databases/MySQL/DatabaseMySQL.h
+++ b/src/Databases/MySQL/DatabaseMySQL.h
@ -57,7 +57,7 @@ public:

    bool empty() const override;

-    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) override;
+    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

    ASTPtr getCreateDatabaseQuery() const override;

--- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp
+++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp
@ -203,7 +203,7 @@ void DatabaseMaterializedPostgreSQL::drop(ContextPtr local_context)


 DatabaseTablesIteratorPtr DatabaseMaterializedPostgreSQL::getTablesIterator(
-        ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name)
+    ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const
 {
    /// Modify context into nested_context and pass query to Atomic database.
    return DatabaseAtomic::getTablesIterator(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), filter_by_table_name);
--- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h
+++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h
@ -46,7 +46,7 @@ public:
    void loadStoredObjects(ContextMutablePtr, bool, bool force_attach) override;

    DatabaseTablesIteratorPtr getTablesIterator(
-            ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) override;
+            ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override;

    StoragePtr tryGetTable(const String & name, ContextPtr context) const override;

--- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp
+++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp
@ -88,7 +88,7 @@ bool DatabasePostgreSQL::empty() const
 }


-DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & /* filter_by_table_name */)
+DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & /* filter_by_table_name */) const
 {
    std::lock_guard<std::mutex> lock(mutex);

--- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h
+++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h
@ -50,7 +50,7 @@ public:

    void loadStoredObjects(ContextMutablePtr, bool, bool force_attach) override;

-    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) override;
+    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

    bool isTableExist(const String & name, ContextPtr context) const override;
    StoragePtr tryGetTable(const String & name, ContextPtr context) const override;
--- a/src/Databases/SQLite/DatabaseSQLite.cpp
+++ b/src/Databases/SQLite/DatabaseSQLite.cpp
@ -44,7 +44,7 @@ bool DatabaseSQLite::empty() const
 }


-DatabaseTablesIteratorPtr DatabaseSQLite::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction &)
+DatabaseTablesIteratorPtr DatabaseSQLite::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction &) const
 {
    std::lock_guard<std::mutex> lock(mutex);

--- a/src/Databases/SQLite/DatabaseSQLite.h
+++ b/src/Databases/SQLite/DatabaseSQLite.h
@ -34,7 +34,7 @@ public:

    StoragePtr tryGetTable(const String & name, ContextPtr context) const override;

-    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) override;
+    DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;

    bool empty() const override;

--- a/src/Dictionaries/ClickHouseDictionarySource.cpp
+++ b/src/Dictionaries/ClickHouseDictionarySource.cpp
@ -256,8 +256,8 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory)
        if (configuration.is_local)
        {
            /// Start local session in case when the dictionary is loaded in-process (without TCP communication).
-            local_session = std::make_shared<Session>(global_context, ClientInfo::Interface::TCP);
-            local_session->authenticate(configuration.user, configuration.password, Poco::Net::SocketAddress{"127.0.0.1", 0});
+            local_session = std::make_shared<Session>(global_context, ClientInfo::Interface::LOCAL);
+            local_session->authenticate(configuration.user, configuration.password, {});
            context = local_session->makeQueryContext();
            context->applySettingsChanges(readSettingsFromDictionaryConfig(config, config_prefix));
        }
--- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp
+++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp
@ -297,7 +297,7 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)
        /** Currently parallel parsing input format cannot read exactly max_block_size rows from input,
         *  so it will be blocked on ReadBufferFromFileDescriptor because this file descriptor represent pipe that does not have eof.
         */
-        context->setSetting("input_format_parallel_parsing", Field{false});
+        context->setSetting("input_format_parallel_parsing", false);

        String settings_config_prefix = config_prefix + ".executable_pool";

--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@ -6,25 +6,37 @@
 #include <bitset>
 #include <random>
 #include <utility>
-#include <IO/ReadBufferFromString.h>
-#include <Interpreters/Context.h>
-#include <IO/ReadBufferFromS3.h>
+
+#include <boost/algorithm/string.hpp>
+
+#include <common/unit.h>
+
+#include <Common/checkStackSize.h>
+#include <Common/createHardLink.h>
+#include <Common/quoteString.h>
+#include <Common/thread_local_rng.h>
+
 #include <Disks/ReadIndirectBufferFromRemoteFS.h>
 #include <Disks/WriteIndirectBufferFromRemoteFS.h>
+
+#include <Interpreters/Context.h>
+
+#include <IO/ReadBufferFromS3.h>
+#include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>
 #include <IO/SeekAvoidingReadBuffer.h>
 #include <IO/WriteBufferFromS3.h>
 #include <IO/WriteHelpers.h>
-#include <Common/createHardLink.h>
-#include <Common/quoteString.h>
-#include <Common/thread_local_rng.h>
-#include <Common/checkStackSize.h>
-#include <boost/algorithm/string.hpp>
+
 #include <aws/s3/model/CopyObjectRequest.h> // Y_IGNORE
 #include <aws/s3/model/DeleteObjectsRequest.h> // Y_IGNORE
 #include <aws/s3/model/GetObjectRequest.h> // Y_IGNORE
 #include <aws/s3/model/ListObjectsV2Request.h> // Y_IGNORE
 #include <aws/s3/model/HeadObjectRequest.h> // Y_IGNORE
+#include <aws/s3/model/CreateMultipartUploadRequest.h> // Y_IGNORE
+#include <aws/s3/model/CompleteMultipartUploadRequest.h> // Y_IGNORE
+#include <aws/s3/model/UploadPartCopyRequest.h> // Y_IGNORE
+#include <aws/s3/model/AbortMultipartUploadRequest.h> // Y_IGNORE


 namespace DB
@ -388,16 +400,7 @@ void DiskS3::saveSchemaVersion(const int & version)

 void DiskS3::updateObjectMetadata(const String & key, const ObjectMetadata & metadata)
 {
-    auto settings = current_settings.get();
-    Aws::S3::Model::CopyObjectRequest request;
-    request.SetCopySource(bucket + "/" + key);
-    request.SetBucket(bucket);
-    request.SetKey(key);
-    request.SetMetadata(metadata);
-    request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE);
-
-    auto outcome = settings->client->CopyObject(request);
-    throwIfError(outcome);
+    copyObjectImpl(bucket, key, bucket, key, std::nullopt, metadata); /// Copy the object onto itself, replacing its metadata.
 }

 void DiskS3::migrateFileToRestorableSchema(const String & path)
@ -553,18 +556,124 @@ void DiskS3::listObjects(const String & source_bucket, const String & source_pat
    } while (outcome.GetResult().GetIsTruncated());
 }

-void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key) const
+void DiskS3::copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
+    std::optional<Aws::S3::Model::HeadObjectResult> head) const
+{
+    const bool known_to_need_multipart = head && (head->GetContentLength() >= static_cast<Int64>(5_GiB));
+    if (!known_to_need_multipart) /// Unknown or smaller size: start with a single CopyObject request.
+        return copyObjectImpl(src_bucket, src_key, dst_bucket, dst_key);
+    copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head);
+}
+
+void DiskS3::copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
+    std::optional<Aws::S3::Model::HeadObjectResult> head,
+    std::optional<std::reference_wrapper<const ObjectMetadata>> metadata) const
 {
    auto settings = current_settings.get();
    Aws::S3::Model::CopyObjectRequest request;
    request.SetCopySource(src_bucket + "/" + src_key);
    request.SetBucket(dst_bucket);
    request.SetKey(dst_key);
+    if (metadata) /// Without `metadata` no metadata directive is set on the request.
+    {
+        request.SetMetadata(*metadata);
+        request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE);
+    }

    auto outcome = settings->client->CopyObject(request);
+
+    if (!outcome.IsSuccess() && outcome.GetError().GetExceptionName() == "EntityTooLarge")
+    { // Can't come here with MinIO, MinIO allows single part upload for large objects.
+        copyObjectMultipartImpl(src_bucket, src_key, dst_bucket, dst_key, head, metadata); /// Retry the same copy as a multipart upload.
+        return;
+    }
+
    throwIfError(outcome);
 }

+/// Copies an object using a multipart upload; `head` is requested with headObject() when it is not passed.
+void DiskS3::copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
+    std::optional<Aws::S3::Model::HeadObjectResult> head,
+    std::optional<std::reference_wrapper<const ObjectMetadata>> metadata) const
+{
+    LOG_DEBUG(log, "Multipart copy upload has created. Src Bucket: {}, Src Key: {}, Dst Bucket: {}, Dst Key: {}, Metadata: {}",
+        src_bucket, src_key, dst_bucket, dst_key, metadata ? "REPLACE" : "NOT_SET");
+
+    auto settings = current_settings.get();
+
+    if (!head)
+        head = headObject(src_bucket, src_key);
+
+    size_t size = head->GetContentLength();
+    String multipart_upload_id;
+
+    {
+        Aws::S3::Model::CreateMultipartUploadRequest request;
+        request.SetBucket(dst_bucket);
+        request.SetKey(dst_key);
+        /// UploadPartCopy copies only data, so the source metadata is carried over explicitly unless it is replaced.
+        if (metadata)
+            request.SetMetadata(*metadata);
+        else
+            request.SetMetadata(head->GetMetadata());
+
+        auto outcome = settings->client->CreateMultipartUpload(request);
+        throwIfError(outcome);
+        multipart_upload_id = outcome.GetResult().GetUploadId();
+    }
+
+    /// Best-effort cleanup of the unfinished upload. Its own outcome is ignored: the error that led here is thrown afterwards.
+    auto abort_upload = [&]
+    {
+        Aws::S3::Model::AbortMultipartUploadRequest abort_request;
+        abort_request.SetBucket(dst_bucket);
+        abort_request.SetKey(dst_key);
+        abort_request.SetUploadId(multipart_upload_id);
+        settings->client->AbortMultipartUpload(abort_request);
+    };
+
+    std::vector<String> part_tags;
+    size_t upload_part_size = settings->s3_min_upload_part_size;
+    for (size_t position = 0, part_number = 1; position < size; ++part_number, position += upload_part_size)
+    {
+        Aws::S3::Model::UploadPartCopyRequest part_request;
+        part_request.SetCopySource(src_bucket + "/" + src_key);
+        part_request.SetBucket(dst_bucket);
+        part_request.SetKey(dst_key);
+        part_request.SetUploadId(multipart_upload_id);
+        part_request.SetPartNumber(part_number);
+        part_request.SetCopySourceRange(fmt::format("bytes={}-{}", position, std::min(size, position + upload_part_size) - 1));
+
+        auto outcome = settings->client->UploadPartCopy(part_request);
+        if (!outcome.IsSuccess())
+            abort_upload();
+        throwIfError(outcome);
+        part_tags.push_back(outcome.GetResult().GetCopyPartResult().GetETag());
+    }
+
+    {
+        Aws::S3::Model::CompleteMultipartUploadRequest req;
+        req.SetBucket(dst_bucket);
+        req.SetKey(dst_key);
+        req.SetUploadId(multipart_upload_id);
+        Aws::S3::Model::CompletedMultipartUpload multipart_upload;
+        for (size_t i = 0; i < part_tags.size(); ++i)
+        {
+            Aws::S3::Model::CompletedPart part;
+            multipart_upload.AddParts(part.WithETag(part_tags[i]).WithPartNumber(i + 1));
+        }
+        req.SetMultipartUpload(multipart_upload);
+
+        auto outcome = settings->client->CompleteMultipartUpload(req);
+        if (!outcome.IsSuccess())
+            abort_upload(); /// Otherwise the already copied parts would be left behind in the bucket.
+        throwIfError(outcome);
+
+        LOG_DEBUG(log, "Multipart copy upload has completed. Src Bucket: {}, Src Key: {}, Dst Bucket: {}, Dst Key: {}, "
+            "Upload_id: {}, Parts: {}", src_bucket, src_key, dst_bucket, dst_key, multipart_upload_id, part_tags.size());
+    }
+}
+
 struct DiskS3::RestoreInformation
 {
    UInt64 revision = LATEST_REVISION;
@ -757,7 +866,7 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so

        /// Copy object if we restore to different bucket / path.
        if (bucket != source_bucket || remote_fs_root_path != source_path)
-            copyObject(source_bucket, key, bucket, remote_fs_root_path + relative_key);
+            copyObject(source_bucket, key, bucket, remote_fs_root_path + relative_key, head_result);

        metadata.addObject(relative_key, head_result.GetContentLength());
        metadata.save();
--- a/src/Disks/S3/DiskS3.h
+++ b/src/Disks/S3/DiskS3.h
@ -7,6 +7,7 @@
 #if USE_AWS_S3

 #include <atomic>
+#include <optional>
 #include <common/logger_useful.h>
 #include "Disks/DiskFactory.h"
 #include "Disks/Executor.h"
@ -131,7 +132,15 @@ private:

    Aws::S3::Model::HeadObjectResult headObject(const String & source_bucket, const String & key) const;
    void listObjects(const String & source_bucket, const String & source_path, std::function<bool(const Aws::S3::Model::ListObjectsV2Result &)> callback) const;
-    void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key) const;
+    void copyObject(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
+        std::optional<Aws::S3::Model::HeadObjectResult> head = std::nullopt) const;
+
+    void copyObjectImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
+        std::optional<Aws::S3::Model::HeadObjectResult> head = std::nullopt,
+        std::optional<std::reference_wrapper<const ObjectMetadata>> metadata = std::nullopt) const;
+    void copyObjectMultipartImpl(const String & src_bucket, const String & src_key, const String & dst_bucket, const String & dst_key,
+        std::optional<Aws::S3::Model::HeadObjectResult> head = std::nullopt,
+        std::optional<std::reference_wrapper<const ObjectMetadata>> metadata = std::nullopt) const;

    /// Restore S3 metadata files on file system.
    void restore();
--- a/src/Disks/TemporaryFileOnDisk.cpp
+++ b/src/Disks/TemporaryFileOnDisk.cpp
@ -0,0 +1,27 @@
+#include <Disks/TemporaryFileOnDisk.h>
+#include <Disks/IDisk.h>
+#include <Poco/TemporaryFile.h>
+
+
+namespace DB
+{
+
+TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix_)
+    : disk(disk_)
+{
+    /// Ask Poco for a unique name inside a placeholder directory. The result is expected to start
+    /// with "a/tmp" (checked by the assert); that leading part is then swapped for the requested prefix.
+    const String placeholder_dir = "a/";
+    const String placeholder_head = placeholder_dir + "tmp";
+
+    filepath = Poco::TemporaryFile::tempName(placeholder_dir);
+    assert(filepath.starts_with(placeholder_head));
+    filepath.replace(0, placeholder_head.length(), prefix_);
+}
+
+TemporaryFileOnDisk::~TemporaryFileOnDisk()
+{
+    /// Nothing to remove without a disk or without a generated path.
+    if (!disk || filepath.empty())
+        return;
+
+    /// NOTE(review): if removeRecursive() can throw, the exception would leave a destructor - confirm.
+    disk->removeRecursive(filepath);
+}
+
+}
--- a/src/Disks/TemporaryFileOnDisk.h
+++ b/src/Disks/TemporaryFileOnDisk.h
@ -0,0 +1,29 @@
+#pragma once
+
+#include <Core/Types.h>
+#include <memory>
+
+namespace DB
+{
+class IDisk;
+using DiskPtr = std::shared_ptr<IDisk>;
+
+/// Holds a unique temporary path on a disk and removes it when the object is destroyed.
+/// The path consists of the specified prefix followed by an automatically chosen unique suffix.
+/// The constructor only chooses the path: the file or directory itself is created by the user of the class.
+/// The destructor removes the path recursively, i.e. a directory is removed with all contained files.
+class TemporaryFileOnDisk
+{
+public:
+    /// disk_   - disk the temporary path belongs to.
+    /// prefix_ - beginning of the generated path.
+    TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix_ = "tmp");
+    ~TemporaryFileOnDisk();
+
+    /// The object owns the path: a copy would remove the same path a second time in its own destructor.
+    TemporaryFileOnDisk(const TemporaryFileOnDisk &) = delete;
+    TemporaryFileOnDisk & operator=(const TemporaryFileOnDisk &) = delete;
+
+    DiskPtr getDisk() const { return disk; }
+    const String & getPath() const { return filepath; }
+
+private:
+    DiskPtr disk;
+    String filepath;
+};
+
+}
--- a/src/Formats/MySQLSource.cpp
+++ b/src/Formats/MySQLSource.cpp
@ -100,12 +100,12 @@ void MySQLWithFailoverSource::onStart()
        catch (const mysqlxx::ConnectionLost & ecl)  /// There are two retriable failures: CR_SERVER_GONE_ERROR, CR_SERVER_LOST
        {
            LOG_WARNING(log, "Failed connection ({}/{}). Trying to reconnect... (Info: {})", count_connect_attempts, settings->default_num_tries_on_connection_loss, ecl.displayText());
-        }

-        if (++count_connect_attempts > settings->default_num_tries_on_connection_loss)
-        {
-            LOG_ERROR(log, "Failed to create connection to MySQL. ({}/{})", count_connect_attempts, settings->default_num_tries_on_connection_loss);
-            throw;
+            if (++count_connect_attempts > settings->default_num_tries_on_connection_loss)
+            {
+                LOG_ERROR(log, "Failed to create connection to MySQL. ({}/{})", count_connect_attempts, settings->default_num_tries_on_connection_loss);
+                throw;
+            }
        }
    }

--- a/src/Functions/Regexps.h
+++ b/src/Functions/Regexps.h
@ -61,7 +61,7 @@ namespace Regexps
    template <bool like, bool no_capture, bool case_insensitive = false>
    inline Pool::Pointer get(const std::string & pattern)
    {
-        /// C++11 has thread-safe function-local statics on most modern compilers.
+        /// C++11 has thread-safe initialization of function-local static variables on most modern compilers.
        static Pool known_regexps; /// Different variables for different pattern parameters.

        return known_regexps.get(pattern, [&pattern]
@ -257,7 +257,7 @@ namespace MultiRegexps
    template <bool save_indices, bool CompileForEditDistance>
    inline Regexps * get(const std::vector<StringRef> & patterns, std::optional<UInt32> edit_distance)
    {
-        /// C++11 has thread-safe function-local statics on most modern compilers.
+        /// C++11 has thread-safe initialization of function-local static variables on most modern compilers.
        static Pool known_regexps; /// Different variables for different pattern parameters.

        std::vector<String> str_patterns;
--- a/src/Functions/isValidUTF8.cpp
+++ b/src/Functions/isValidUTF8.cpp
@ -1,14 +1,7 @@
 #include <DataTypes/DataTypeString.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionStringOrArrayToT.h>
-
-#include <cstring>
-
-#ifdef __SSE4_1__
-#    include <emmintrin.h>
-#    include <smmintrin.h>
-#    include <tmmintrin.h>
-#endif
+#include <Common/isValidUTF8.h>

 namespace DB
 {
@ -71,75 +64,8 @@ SOFTWARE.
 * +--------------------+------------+-------------+------------+-------------+
 */

-    static inline UInt8 isValidUTF8Naive(const UInt8 * data, UInt64 len)
-    {
-        while (len)
-        {
-            int bytes;
-            const UInt8 byte1 = data[0];
-            /* 00..7F */
-            if (byte1 <= 0x7F)
-            {
-                bytes = 1;
-            }
-            /* C2..DF, 80..BF */
-            else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF && static_cast<Int8>(data[1]) <= static_cast<Int8>(0xBF))
-            {
-                bytes = 2;
-            }
-            else if (len >= 3)
-            {
-                const UInt8 byte2 = data[1];
-                bool byte2_ok = static_cast<Int8>(byte2) <= static_cast<Int8>(0xBF);
-                bool byte3_ok = static_cast<Int8>(data[2]) <= static_cast<Int8>(0xBF);
-
-                if (byte2_ok && byte3_ok &&
-                    /* E0, A0..BF, 80..BF */
-                    ((byte1 == 0xE0 && byte2 >= 0xA0) ||
-                     /* E1..EC, 80..BF, 80..BF */
-                     (byte1 >= 0xE1 && byte1 <= 0xEC) ||
-                     /* ED, 80..9F, 80..BF */
-                     (byte1 == 0xED && byte2 <= 0x9F) ||
-                     /* EE..EF, 80..BF, 80..BF */
-                     (byte1 >= 0xEE && byte1 <= 0xEF)))
-                {
-                    bytes = 3;
-                }
-                else if (len >= 4)
-                {
-                    bool byte4_ok = static_cast<Int8>(data[3]) <= static_cast<Int8>(0xBF);
-                    if (byte2_ok && byte3_ok && byte4_ok &&
-                        /* F0, 90..BF, 80..BF, 80..BF */
-                        ((byte1 == 0xF0 && byte2 >= 0x90) ||
-                         /* F1..F3, 80..BF, 80..BF, 80..BF */
-                         (byte1 >= 0xF1 && byte1 <= 0xF3) ||
-                         /* F4, 80..8F, 80..BF, 80..BF */
-                         (byte1 == 0xF4 && byte2 <= 0x8F)))
-                    {
-                        bytes = 4;
-                    }
-                    else
-                    {
-                        return false;
-                    }
-                }
-                else
-                {
-                    return false;
-                }
-            }
-            else
-            {
-                return false;
-            }
-            len -= bytes;
-            data += bytes;
-        }
-        return true;
-    }
-
 #ifndef __SSE4_1__
-    static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return isValidUTF8Naive(data, len); }
+    /// When __SSE4_1__ is not defined, defer to DB::UTF8::isValidUTF8.
+    static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len)
+    {
+        return DB::UTF8::isValidUTF8(data, len);
+    }
 #else
    static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len)
    {
--- a/src/Functions/pointInPolygon.cpp
+++ b/src/Functions/pointInPolygon.cpp
@ -188,7 +188,7 @@ public:
            /// Preprocessing can be computationally heavy but dramatically speeds up matching.

            using Pool = ObjectPoolMap<PointInConstPolygonImpl, UInt128>;
-            /// C++11 has thread-safe function-local statics.
+            /// C++11 has thread-safe initialization of function-local static variables.
            static Pool known_polygons;

            auto factory = [&polygon]()
--- a/src/Functions/stringToH3.cpp
+++ b/src/Functions/stringToH3.cpp
@ -85,7 +85,7 @@ private:
        {
            auto h3index = h3index_source.getWhole();

-            // covert to std::string and get the c_str to have the delimiting \0 at the end.
+            // convert to std::string and get the c_str to have the delimiting \0 at the end.
            auto h3index_str = StringRef(h3index.data, h3index.size).toString();
            res_data[row_num] = stringToH3(h3index_str.c_str());

--- a/src/IO/ConcatReadBuffer.h
+++ b/src/IO/ConcatReadBuffer.h
@ -17,6 +17,7 @@ public:

 protected:
    ReadBuffers buffers;
+    bool own_buffers = false;
    ReadBuffers::iterator current;

    bool nextImpl() override
@ -61,7 +62,34 @@ public:
        assert(!buffers.empty());
    }

-    ConcatReadBuffer(ReadBuffer & buf1, ReadBuffer & buf2) : ConcatReadBuffer({&buf1, &buf2}) {}
+    /// Convenience overload for exactly two buffers taken by reference;
+    /// delegates to the ReadBuffers-based constructor.
+    ConcatReadBuffer(ReadBuffer & buf1, ReadBuffer & buf2)
+        : ConcatReadBuffer(ReadBuffers{&buf1, &buf2})
+    {
+    }
+
+    /// Takes ownership of every buffer in buffers_; owned buffers are deleted in the destructor.
+    ConcatReadBuffer(std::vector<std::unique_ptr<ReadBuffer>> buffers_) : ReadBuffer(nullptr, 0)
+    {
+        own_buffers = true;
+
+        /// Capacity is reserved before any pointer is released from its unique_ptr.
+        buffers.reserve(buffers_.size());
+        for (auto it = buffers_.begin(); it != buffers_.end(); ++it)
+            buffers.push_back(it->release());
+
+        current = buffers.begin();
+    }
+
+    /// Takes ownership of both buffers and stores them in the order buf1, buf2.
+    ConcatReadBuffer(std::unique_ptr<ReadBuffer> buf1, std::unique_ptr<ReadBuffer> buf2) : ReadBuffer(nullptr, 0)
+    {
+        own_buffers = true;
+
+        buffers.reserve(2);
+        buffers.push_back(buf1.release());
+        buffers.push_back(buf2.release());
+
+        current = buffers.begin();
+    }
+
+    /// Deletes the stored buffers, but only when this object took ownership of them.
+    ~ConcatReadBuffer() override
+    {
+        if (!own_buffers)
+            return;
+
+        for (ReadBuffer * owned : buffers)
+            delete owned;
+    }
 };

 }
--- a/Show More
+++ b/Show More