Merge branch 'master' into traverse_shadow_remote_data_paths

2024-11-14 03:25:15 +00:00 · 2024-03-12 15:18:52 +01:00 · 2024-03-12 15:18:52 +01:00 · 3257bfa918
commit 3257bfa918
parent e59564a9d9 19b1a675a1
178 changed files with 1369 additions and 9319 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -61,8 +61,8 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
    # set CPU time limit to 1000 seconds
    set (RLIMIT_CPU 1000)

-    # -fsanitize=memory is too heavy
-    if (SANITIZE STREQUAL "memory")
+    # sanitizer builds (-fsanitize=memory, -fsanitize=address, etc.) are too heavy
+    if (SANITIZE)
       set (RLIMIT_DATA 10000000000) # 10G
    endif()

--- a/docker/packager/README.md
+++ b/docker/packager/README.md
@ -28,7 +28,6 @@ lrwxrwxrwx 1 root root        10  clickhouse-benchmark -> clickhouse
 lrwxrwxrwx 1 root root        10  clickhouse-clang -> clickhouse
 lrwxrwxrwx 1 root root        10  clickhouse-client -> clickhouse
 lrwxrwxrwx 1 root root        10  clickhouse-compressor -> clickhouse
-lrwxrwxrwx 1 root root        10  clickhouse-copier -> clickhouse
 lrwxrwxrwx 1 root root        10  clickhouse-extract-from-config -> clickhouse
 lrwxrwxrwx 1 root root        10  clickhouse-format -> clickhouse
 lrwxrwxrwx 1 root root        10  clickhouse-lld -> clickhouse
--- a/docker/test/base/Dockerfile
+++ b/docker/test/base/Dockerfile
@ -33,6 +33,9 @@ ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_m
 ENV UBSAN_OPTIONS='print_stacktrace=1'
 ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'

+# for external_symbolizer_path
+RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
+
 RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
 ENV LC_ALL en_US.UTF-8

--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@ -14,7 +14,6 @@ RUN apt-get update \
        libclang-${LLVM_VERSION}-dev \
        libclang-rt-${LLVM_VERSION}-dev \
        lld-${LLVM_VERSION} \
-        llvm-${LLVM_VERSION} \
        llvm-${LLVM_VERSION}-dev \
        lsof \
        ninja-build \
@ -37,8 +36,6 @@ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3

 # This symlink is required by gcc to find the lld linker
 RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
-# for external_symbolizer_path
-RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
 # FIXME: workaround for "The imported target "merge-fdata" references the file" error
 # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
 RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@ -343,7 +343,7 @@ quit
        # which is confusing.
        task_exit_code=$fuzzer_exit_code
        echo "failure" > status.txt
-        echo "Achtung!" > description.txt
+        echo "Let op!" > description.txt
        echo "Fuzzer went wrong with error code: ($fuzzer_exit_code). Its process died somehow when the server stayed alive. The server log probably won't tell you much so try to find information in other files." >>description.txt
        { rg -ao "Found error:.*" fuzzer.log || rg -ao "Exception:.*" fuzzer.log; } | tail -1 >>description.txt
    fi
--- a/docker/test/util/Dockerfile
+++ b/docker/test/util/Dockerfile
@ -26,6 +26,8 @@ RUN apt-get update \
    && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
    && echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
        /etc/apt/sources.list \
+    && apt-get update \
+    && apt-get install --yes --no-install-recommends --verbose-versions llvm-${LLVM_VERSION} \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

--- a/docs/en/operations/backup.md
+++ b/docs/en/operations/backup.md
@ -170,7 +170,7 @@ RESTORE TABLE test.table PARTITIONS '2', '3'

 ### Backups as tar archives

-Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported. 
+Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported.

 Write a backup as a tar:
 ```
@ -444,10 +444,6 @@ Often data that is ingested into ClickHouse is delivered through some sort of pe

 Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective.

-### clickhouse-copier {#clickhouse-copier}
-
-[clickhouse-copier](../operations/utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters.
-
 For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well.

 ### Manipulations with Parts {#manipulations-with-parts}
--- a/docs/en/operations/system-tables/metrics.md
+++ b/docs/en/operations/system-tables/metrics.md
@ -513,10 +513,6 @@ Part was moved to another disk and should be deleted in own destructor.

 Not active data part with identity refcounter, it is deleting right now by a cleaner.

-### PartsInMemory
-
-In-memory parts.
-
 ### PartsOutdated

 Not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes.
--- a/docs/en/operations/utilities/clickhouse-copier.md
+++ b/docs/en/operations/utilities/clickhouse-copier.md
@ -1,187 +0,0 @@
---
-slug: /en/operations/utilities/clickhouse-copier
-sidebar_position: 59
-sidebar_label: clickhouse-copier
---
-
-# clickhouse-copier 
-
-Copies data from the tables in one cluster to tables in another (or the same) cluster.
-
-:::note    
-To get a consistent copy, the data in the source tables and partitions should not change during the entire process.
-:::
-
-You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ClickHouse Keeper, or ZooKeeper, is used for syncing the processes.
-
-After starting, `clickhouse-copier`:
-
- Connects to ClickHouse Keeper and receives:
-
-    - Copying jobs.
-    - The state of the copying jobs.
-
- It performs the jobs.
-
-    Each running process chooses the “closest” shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary.
-
-`clickhouse-copier` tracks the changes in ClickHouse Keeper and applies them on the fly.
-
-To reduce network traffic, we recommend running `clickhouse-copier` on the same server where the source data is located.
-
-## Running Clickhouse-copier {#running-clickhouse-copier}
-
-The utility should be run manually:
-
-``` bash
-$ clickhouse-copier --daemon --config keeper.xml --task-path /task/path --base-dir /path/to/dir
-```
-
-Parameters:
-
- `daemon` — Starts `clickhouse-copier` in daemon mode.
- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper.
- `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`.
- `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper.
- `task-upload-force` — Force upload `task-file` even if node already exists. Default is false.
- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_<PID>` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched.
-
-## Format of keeper.xml {#format-of-zookeeper-xml}
-
-``` xml
-<clickhouse>
-    <logger>
-        <level>trace</level>
-        <size>100M</size>
-        <count>3</count>
-    </logger>
-
-    <zookeeper>
-        <node index="1">
-            <host>127.0.0.1</host>
-            <port>2181</port>
-        </node>
-    </zookeeper>
-</clickhouse>
-```
-
-## Configuration of Copying Tasks {#configuration-of-copying-tasks}
-
-``` xml
-<clickhouse>
-    <!-- Configuration of clusters as in an ordinary server config -->
-    <remote_servers>
-        <source_cluster>
-            <!--
-                source cluster & destination clusters accept exactly the same
-                parameters as parameters for the usual Distributed table
-                see https://clickhouse.com/docs/en/engines/table-engines/special/distributed/
-            -->
-            <shard>
-                <internal_replication>false</internal_replication>
-                    <replica>
-                        <host>127.0.0.1</host>
-                        <port>9000</port>
-                        <!--
-                        <user>default</user>
-                        <password>default</password>
-                        <secure>1</secure>
-                        -->
-                    </replica>
-            </shard>
-            ...
-        </source_cluster>
-
-        <destination_cluster>
-        ...
-        </destination_cluster>
-    </remote_servers>
-
-    <!-- How many simultaneously active workers are possible. If you run more workers superfluous workers will sleep. -->
-    <max_workers>2</max_workers>
-
-    <!-- Setting used to fetch (pull) data from source cluster tables -->
-    <settings_pull>
-        <readonly>1</readonly>
-    </settings_pull>
-
-    <!-- Setting used to insert (push) data to destination cluster tables -->
-    <settings_push>
-        <readonly>0</readonly>
-    </settings_push>
-
-    <!-- Common setting for fetch (pull) and insert (push) operations. Also, copier process context uses it.
-         They are overlaid by <settings_pull/> and <settings_push/> respectively. -->
-    <settings>
-        <connect_timeout>3</connect_timeout>
-        <!-- Sync insert is set forcibly, leave it here just in case. -->
-        <distributed_foreground_insert>1</distributed_foreground_insert>
-    </settings>
-
-    <!-- Copying tasks description.
-         You could specify several table task in the same task description (in the same ZooKeeper node), they will be performed
-         sequentially.
-    -->
-    <tables>
-        <!-- A table task, copies one table. -->
-        <table_hits>
-            <!-- Source cluster name (from <remote_servers/> section) and tables in it that should be copied -->
-            <cluster_pull>source_cluster</cluster_pull>
-            <database_pull>test</database_pull>
-            <table_pull>hits</table_pull>
-
-            <!-- Destination cluster name and tables in which the data should be inserted -->
-            <cluster_push>destination_cluster</cluster_push>
-            <database_push>test</database_push>
-            <table_push>hits2</table_push>
-
-            <!-- Engine of destination tables.
-                 If destination tables have not be created, workers create them using columns definition from source tables and engine
-                 definition from here.
-
-                 NOTE: If the first worker starts insert data and detects that destination partition is not empty then the partition will
-                 be dropped and refilled, take it into account if you already have some data in destination tables. You could directly
-                 specify partitions that should be copied in <enabled_partitions/>, they should be in quoted format like partition column of
-                 system.parts table.
-            -->
-            <engine>
-            ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
-            PARTITION BY toMonday(date)
-            ORDER BY (CounterID, EventDate)
-            </engine>
-
-            <!-- Sharding key used to insert data to destination cluster -->
-            <sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
-
-            <!-- Optional expression that filter data while pull them from source servers -->
-            <where_condition>CounterID != 0</where_condition>
-
-            <!-- This section specifies partitions that should be copied, other partition will be ignored.
-                 Partition names should have the same format as
-                 partition column of system.parts table (i.e. a quoted text).
-                 Since partition key of source and destination cluster could be different,
-                 these partition names specify destination partitions.
-
-                 NOTE: In spite of this section is optional (if it is not specified, all partitions will be copied),
-                 it is strictly recommended to specify them explicitly.
-                 If you already have some ready partitions on destination cluster they
-                 will be removed at the start of the copying since they will be interpeted
-                 as unfinished data from the previous copying!!!
-            -->
-            <enabled_partitions>
-                <partition>'2018-02-26'</partition>
-                <partition>'2018-03-05'</partition>
-                ...
-            </enabled_partitions>
-        </table_hits>
-
-        <!-- Next table to copy. It is not copied until previous table is copying. -->
-        <table_visits>
-        ...
-        </table_visits>
-        ...
-    </tables>
-</clickhouse>
-```
-
-`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change.
--- a/docs/en/operations/utilities/index.md
+++ b/docs/en/operations/utilities/index.md
@ -2,13 +2,11 @@
 slug: /en/operations/utilities/
 sidebar_position: 56
 sidebar_label: List of tools and utilities
-pagination_next: 'en/operations/utilities/clickhouse-copier'
 ---

 # List of tools and utilities

 - [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this.
- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster.
 - [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads server with the custom queries and settings.
 - [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries.
 - [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data.
--- a/docs/en/sql-reference/functions/encoding-functions.md
+++ b/docs/en/sql-reference/functions/encoding-functions.md
@ -433,3 +433,292 @@ Result:
 │ [0,1,2,3,4,5,6,7] │
 └───────────────────┘
 ```
+
+## mortonEncode
+
+Calculates the Morton encoding (ZCurve) for a list of unsigned integers.
+
+The function has two modes of operation:
+- Simple
+- Expanded
+
+### Simple mode
+
+Accepts up to 8 unsigned integers as arguments and produces a UInt64 code.
+
+**Syntax**
+
+```sql
+mortonEncode(args)
+```
+
+**Parameters**
+
+- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
+
+**Returned value**
+
+- A UInt64 code
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md)
+
+**Example**
+
+Query:
+
+```sql
+SELECT mortonEncode(1, 2, 3);
+```
+Result:
+
+```response
+53
+```
+
+### Expanded mode
+
+Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments.
+
+Each number in the mask configures the amount of range expansion:<br/>
+1 - no expansion<br/>
+2 - 2x expansion<br/>
+3 - 3x expansion<br/>
+...<br/>
+Up to 8x expansion.<br/>
+
+**Syntax**
+
+```sql
+mortonEncode(range_mask, args)
+```
+
+**Parameters**
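+- `range_mask`: a [tuple](../../sql-reference/data-types/tuple.md) of expansion factors, one per argument, each between 1 and 8.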
+- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
+
+Note: when using columns for `args`, the provided `range_mask` tuple should still be a constant.
+
+**Returned value**
+
+- A UInt64 code
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md)
+
+
+**Example**
+
+Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
+For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
+
+Query:
+
+```sql
+SELECT mortonEncode((1,2), 1024, 16);
+```
+
+Result:
+
+```response
+1572864
+```
+
+Note: tuple size must be equal to the number of the other arguments.
+
+**Example**
+
+Morton encoding for one argument is always the argument itself:
+
+Query:
+
+```sql
+SELECT mortonEncode(1);
+```
+
+Result:
+
+```response
+1
+```
+
+**Example**
+
+It is also possible to expand a single argument:
+
+Query:
+
+```sql
+SELECT mortonEncode(tuple(2), 128);
+```
+
+Result:
+
+```response
+32768
+```
+
+**Example**
+
+You can also use column names in the function.
+
+Query:
+
+First create the table and insert some data.
+
+```sql
+create table morton_numbers(
+    n1 UInt32,
+    n2 UInt32,
+    n3 UInt16,
+    n4 UInt16,
+    n5 UInt8,
+    n6 UInt8,
+    n7 UInt8,
+    n8 UInt8
+)
+Engine=MergeTree()
+ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
+insert into morton_numbers (*) values(1,2,3,4,5,6,7,8);
+```
+Use column names instead of constants as function arguments to `mortonEncode`.
+
+Query:
+
+```sql
+SELECT mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8) FROM morton_numbers;
+```
+
+Result:
+
+```response
+2155374165
+```
+
+**Implementation details**
+
+Please note that a Morton code can hold only as many bits of information as a [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of at most 2^32 (64/2) each, three arguments a range of at most 2^21 (64/3) each, and so on. All overflow will be clamped to zero.
+
+## mortonDecode
+
+Decodes a Morton encoding (ZCurve) into the corresponding unsigned integer tuple.
+
+As with the `mortonEncode` function, this function has two modes of operation:
+- Simple
+- Expanded
+
+### Simple mode
+
+Accepts a resulting tuple size as the first argument and the code as the second argument.
+
+**Syntax**
+
+```sql
+mortonDecode(tuple_size, code)
+```
+
+**Parameters**
+- `tuple_size`: integer value no more than 8.
+- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code.
+
+**Returned value**
+
+- [tuple](../../sql-reference/data-types/tuple.md) of the specified size.
+
+Type: [Tuple](../../sql-reference/data-types/tuple.md) of [UInt64](../../sql-reference/data-types/int-uint.md)
+
+**Example**
+
+Query:
+
+```sql
+SELECT mortonDecode(3, 53);
+```
+
+Result:
+
+```response
+["1","2","3"]
+```
+
+### Expanded mode
+
+Accepts a range mask (tuple) as a first argument and the code as the second argument.
+Each number in the mask configures the amount of range shrink:<br/>
+1 - no shrink<br/>
+2 - 2x shrink<br/> 
+3 - 3x shrink<br/>
+...<br/>
+Up to 8x shrink.<br/>
+
+Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
+For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
+As with the encode function, this is limited to 8 numbers at most.
+
+**Example**
+
+Query:
+
+```sql
+SELECT mortonDecode(1, 1);
+```
+
+Result:
+
+```response
+["1"]
+```
+
+**Example**
+
+It is also possible to shrink one argument:
+
+Query:
+
+```sql
+SELECT mortonDecode(tuple(2), 32768);
+```
+
+Result:
+
+```response
+["128"]
+```
+
+**Example**
+
+You can also use column names in the function.
+
+First create the table and insert some data.
+
+Query:
+```sql
+create table morton_numbers(
+    n1 UInt32,
+    n2 UInt32,
+    n3 UInt16,
+    n4 UInt16,
+    n5 UInt8,
+    n6 UInt8,
+    n7 UInt8,
+    n8 UInt8
+)
+Engine=MergeTree()
+ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
+insert into morton_numbers (*) values(1,2,3,4,5,6,7,8);
+```
+Use column names instead of constants as function arguments to `mortonDecode`.
+
+Query:
+
+```sql
+select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) from morton_numbers;
+```
+
+Result:
+
+```response
+1	2	3	4	5	6	7	8
+```
+
+
+
+
--- a/docs/en/sql-reference/functions/geo/polygon.md
+++ b/docs/en/sql-reference/functions/geo/polygon.md
@ -53,6 +53,62 @@ String starting with `POLYGON`

 Polygon

+## readWKTPoint
+
+The `readWKTPoint` function in ClickHouse parses a Well-Known Text (WKT) representation of a Point geometry and returns a point in the internal ClickHouse format.
+
+### Syntax
+
+```sql
+readWKTPoint(wkt_string)
+```
+
+### Arguments
+
+- `wkt_string`: The input WKT string representing a Point geometry.
+
+### Returned value
+
+The function returns a ClickHouse internal representation of the Point geometry.
+
+### Example
+
+```sql
+SELECT readWKTPoint('POINT (1.2 3.4)');
+```
+
+```response
+(1.2,3.4)
+```
+
+## readWKTRing
+
+Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format.
+
+### Syntax
+
+```sql
+readWKTRing(wkt_string)
+```
+
+### Arguments
+
+- `wkt_string`: The input WKT string representing a Polygon geometry.
+
+### Returned value
+
+The function returns a ClickHouse internal representation of the ring (closed linestring) geometry.
+
+### Example
+
+```sql
+SELECT readWKTRing('POLYGON ((1 1, 2 2, 3 3, 1 1))');
+```
+
+```response
+[(1,1),(2,2),(3,3),(1,1)]
+```
+
 ## polygonsWithinSpherical

 Returns true or false depending on whether or not one polygon lies completely inside another polygon. Reference https://www.boost.org/doc/libs/1_62_0/libs/geometry/doc/html/geometry/reference/algorithms/within/within_2.html
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@ -335,7 +335,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest

 There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`).

-If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query.
+If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table.

 The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running.

--- a/docs/ru/getting-started/tutorial.md
+++ b/docs/ru/getting-started/tutorial.md
@ -585,10 +585,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
 INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
 ```

-:::danger Внимание!
-Этот подход не годится для сегментирования больших таблиц. Есть инструмент [clickhouse-copier](../operations/utilities/clickhouse-copier.md), специально предназначенный для перераспределения любых больших таблиц.
-:::
-
 Как и следовало ожидать, вычислительно сложные запросы работают втрое быстрее, если они выполняются на трёх серверах, а не на одном.

 В данном случае мы использовали кластер из трёх сегментов с одной репликой для каждого.
--- a/docs/ru/operations/backup.md
+++ b/docs/ru/operations/backup.md
@ -24,10 +24,6 @@ sidebar_label: "Резервное копирование данных"

 Некоторые локальные файловые системы позволяют делать снимки (например, [ZFS](https://en.wikipedia.org/wiki/ZFS)), но они могут быть не лучшим выбором для обслуживания живых запросов. Возможным решением является создание дополнительных реплик с такой файловой системой и исключение их из [Distributed](../engines/table-engines/special/distributed.md) таблиц, используемых для запросов `SELECT`. Снимки на таких репликах будут недоступны для запросов, изменяющих данные. В качестве бонуса, эти реплики могут иметь особые конфигурации оборудования с большим количеством дисков, подключенных к серверу, что будет экономически эффективным.

-## clickhouse-copier {#clickhouse-copier}
-
-[clickhouse-copier](utilities/clickhouse-copier.md) — это универсальный инструмент, который изначально был создан для перешардирования таблиц с петабайтами данных. Его также можно использовать для резервного копирования и восстановления, поскольку он надёжно копирует данные между таблицами и кластерами ClickHouse.
-
 Для небольших объёмов данных можно применять `INSERT INTO ... SELECT ...` в удалённые таблицы.

 ## Манипуляции с партициями {#manipuliatsii-s-partitsiiami}
--- a/docs/ru/operations/utilities/clickhouse-copier.md
+++ b/docs/ru/operations/utilities/clickhouse-copier.md
@ -1,183 +0,0 @@
---
-slug: /ru/operations/utilities/clickhouse-copier
-sidebar_position: 59
-sidebar_label: clickhouse-copier
---
-
-# clickhouse-copier {#clickhouse-copier}
-
-Копирует данные из таблиц одного кластера в таблицы другого (или этого же) кластера.
-
-Можно запустить несколько `clickhouse-copier` для разных серверах для выполнения одного и того же задания. Для синхронизации между процессами используется ZooKeeper.
-
-После запуска, `clickhouse-copier`:
-
-   Соединяется с ZooKeeper и получает:
-
-    -   Задания на копирование.
-    -   Состояние заданий на копирование.
-
-   Выполняет задания.
-
-        Каждый запущенный процесс выбирает "ближайший" шард исходного кластера и копирует данные в кластер назначения, при необходимости перешардируя их.
-
-`clickhouse-copier` отслеживает изменения в ZooKeeper и применяет их «на лету».
-
-Для снижения сетевого трафика рекомендуем запускать `clickhouse-copier` на том же сервере, где находятся исходные данные.
-
-## Запуск Clickhouse-copier {#zapusk-clickhouse-copier}
-
-Утилиту следует запускать вручную следующим образом:
-
-``` bash
-$ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir
-```
-
-Параметры запуска:
-
-   `daemon` - запускает `clickhouse-copier` в режиме демона.
-   `config` - путь к файлу `zookeeper.xml` с параметрами соединения с ZooKeeper.
-   `task-path` - путь к ноде ZooKeeper. Нода используется для синхронизации между процессами `clickhouse-copier` и для хранения заданий. Задания хранятся в `$task-path/description`.
-   `task-file` - необязательный путь к файлу с описанием конфигурация заданий для загрузки в ZooKeeper.
-   `task-upload-force` - Загрузить `task-file` в ZooKeeper даже если уже было загружено.
-   `base-dir` - путь к логам и вспомогательным файлам. При запуске `clickhouse-copier` создает в `$base-dir` подкаталоги `clickhouse-copier_YYYYMMHHSS_<PID>`. Если параметр не указан, то каталоги будут создаваться в каталоге, где `clickhouse-copier` был запущен.
-
-## Формат Zookeeper.xml {#format-zookeeper-xml}
-
-``` xml
-<clickhouse>
-    <logger>
-        <level>trace</level>
-        <size>100M</size>
-        <count>3</count>
-    </logger>
-
-    <zookeeper>
-        <node index="1">
-            <host>127.0.0.1</host>
-            <port>2181</port>
-        </node>
-    </zookeeper>
-</clickhouse>
-```
-
-## Конфигурация заданий на копирование {#konfiguratsiia-zadanii-na-kopirovanie}
-
-``` xml
-<clickhouse>
-    <!-- Configuration of clusters as in an ordinary server config -->
-    <remote_servers>
-        <source_cluster>
-		    <!--
-                source cluster & destination clusters accept exactly the same
-                parameters as parameters for the usual Distributed table
-                see https://clickhouse.com/docs/ru/engines/table-engines/special/distributed/
-            -->
-            <shard>
-                <internal_replication>false</internal_replication>
-                    <replica>
-                        <host>127.0.0.1</host>
-                        <port>9000</port>
-						<!--
-                        <user>default</user>
-                        <password>default</password>
-                        <secure>1</secure>
-                        -->
-                    </replica>
-            </shard>
-            ...
-        </source_cluster>
-
-        <destination_cluster>
-        ...
-        </destination_cluster>
-    </remote_servers>
-
-    <!-- How many simultaneously active workers are possible. If you run more workers superfluous workers will sleep. -->
-    <max_workers>2</max_workers>
-
-    <!-- Setting used to fetch (pull) data from source cluster tables -->
-    <settings_pull>
-        <readonly>1</readonly>
-    </settings_pull>
-
-    <!-- Setting used to insert (push) data to destination cluster tables -->
-    <settings_push>
-        <readonly>0</readonly>
-    </settings_push>
-
-    <!-- Common setting for fetch (pull) and insert (push) operations. Also, copier process context uses it.
-         They are overlaid by <settings_pull/> and <settings_push/> respectively. -->
-    <settings>
-        <connect_timeout>3</connect_timeout>
-        <!-- Sync insert is set forcibly, leave it here just in case. -->
-        <distributed_foreground_insert>1</distributed_foreground_insert>
-    </settings>
-
-    <!-- Copying tasks description.
-         You could specify several table task in the same task description (in the same ZooKeeper node), they will be performed
-         sequentially.
-    -->
-    <tables>
-        <!-- A table task, copies one table. -->
-        <table_hits>
-            <!-- Source cluster name (from <remote_servers/> section) and tables in it that should be copied -->
-            <cluster_pull>source_cluster</cluster_pull>
-            <database_pull>test</database_pull>
-            <table_pull>hits</table_pull>
-
-            <!-- Destination cluster name and tables in which the data should be inserted -->
-            <cluster_push>destination_cluster</cluster_push>
-            <database_push>test</database_push>
-            <table_push>hits2</table_push>
-
-            <!-- Engine of destination tables.
-                 If destination tables have not be created, workers create them using columns definition from source tables and engine
-                 definition from here.
-
-                 NOTE: If the first worker starts insert data and detects that destination partition is not empty then the partition will
-                 be dropped and refilled, take it into account if you already have some data in destination tables. You could directly
-                 specify partitions that should be copied in <enabled_partitions/>, they should be in quoted format like partition column of
-                 system.parts table.
-            -->
-            <engine>
-            ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
-            PARTITION BY toMonday(date)
-            ORDER BY (CounterID, EventDate)
-            </engine>
-
-            <!-- Sharding key used to insert data to destination cluster -->
-            <sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
-
-            <!-- Optional expression that filter data while pull them from source servers -->
-            <where_condition>CounterID != 0</where_condition>
-
-            <!-- This section specifies partitions that should be copied, other partition will be ignored.
-                 Partition names should have the same format as
-                 partition column of system.parts table (i.e. a quoted text).
-                 Since partition key of source and destination cluster could be different,
-                 these partition names specify destination partitions.
-
-                 NOTE: In spite of this section is optional (if it is not specified, all partitions will be copied),
-                 it is strictly recommended to specify them explicitly.
-                 If you already have some ready partitions on destination cluster they
-                 will be removed at the start of the copying since they will be interpeted
-                 as unfinished data from the previous copying!!!
-            -->
-            <enabled_partitions>
-                <partition>'2018-02-26'</partition>
-                <partition>'2018-03-05'</partition>
-                ...
-            </enabled_partitions>
-        </table_hits>
-
-        <!-- Next table to copy. It is not copied until previous table is copying. -->
-        <table_visits>
-        ...
-        </table_visits>
-        ...
-    </tables>
-</clickhouse>
-```
-
-`clickhouse-copier` отслеживает изменения `/task/path/description` и применяет их «на лету». Если вы поменяете, например, значение `max_workers`, то количество процессов, выполняющих задания, также изменится.
--- a/docs/ru/operations/utilities/index.md
+++ b/docs/ru/operations/utilities/index.md
@ -7,7 +7,6 @@ sidebar_position: 56
 # Утилиты ClickHouse {#utility-clickhouse}

 -   [clickhouse-local](clickhouse-local.md) - позволяет выполнять SQL-запросы над данными без остановки сервера ClickHouse, подобно утилите `awk`.
-   [clickhouse-copier](clickhouse-copier.md) - копирует (и перешардирует) данные с одного кластера на другой.
 -   [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — устанавливает соединение с сервером ClickHouse и запускает циклическое выполнение указанных запросов.
 -   [clickhouse-format](../../operations/utilities/clickhouse-format.md) — позволяет форматировать входящие запросы.
 -   [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — обфусцирует данные.
--- a/docs/ru/sql-reference/statements/alter/column.md
+++ b/docs/ru/sql-reference/statements/alter/column.md
@ -94,7 +94,7 @@ RENAME COLUMN [IF EXISTS] name to new_name

 Переименовывает столбец `name` в `new_name`. Если указано выражение `IF EXISTS`, то запрос не будет возвращать ошибку при условии, что столбец `name` не существует. Поскольку переименование не затрагивает физические данные колонки, запрос выполняется практически мгновенно.

-**ЗАМЕЧЕНИЕ**: Столбцы, являющиеся частью основного ключа или ключа сортировки (заданные с помощью `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать эти слобцы приведет к `SQL Error [524]`. 
+**ЗАМЕЧЕНИЕ**: Столбцы, являющиеся частью основного ключа или ключа сортировки (заданные с помощью `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать эти слобцы приведет к `SQL Error [524]`.

 Пример:

@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp;

 Отсутствует возможность удалять столбцы, входящие в первичный ключ или ключ для сэмплирования (в общем, входящие в выражение `ENGINE`). Изменение типа у столбцов, входящих в первичный ключ возможно только в том случае, если это изменение не приводит к изменению данных (например, разрешено добавление значения в Enum или изменение типа с `DateTime` на `UInt32`).

-Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md).
+Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу.

 Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть если на момент запроса `ALTER` выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время все новые запросы к той же таблице будут ждать, пока завершится этот `ALTER`.

--- a/docs/zh/getting-started/tutorial.md
+++ b/docs/zh/getting-started/tutorial.md
@ -582,8 +582,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
 INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
 ```

-!!! warning "注意:"
-    这种方法不适合大型表的分片。 有一个单独的工具 [clickhouse-copier](../operations/utilities/clickhouse-copier.md) 这可以重新分片任意大表。

 正如您所期望的那样，如果计算量大的查询使用3台服务器而不是一个，则运行速度快N倍。

--- a/docs/zh/operations/backup.md
+++ b/docs/zh/operations/backup.md
@ -24,12 +24,6 @@ sidebar_label: "\u6570\u636E\u5907\u4EFD"

 某些本地文件系统提供快照功能（例如, [ZFS](https://en.wikipedia.org/wiki/ZFS)），但它们可能不是提供实时查询的最佳选择。 一个可能的解决方案是使用这种文件系统创建额外的副本，并将它们与用于`SELECT` 查询的 [分布式](../engines/table-engines/special/distributed.md) 表分离。 任何修改数据的查询都无法访问此类副本上的快照。 作为回报，这些副本可能具有特殊的硬件配置，每个服务器附加更多的磁盘，这将是经济高效的。

-## clickhouse-copier {#clickhouse-copier}
-
-[clickhouse-copier](utilities/clickhouse-copier.md) 是一个多功能工具，最初创建它是为了用于重新切分pb大小的表。 因为它能够在ClickHouse表和集群之间可靠地复制数据，所以它也可用于备份和还原数据。
-
-对于较小的数据量，一个简单的 `INSERT INTO ... SELECT ...` 到远程表也可以工作。
-
 ## part操作 {#manipulations-with-parts}

 ClickHouse允许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询以创建表分区的本地副本。 这是利用硬链接(hardlink)到 `/var/lib/clickhouse/shadow/` 文件夹中实现的，所以它通常不会因为旧数据而占用额外的磁盘空间。 创建的文件副本不由ClickHouse服务器处理，所以你可以把它们留在那里：你将有一个简单的备份，不需要任何额外的外部系统，但它仍然容易出现硬件问题。 出于这个原因，最好将它们远程复制到另一个位置，然后删除本地副本。 分布式文件系统和对象存储仍然是一个不错的选择，但是具有足够大容量的正常附加文件服务器也可以工作（在这种情况下，传输将通过网络文件系统或者也许是 [rsync](https://en.wikipedia.org/wiki/Rsync) 来进行).
--- a/docs/zh/operations/utilities/clickhouse-copier.md
+++ b/docs/zh/operations/utilities/clickhouse-copier.md
@ -1,172 +0,0 @@
---
-slug: /zh/operations/utilities/clickhouse-copier
---
-# clickhouse-copier {#clickhouse-copier}
-
-将数据从一个群集中的表复制到另一个（或相同）群集中的表。
-
-您可以运行多个 `clickhouse-copier` 不同服务器上的实例执行相同的作业。 ZooKeeper用于同步进程。
-
-开始后, `clickhouse-copier`:
-
-   连接到ZooKeeper并且接收:
-
-    -   复制作业。
-    -   复制作业的状态。
-
-   它执行的工作。
-
-        每个正在运行的进程都会选择源集群的“最接近”分片，然后将数据复制到目标集群，并在必要时重新分片数据。
-
-`clickhouse-copier` 跟踪ZooKeeper中的更改，并实时应用它们。
-
-为了减少网络流量，我们建议运行 `clickhouse-copier` 在源数据所在的同一服务器上。
-
-## 运行Clickhouse-copier {#running-clickhouse-copier}
-
-该实用程序应手动运行:
-
-``` bash
-clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir
-```
-
-参数:
-
-   `daemon` — 在守护进程模式下启动`clickhouse-copier`。
-   `config` — `zookeeper.xml`文件的路径，其中包含用于连接ZooKeeper的参数。
-   `task-path` — ZooKeeper节点的路径。 该节点用于同步`clickhouse-copier`进程和存储任务。 任务存储在`$task-path/description`中。
-   `task-file` — 可选的非必须参数, 指定一个包含任务配置的参数文件, 用于初始上传到ZooKeeper。
-   `task-upload-force` — 即使节点已经存在，也强制上载`task-file`。
-   `base-dir` — 日志和辅助文件的路径。 启动时，`clickhouse-copier`在`$base-dir`中创建`clickhouse-copier_YYYYMMHHSS_<PID>`子目录。 如果省略此参数，则会在启动`clickhouse-copier`的目录中创建目录。
-
-
-
-## Zookeeper.xml格式 {#format-of-zookeeper-xml}
-
-``` xml
-<clickhouse>
-    <logger>
-        <level>trace</level>
-        <size>100M</size>
-        <count>3</count>
-    </logger>
-
-    <zookeeper>
-        <node index="1">
-            <host>127.0.0.1</host>
-            <port>2181</port>
-        </node>
-    </zookeeper>
-</clickhouse>
-```
-
-## 复制任务的配置 {#configuration-of-copying-tasks}
-
-``` xml
-<clickhouse>
-    <!-- Configuration of clusters as in an ordinary server config -->
-    <remote_servers>
-        <source_cluster>
-            <shard>
-                <internal_replication>false</internal_replication>
-                    <replica>
-                        <host>127.0.0.1</host>
-                        <port>9000</port>
-                    </replica>
-            </shard>
-            ...
-        </source_cluster>
-
-        <destination_cluster>
-        ...
-        </destination_cluster>
-    </remote_servers>
-
-    <!-- How many simultaneously active workers are possible. If you run more workers superfluous workers will sleep. -->
-    <max_workers>2</max_workers>
-
-    <!-- Setting used to fetch (pull) data from source cluster tables -->
-    <settings_pull>
-        <readonly>1</readonly>
-    </settings_pull>
-
-    <!-- Setting used to insert (push) data to destination cluster tables -->
-    <settings_push>
-        <readonly>0</readonly>
-    </settings_push>
-
-    <!-- Common setting for fetch (pull) and insert (push) operations. Also, copier process context uses it.
-         They are overlaid by <settings_pull/> and <settings_push/> respectively. -->
-    <settings>
-        <connect_timeout>3</connect_timeout>
-        <!-- Sync insert is set forcibly, leave it here just in case. -->
-        <distributed_foreground_insert>1</distributed_foreground_insert>
-    </settings>
-
-    <!-- Copying tasks description.
-         You could specify several table task in the same task description (in the same ZooKeeper node), they will be performed
-         sequentially.
-    -->
-    <tables>
-        <!-- A table task, copies one table. -->
-        <table_hits>
-            <!-- Source cluster name (from <remote_servers/> section) and tables in it that should be copied -->
-            <cluster_pull>source_cluster</cluster_pull>
-            <database_pull>test</database_pull>
-            <table_pull>hits</table_pull>
-
-            <!-- Destination cluster name and tables in which the data should be inserted -->
-            <cluster_push>destination_cluster</cluster_push>
-            <database_push>test</database_push>
-            <table_push>hits2</table_push>
-
-            <!-- Engine of destination tables.
-                 If destination tables have not be created, workers create them using columns definition from source tables and engine
-                 definition from here.
-
-                 NOTE: If the first worker starts insert data and detects that destination partition is not empty then the partition will
-                 be dropped and refilled, take it into account if you already have some data in destination tables. You could directly
-                 specify partitions that should be copied in <enabled_partitions/>, they should be in quoted format like partition column of
-                 system.parts table.
-            -->
-            <engine>
-            ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
-            PARTITION BY toMonday(date)
-            ORDER BY (CounterID, EventDate)
-            </engine>
-
-            <!-- Sharding key used to insert data to destination cluster -->
-            <sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
-
-            <!-- Optional expression that filter data while pull them from source servers -->
-            <where_condition>CounterID != 0</where_condition>
-
-            <!-- This section specifies partitions that should be copied, other partition will be ignored.
-                 Partition names should have the same format as
-                 partition column of system.parts table (i.e. a quoted text).
-                 Since partition key of source and destination cluster could be different,
-                 these partition names specify destination partitions.
-
-                 NOTE: In spite of this section is optional (if it is not specified, all partitions will be copied),
-                 it is strictly recommended to specify them explicitly.
-                 If you already have some ready partitions on destination cluster they
-                 will be removed at the start of the copying since they will be interpeted
-                 as unfinished data from the previous copying!!!
-            -->
-            <enabled_partitions>
-                <partition>'2018-02-26'</partition>
-                <partition>'2018-03-05'</partition>
-                ...
-            </enabled_partitions>
-        </table_hits>
-
-        <!-- Next table to copy. It is not copied until previous table is copying. -->
-        <table_visits>
-        ...
-        </table_visits>
-        ...
-    </tables>
-</clickhouse>
-```
-
-`clickhouse-copier` 跟踪更改 `/task/path/description` 并在飞行中应用它们。 例如，如果你改变的值 `max_workers`，运行任务的进程数也会发生变化。
--- a/docs/zh/operations/utilities/index.md
+++ b/docs/zh/operations/utilities/index.md
@ -4,5 +4,4 @@ slug: /zh/operations/utilities/
 # 实用工具 {#clickhouse-utility}

 -   [本地查询](clickhouse-local.md) —  在不停止ClickHouse服务的情况下，对数据执行查询操作(类似于 `awk` 命令)。
-   [跨集群复制](clickhouse-copier.md) —  在不同集群间复制数据。
 -   [性能测试](clickhouse-benchmark.md) — 连接到Clickhouse服务器，执行性能测试。
--- a/docs/zh/sql-reference/statements/alter.md
+++ b/docs/zh/sql-reference/statements/alter.md
@ -150,7 +150,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)

 不支持对primary key或者sampling key中的列（在 `ENGINE` 表达式中用到的列）进行删除操作。改变包含在primary key中的列的类型时，如果操作不会导致数据的变化（例如，往Enum中添加一个值，或者将`DateTime` 类型改成 `UInt32`），那么这种操作是可行的。

-如果 `ALTER` 操作不足以完成你想要的表变动操作，你可以创建一张新的表，通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去，然后通过  [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称，并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。
+如果 `ALTER` 操作不足以完成你想要的表变动操作，你可以创建一张新的表，通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去，然后通过  [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称，并删除原有的表。

 `ALTER` 操作会阻塞对表的所有读写操作。换句话说，当一个大的 `SELECT` 语句和 `ALTER`同时执行时，`ALTER`会等待，直到 `SELECT` 执行结束。与此同时，当 `ALTER` 运行时，新的 sql 语句将会等待。

--- a/packages/clickhouse-server.yaml
+++ b/packages/clickhouse-server.yaml
@ -50,8 +50,6 @@ contents:
  dst: /etc/init.d/clickhouse-server
 - src: clickhouse-server.service
  dst: /lib/systemd/system/clickhouse-server.service
- src: root/usr/bin/clickhouse-copier
-  dst: /usr/bin/clickhouse-copier
 - src: root/usr/bin/clickhouse-server
  dst: /usr/bin/clickhouse-server
 # clickhouse-keeper part
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@ -122,7 +122,6 @@ add_subdirectory (local)
 add_subdirectory (benchmark)
 add_subdirectory (extract-from-config)
 add_subdirectory (compressor)
-add_subdirectory (copier)
 add_subdirectory (format)
 add_subdirectory (obfuscator)
 add_subdirectory (install)
@ -200,7 +199,6 @@ clickhouse_program_install(clickhouse-server server)
 clickhouse_program_install(clickhouse-client client chc)
 clickhouse_program_install(clickhouse-local local chl ch)
 clickhouse_program_install(clickhouse-benchmark benchmark)
-clickhouse_program_install(clickhouse-copier copier)
 clickhouse_program_install(clickhouse-extract-from-config extract-from-config)
 clickhouse_program_install(clickhouse-compressor compressor)
 clickhouse_program_install(clickhouse-format format)
--- a/programs/copier/Aliases.h
+++ b/programs/copier/Aliases.h
@ -1,15 +0,0 @@
-#pragma once
-
-#include <base/types.h>
-
-#include <Poco/Util/AbstractConfiguration.h>
-
-#include <utility>
-
-namespace DB
-{
-    using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
-
-    using DatabaseAndTableName = std::pair<String, String>;
-    using ListOfDatabasesAndTableNames = std::vector<DatabaseAndTableName>;
-}
--- a/programs/copier/CMakeLists.txt
+++ b/programs/copier/CMakeLists.txt
@ -1,28 +0,0 @@
-set(CLICKHOUSE_COPIER_SOURCES
-        "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp"
-        "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp"
-        "${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp"
-        "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartition.cpp"
-        "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartitionPiece.cpp"
-        "${CMAKE_CURRENT_SOURCE_DIR}/StatusAccumulator.cpp"
-        "${CMAKE_CURRENT_SOURCE_DIR}/TaskCluster.cpp"
-        "${CMAKE_CURRENT_SOURCE_DIR}/TaskShard.cpp"
-        "${CMAKE_CURRENT_SOURCE_DIR}/TaskTable.cpp")
-
-set (CLICKHOUSE_COPIER_LINK
-        PRIVATE
-                clickhouse_common_zookeeper
-                clickhouse_common_config
-                clickhouse_parsers
-                clickhouse_functions
-                clickhouse_table_functions
-                clickhouse_aggregate_functions
-                string_utils
-
-        PUBLIC
-                daemon
-)
-
-set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-
-clickhouse_program_add(copier)
--- a/programs/copier/ClusterCopier.cpp
+++ b/programs/copier/ClusterCopier.cpp
--- a/programs/copier/ClusterCopier.h
+++ b/programs/copier/ClusterCopier.h
@ -1,240 +0,0 @@
-#pragma once
-
-#include "Aliases.h"
-#include "Internals.h"
-#include "TaskCluster.h"
-#include "TaskShard.h"
-#include "TaskTable.h"
-#include "ShardPartition.h"
-#include "ShardPartitionPiece.h"
-#include "ZooKeeperStaff.h"
-
-
-namespace DB
-{
-
-class ClusterCopier : WithMutableContext
-{
-public:
-    ClusterCopier(const String & task_path_,
-                  const String & host_id_,
-                  const String & proxy_database_name_,
-                  ContextMutablePtr context_,
-                  LoggerRawPtr log_)
-            : WithMutableContext(context_),
-            task_zookeeper_path(task_path_),
-            host_id(host_id_),
-            working_database_name(proxy_database_name_),
-            log(log_) {}
-
-    void init();
-
-    template <typename T>
-    decltype(auto) retry(T && func, UInt64 max_tries = 100);
-
-    void discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard);
-
-    /// Compute set of partitions, assume set of partitions aren't changed during the processing
-    void discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads = 0);
-
-    void uploadTaskDescription(const std::string & task_path, const std::string & task_file, bool force);
-
-    void reloadTaskDescription();
-
-    void updateConfigIfNeeded();
-
-    void process(const ConnectionTimeouts & timeouts);
-
-    /// Disables DROP PARTITION commands that used to clear data after errors
-    void setSafeMode(bool is_safe_mode_ = true)
-    {
-        is_safe_mode = is_safe_mode_;
-    }
-
-    void setCopyFaultProbability(double copy_fault_probability_)
-    {
-        copy_fault_probability = copy_fault_probability_;
-    }
-
-    void setMoveFaultProbability(double move_fault_probability_)
-    {
-        move_fault_probability = move_fault_probability_;
-    }
-
-    void setExperimentalUseSampleOffset(bool value)
-    {
-        experimental_use_sample_offset = value;
-    }
-
-    void setMaxTableTries(UInt64 tries)
-    {
-        max_table_tries = tries;
-    }
-    void setMaxShardPartitionTries(UInt64 tries)
-    {
-        max_shard_partition_tries = tries;
-    }
-    void setMaxShardPartitionPieceTriesForAlter(UInt64 tries)
-    {
-        max_shard_partition_piece_tries_for_alter = tries;
-    }
-    void setRetryDelayMs(std::chrono::milliseconds ms)
-    {
-        retry_delay_ms = ms;
-    }
-
-protected:
-
-    String getWorkersPath() const
-    {
-        return task_cluster->task_zookeeper_path + "/task_active_workers";
-    }
-
-    String getWorkersPathVersion() const
-    {
-        return getWorkersPath() + "_version";
-    }
-
-    String getCurrentWorkerNodePath() const
-    {
-        return getWorkersPath() + "/" + host_id;
-    }
-
-    zkutil::EphemeralNodeHolder::Ptr createTaskWorkerNodeAndWaitIfNeed(
-            const zkutil::ZooKeeperPtr & zookeeper,
-            const String & description,
-            bool unprioritized);
-
-    /*
-     * Checks that partition piece or some other entity is clean.
-     * The only requirement is that you have to pass is_dirty_flag_path and is_dirty_cleaned_path to the function.
-     * And is_dirty_flag_path is a parent of is_dirty_cleaned_path.
-     * */
-    static bool checkPartitionPieceIsClean(
-            const zkutil::ZooKeeperPtr & zookeeper,
-            const CleanStateClock & clean_state_clock,
-            const String & task_status_path);
-
-    bool checkAllPiecesInPartitionAreDone(const TaskTable & task_table, const String & partition_name, const TasksShard & shards_with_partition);
-
-    /** Checks that the whole partition of a table was copied. We should do it carefully due to dirty lock.
-     * State of some task could change during the processing.
-     * We have to ensure that all shards have the finished state and there is no dirty flag.
-     * Moreover, we have to check status twice and check zxid, because state can change during the checking.
-     */
-
-    /* The same as function above
-     * Assume that we don't know on which shards do we have partition certain piece.
-     * We'll check them all (I mean shards that contain the whole partition)
-     * And shards that don't have certain piece MUST mark that piece is_done true.
-     * */
-    bool checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name,
-                                   size_t piece_number, const TasksShard & shards_with_partition);
-
-
-    /*Alter successful insertion to helping tables it will move all pieces to destination table*/
-    TaskStatus tryMoveAllPiecesToDestinationTable(const TaskTable & task_table, const String & partition_name);
-
-    /// Removes MATERIALIZED and ALIAS columns from create table query
-    static ASTPtr removeAliasMaterializedAndTTLColumnsFromCreateQuery(const ASTPtr & query_ast, bool allow_to_copy_alias_and_materialized_columns);
-
-    bool tryDropPartitionPiece(ShardPartition & task_partition, size_t current_piece_number,
-            const zkutil::ZooKeeperPtr & zookeeper, const CleanStateClock & clean_state_clock);
-
-    bool tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table);
-
-    TaskStatus tryCreateDestinationTable(const ConnectionTimeouts & timeouts, TaskTable & task_table);
-    /// Job for copying partition from particular shard.
-    TaskStatus tryProcessPartitionTask(const ConnectionTimeouts & timeouts,
-                                       ShardPartition & task_partition,
-                                       bool is_unprioritized_task);
-
-    TaskStatus iterateThroughAllPiecesInPartition(const ConnectionTimeouts & timeouts,
-                                                  ShardPartition & task_partition,
-                                                  bool is_unprioritized_task);
-
-    TaskStatus processPartitionPieceTaskImpl(const ConnectionTimeouts & timeouts,
-                                             ShardPartition & task_partition,
-                                             size_t current_piece_number,
-                                             bool is_unprioritized_task);
-
-    void dropAndCreateLocalTable(const ASTPtr & create_ast);
-
-    void dropLocalTableIfExists(const DatabaseAndTableName & table_name) const;
-
-    void dropHelpingTables(const TaskTable & task_table);
-
-    void dropHelpingTablesByPieceNumber(const TaskTable & task_table, size_t current_piece_number);
-
-    /// Is used for usage less disk space.
-    /// After all pieces were successfully moved to original destination
-    /// table we can get rid of partition pieces (partitions in helping tables).
-    void dropParticularPartitionPieceFromAllHelpingTables(const TaskTable & task_table, const String & partition_name);
-
-    String getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings);
-
-    ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard);
-
-    /// If it is implicitly asked to create split Distributed table for certain piece on current shard, we will do it.
-    void createShardInternalTables(const ConnectionTimeouts & timeouts, TaskShard & task_shard, bool create_split = true);
-
-    std::set<String> getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard);
-
-    bool checkShardHasPartition(const ConnectionTimeouts & timeouts, TaskShard & task_shard, const String & partition_quoted_name);
-
-    bool checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts,
-             TaskShard & task_shard, const String & partition_quoted_name, size_t current_piece_number);
-
-    /*
-     * This class is used in executeQueryOnCluster function
-     * You can execute query on each shard (no sense it is executed on each replica of a shard or not)
-     * or you can execute query on each replica on each shard.
-     * First mode is useful for INSERTS queries.
-     * */
-    enum ClusterExecutionMode
-    {
-        ON_EACH_SHARD,
-        ON_EACH_NODE
-    };
-
-    /** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster
-      * Returns number of shards for which at least one replica executed query successfully
-      */
-    UInt64 executeQueryOnCluster(
-            const ClusterPtr & cluster,
-            const String & query,
-            const Settings & current_settings,
-            ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_SHARD) const;
-
-private:
-    String task_zookeeper_path;
-    String task_description_path;
-    String host_id;
-    String working_database_name;
-
-    /// Auto update config stuff
-    UInt64 task_description_current_version = 1;
-    std::atomic<UInt64> task_description_version{1};
-    Coordination::WatchCallback task_description_watch_callback;
-    /// ZooKeeper session used to set the callback
-    zkutil::ZooKeeperPtr task_description_watch_zookeeper;
-
-    ConfigurationPtr task_cluster_initial_config;
-    ConfigurationPtr task_cluster_current_config;
-
-    std::unique_ptr<TaskCluster> task_cluster;
-
-    bool is_safe_mode = false;
-    double copy_fault_probability = 0.0;
-    double move_fault_probability = 0.0;
-
-    bool experimental_use_sample_offset{false};
-
-    LoggerRawPtr log;
-
-    UInt64 max_table_tries = 3;
-    UInt64 max_shard_partition_tries = 3;
-    UInt64 max_shard_partition_piece_tries_for_alter = 10;
-    std::chrono::milliseconds retry_delay_ms{1000};
-};
-}
--- a/programs/copier/ClusterCopierApp.cpp
+++ b/programs/copier/ClusterCopierApp.cpp
@ -1,252 +0,0 @@
-#include "ClusterCopierApp.h"
-#include <Common/ZooKeeper/ZooKeeper.h>
-#include <Common/StatusFile.h>
-#include <Common/TerminalSize.h>
-#include <Databases/registerDatabases.h>
-#include <IO/ConnectionTimeouts.h>
-#include <Interpreters/registerInterpreters.h>
-#include <Formats/registerFormats.h>
-#include <Common/scope_guard_safe.h>
-#include <unistd.h>
-#include <filesystem>
-
-namespace fs = std::filesystem;
-
-namespace DB
-{
-
-/// ClusterCopierApp
-
-/// Reads the copier options from the application configuration into the member fields,
-/// prepares the per-process working directory and the logger paths, then calls Base::initialize.
-void ClusterCopierApp::initialize(Poco::Util::Application & self)
-{
-    is_help = config().has("help");
-    /// When help was requested nothing else is initialized (Base::initialize is not called either).
-    if (is_help)
-        return;
-
-    /// No default value is passed for these two - presumably getString throws when the option is missing (TODO confirm).
-    config_xml_path = config().getString("config-file");
-    task_path = config().getString("task-path");
-    log_level = config().getString("log-level", "info");
-    is_safe_mode = config().has("safe-mode");
-    is_status_mode = config().has("status");
-    /// Fault probabilities are clamped to the [0, 1] range.
-    if (config().has("copy-fault-probability"))
-        copy_fault_probability = std::max(std::min(config().getDouble("copy-fault-probability"), 1.0), 0.0);
-    if (config().has("move-fault-probability"))
-        move_fault_probability = std::max(std::min(config().getDouble("move-fault-probability"), 1.0), 0.0);
-    /// Without --base-dir the current working directory is used.
-    base_dir = (config().has("base-dir")) ? config().getString("base-dir") : fs::current_path().string();
-
-    /// Every retry counter is at least 1, the retry delay is at least 100 ms.
-    max_table_tries = std::max<size_t>(config().getUInt("max-table-tries", 3), 1);
-    max_shard_partition_tries = std::max<size_t>(config().getUInt("max-shard-partition-tries", 3), 1);
-    max_shard_partition_piece_tries_for_alter = std::max<size_t>(config().getUInt("max-shard-partition-piece-tries-for-alter", 10), 1);
-    retry_delay_ms = std::chrono::milliseconds(std::max<size_t>(config().getUInt("retry-delay-ms", 1000), 100));
-
-    if (config().has("experimental-use-sample-offset"))
-        experimental_use_sample_offset = config().getBool("experimental-use-sample-offset");
-
-    // process_id is '<start_timestamp>_<pid>', host_id is '<hostname>#<process_id>'
-    time_t timestamp = Poco::Timestamp().epochTime();
-    auto curr_pid = Poco::Process::id();
-
-    process_id = std::to_string(DateLUT::serverTimezoneInstance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid);
-    host_id = escapeForFileName(getFQDNOrHostName()) + '#' + process_id;
-    /// Working directory of this launch: <base_dir>/clickhouse-copier_<process_id>
-    process_path = fs::weakly_canonical(fs::path(base_dir) / ("clickhouse-copier_" + process_id));
-    fs::create_directories(process_path);
-
-    /// Override variables for BaseDaemon
-    if (config().has("log-level"))
-        config().setString("logger.level", config().getString("log-level"));
-
-    /// An explicit --base-dir wins over log paths from the configuration;
-    /// otherwise the process directory is used only when no path is configured.
-    if (config().has("base-dir") || !config().has("logger.log"))
-        config().setString("logger.log", fs::path(process_path) / "log.log");
-
-    if (config().has("base-dir") || !config().has("logger.errorlog"))
-        config().setString("logger.errorlog", fs::path(process_path) / "log.err.log");
-
-    /// Must run after the logger.* keys above have been set.
-    Base::initialize(self);
-}
-
-
-/// Option callback for `--help`: prints the usage message to stderr and stops further option processing.
-/// Both string parameters are unused.
-void ClusterCopierApp::handleHelp(const std::string &, const std::string &)
-{
-    /// Use the real terminal width only when stdin is a terminal; otherwise keep the formatter default.
-    uint16_t terminal_width = 0;
-    if (isatty(STDIN_FILENO))
-        terminal_width = getTerminalWidth();
-
-    Poco::Util::HelpFormatter help_formatter(options());
-    if (terminal_width)
-        help_formatter.setWidth(terminal_width);
-    help_formatter.setCommand(commandName());
-    help_formatter.setHeader("Copies tables from one cluster to another");
-    help_formatter.setUsage("--config-file <config-file> --task-path <task-path>");
-    /// The footer has to be set before format() is called, otherwise it is never printed.
-    help_formatter.setFooter("See also: https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/");
-    help_formatter.format(std::cerr);
-
-    stopOptionsProcessing();
-}
-
-
-/// Declares the command-line options of the copier in addition to the options of the base class.
-/// Every option is bound to the configuration key equal to its own name.
-void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
-{
-    Base::defineOptions(options);
-
-    /// Option that takes a value: the argument name and the binding are both the option name.
-    auto add_value_option = [&options](const std::string & name, const std::string & description)
-    {
-        options.addOption(Poco::Util::Option(name, "", description).argument(name).binding(name));
-    };
-
-    /// Option without a value (a flag).
-    auto add_flag_option = [&options](const std::string & name, const std::string & description)
-    {
-        options.addOption(Poco::Util::Option(name, "", description).binding(name));
-    };
-
-    add_value_option("task-path", "path to task in ZooKeeper");
-    add_value_option("task-file", "path to task file for uploading in ZooKeeper to task-path");
-    add_value_option("task-upload-force", "Force upload task-file even node already exists. Default is false.");
-    add_flag_option("safe-mode", "disables ALTER DROP PARTITION in case of errors");
-    add_value_option("copy-fault-probability", "the copying fails with specified probability (used to test partition state recovering)");
-    add_value_option("move-fault-probability", "the moving fails with specified probability (used to test partition state recovering)");
-    add_value_option("log-level", "sets log level");
-    add_value_option("base-dir", "base directory for copiers, consecutive copier launches will populate /base-dir/launch_id/* directories");
-    add_value_option("experimental-use-sample-offset", "Use SAMPLE OFFSET query instead of cityHash64(PRIMARY KEY) % n == k");
-    add_flag_option("status", "Get for status for current execution");
-
-    add_value_option("max-table-tries", "Number of tries for the copy table task");
-    add_value_option("max-shard-partition-tries", "Number of tries for the copy one partition task");
-    add_value_option("max-shard-partition-piece-tries-for-alter", "Number of tries for final ALTER ATTACH to destination table");
-    add_value_option("retry-delay-ms", "Delay between task retries");
-
-    /// --help additionally invokes handleHelp as soon as the option is parsed.
-    using Self = std::decay_t<decltype(*this)>;
-    Poco::Util::Option help_option("help", "", "produce this help message");
-    help_option.binding("help").callback(Poco::Util::OptionCallback<Self>(this, &Self::handleHelp));
-    options.addOption(help_option);
-}
-
-
-/// Body of the application: either prints the task status (--status) or runs the copy task.
-/// Exceptions are not handled here; they propagate to the caller.
-void ClusterCopierApp::mainImpl()
-{
-    /// Status command
-    {
-        if (is_status_mode)
-        {
-            SharedContextHolder shared_context = Context::createShared();
-            auto context = Context::createGlobal(shared_context.get());
-            context->makeGlobalContext();
-            SCOPE_EXIT_SAFE(context->shutdown());
-
-            /// The status is read from the "<task_path>/status" node and printed as is.
-            auto zookeeper = context->getZooKeeper();
-            auto status_json = zookeeper->get(task_path + "/status");
-
-            LOG_INFO(&logger(), "{}", status_json);
-            std::cout << status_json << std::endl;
-
-            context->resetZooKeeper();
-            return;
-        }
-    }
-    /// NOTE(review): presumably the status file also guards against a second process using the same directory - confirm in StatusFile.
-    StatusFile status_file(process_path + "/status", StatusFile::write_full_info);
-    ThreadStatus thread_status;
-
-    auto * log = &logger();
-    LOG_INFO(log, "Starting clickhouse-copier (id {}, host_id {}, path {}, revision {})", process_id, host_id, process_path, ClickHouseRevision::getVersionRevision());
-
-    SharedContextHolder shared_context = Context::createShared();
-    auto context = Context::createGlobal(shared_context.get());
-    context->makeGlobalContext();
-    SCOPE_EXIT_SAFE(context->shutdown());
-
-    context->setConfig(loaded_config.configuration);
-    context->setApplicationType(Context::ApplicationType::LOCAL);
-    context->setPath(process_path + "/");
-
-    registerInterpreters();
-    registerFunctions();
-    registerAggregateFunctions();
-    registerTableFunctions();
-    registerDatabases();
-    registerStorages();
-    registerDictionaries();
-    registerDisks(/* global_skip_access_check= */ true);
-    registerFormats();
-
-    /// An in-memory database is attached and made the current database of the context.
-    static const std::string default_database = "_local";
-    DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, context));
-    context->setCurrentDatabase(default_database);
-
-    /// Disable queries logging, since:
-    /// - There are bits that is not allowed for global context, like adding factories info (for the query_log)
-    /// - And anyway it is useless for copier.
-    context->setSetting("log_queries", false);
-
-    auto local_context = Context::createCopy(context);
-
-    /// Initialize query scope just in case.
-    CurrentThread::QueryScope query_scope(local_context);
-
-    /// Pass the command-line parameters collected in initialize() to the copier.
-    auto copier = std::make_unique<ClusterCopier>(
-        task_path, host_id, default_database, local_context, log);
-    copier->setSafeMode(is_safe_mode);
-    copier->setCopyFaultProbability(copy_fault_probability);
-    copier->setMoveFaultProbability(move_fault_probability);
-    copier->setMaxTableTries(max_table_tries);
-    copier->setMaxShardPartitionTries(max_shard_partition_tries);
-    copier->setMaxShardPartitionPieceTriesForAlter(max_shard_partition_piece_tries_for_alter);
-    copier->setRetryDelayMs(retry_delay_ms);
-    copier->setExperimentalUseSampleOffset(experimental_use_sample_offset);
-
-    /// The task description is uploaded only when --task-file is given.
-    auto task_file = config().getString("task-file", "");
-    if (!task_file.empty())
-        copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false));
-
-    zkutil::validateZooKeeperConfig(config());
-
-    copier->init();
-    copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef()));
-
-    /// Reset ZooKeeper before removing ClusterCopier.
-    /// Otherwise zookeeper watch can call callback which use already removed ClusterCopier object.
-    context->resetZooKeeper();
-}
-
-
-/// Runs the application and converts any exception into a non-zero return code.
-/// Returns 0 on success or when only help was requested.
-int ClusterCopierApp::main(const std::vector<std::string> &)
-{
-    if (!is_help)
-    {
-        try
-        {
-            mainImpl();
-        }
-        catch (...)
-        {
-            tryLogCurrentException(&Poco::Logger::root(), __PRETTY_FUNCTION__);
-
-            /// Never report success after a failure, even when the exception code is 0.
-            const auto code = getCurrentExceptionCode();
-            if (code)
-                return code;
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-
-}
-
-#pragma GCC diagnostic ignored "-Wunused-function"
-#pragma GCC diagnostic ignored "-Wmissing-declarations"
-
-/// Entry point of the copier program: runs ClusterCopierApp with the given command line.
-/// An escaped exception is printed to stderr and mapped to a non-zero return code.
-int mainEntryClickHouseClusterCopier(int argc, char ** argv)
-{
-    int exit_code = 0;
-
-    try
-    {
-        DB::ClusterCopierApp app;
-        exit_code = app.run(argc, argv);
-    }
-    catch (...)
-    {
-        std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
-
-        /// Fall back to -1 when the exception carries no code.
-        const auto code = DB::getCurrentExceptionCode();
-        exit_code = code ? code : -1;
-    }
-
-    return exit_code;
-}
--- a/programs/copier/ClusterCopierApp.h
+++ b/programs/copier/ClusterCopierApp.h
@ -1,99 +0,0 @@
-#pragma once
-
-#include <Poco/Util/ServerApplication.h>
-#include <Daemon/BaseDaemon.h>
-
-#include "ClusterCopier.h"
-
-/* clickhouse cluster copier util
- * Copies tables data from one cluster to new tables of other (possibly the same) cluster in distributed fault-tolerant manner.
- *
- * See overview in the docs: docs/en/utils/clickhouse-copier.md
- *
- * Implementation details:
- *
- * cluster-copier workers pull each partition of each shard of the source cluster and push it to the destination cluster through
- * Distributed table (to perform data resharding). So, worker job is a partition of a source shard.
- * A job has three states: Active, Finished and Abandoned. Abandoned means that worker died and did not finish the job.
- *
- * If an error occurred during the copying (a worker failed or a worker did not finish the INSERT), then the whole partition (on
- * all destination servers) should be dropped and refilled. So, copying entity is a partition of all destination shards.
- * If a failure is detected a special /is_dirty node is created in ZooKeeper signalling that other workers copying the same partition
- * should stop, after a refilling procedure should start.
- *
- * ZooKeeper task node has the following structure:
- *  /task/path_root                     - path passed in --task-path parameter
- *      /description                    - contains user-defined XML config of the task
- *      /task_active_workers            - contains ephemeral nodes of all currently active workers, used to implement max_workers limitation
- *          /server_fqdn#timestamp_PID  - cluster-copier worker ID
- *          ...
- *      /tables             - directory with table tasks
- *      /cluster.db.table1  - directory of table_hits task
- *          /partition1     - directory for partition1
- *              /shards     - directory for source cluster shards
- *                  /1      - worker job for the first shard of partition1 of table test.hits
- *                            Contains info about current status (Active or Finished) and worker ID.
- *                  /2
- *                  ...
- *              /partition_active_workers
- *                  /1      - for each job in /shards a corresponding ephemeral node created in /partition_active_workers
- *                            It is used to detect Abandoned jobs (if there is Active node in /shards and there is no node in
- *                            /partition_active_workers).
- *                            Also, it is used to track active workers in the partition (when we need to refill the partition we do
- *                            not DROP PARTITION while there are active workers)
- *                  /2
- *                  ...
- *              /is_dirty   - the node is set if some worker detected that an error occurred (the INSERT is failed or an Abandoned node is
- *                            detected). If the node appeared workers in this partition should stop and start cleaning and refilling
- *                            partition procedure.
- *                            During this procedure a single 'cleaner' worker is selected. The worker waits for stopping all partition
- *                            workers, removes /shards node, executes DROP PARTITION on each destination node and removes /is_dirty node.
- *                  /cleaner- An ephemeral node used to select 'cleaner' worker. Contains ID of the worker.
- *      /cluster.db.table2
- *          ...
- */
-
-namespace DB
-{
-
-/// Command-line application of clickhouse-copier.
-/// Parses the options, prepares the environment and runs ClusterCopier (see mainImpl).
-class ClusterCopierApp : public BaseDaemon
-{
-public:
-
-    /// Reads the options into the fields below and initializes the base daemon.
-    void initialize(Poco::Util::Application & self) override;
-
-    /// Callback of the --help option.
-    void handleHelp(const std::string &, const std::string &);
-
-    void defineOptions(Poco::Util::OptionSet & options) override;
-
-    /// Returns 0 on success, otherwise the exception code (or -1 if the code is 0).
-    int main(const std::vector<std::string> &) override;
-
-private:
-
-    using Base = BaseDaemon;
-
-    void mainImpl();
-
-    /// Values of --config-file, --task-path and --log-level.
-    std::string config_xml_path;
-    std::string task_path;
-    std::string log_level = "info";
-    /// Set by the --safe-mode and --status flags.
-    bool is_safe_mode = false;
-    bool is_status_mode = false;
-    /// Fault injection probabilities, clamped to [0, 1] in initialize().
-    double copy_fault_probability = 0.0;
-    double move_fault_probability = 0.0;
-    bool is_help = false;
-
-    /// Retry settings passed to ClusterCopier.
-    UInt64 max_table_tries = 3;
-    UInt64 max_shard_partition_tries = 3;
-    UInt64 max_shard_partition_piece_tries_for_alter = 10;
-    std::chrono::milliseconds retry_delay_ms{1000};
-
-    bool experimental_use_sample_offset{false};
-
-    /// Value of --base-dir, or the current directory.
-    std::string base_dir;
-    /// <base_dir>/clickhouse-copier_<process_id>
-    std::string process_path;
-    /// <start_timestamp>_<pid>
-    std::string process_id;
-    /// <escaped hostname>#<process_id>
-    std::string host_id;
-};
-
-}
--- a/programs/copier/ClusterPartition.h
+++ b/programs/copier/ClusterPartition.h
@ -1,22 +0,0 @@
-#pragma once
-
-#include <base/types.h>
-#include <map>
-
-namespace DB
-{
-
-/// Contains info about all shards that contain a partition
-/// NOTE(review): the fields are only counters (time, bytes, rows, blocks, tries);
-/// presumably they are aggregated over all shards of the partition - confirm against the code that fills them.
-struct ClusterPartition
-{
-    double elapsed_time_seconds = 0;
-    UInt64 bytes_copied = 0;
-    UInt64 rows_copied = 0;
-    UInt64 blocks_copied = 0;
-
-    UInt64 total_tries = 0;
-};
-
-/// Partitions keyed by a string and ordered in descending key order (std::greater).
-/// NOTE(review): the key is presumably the partition name - confirm against the users of this map.
-using ClusterPartitions = std::map<String, ClusterPartition, std::greater<>>;
-
-}
--- a/programs/copier/Internals.cpp
+++ b/programs/copier/Internals.cpp
@ -1,280 +0,0 @@
-#include "Internals.h"
-#include <Parsers/ASTFunction.h>
-#include <Parsers/ASTIdentifier.h>
-#include <Processors/Executors/PullingPipelineExecutor.h>
-#include <Processors/Transforms/SquashingChunksTransform.h>
-#include <Storages/MergeTree/MergeTreeData.h>
-#include <Storages/extractKeyExpressionList.h>
-#include <QueryPipeline/QueryPipelineBuilder.h>
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-}
-
-using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
-
-/// Creates an XML configuration object from the XML document passed as a string.
-ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data)
-{
-    std::stringstream xml_stream(xml_data);         // STYLE_CHECK_ALLOW_STD_STRING_STREAM
-    Poco::XML::InputSource xml_source{xml_stream};
-
-    ConfigurationPtr configuration{new Poco::Util::XMLConfiguration{&xml_source}};
-    return configuration;
-}
-
-/// Returns "database.table" with both parts back-quoted when needed,
-/// or only the quoted table name when the database name is empty.
-String getQuotedTable(const String & database, const String & table)
-{
-    const String quoted_table = backQuoteIfNeed(table);
-
-    if (!database.empty())
-        return backQuoteIfNeed(database) + "." + quoted_table;
-
-    return quoted_table;
-}
-
-/// Overload for a (database, table) pair.
-String getQuotedTable(const DatabaseAndTableName & db_and_table)
-{
-    const auto & database = db_and_table.first;
-    const auto & table = db_and_table.second;
-    return getQuotedTable(database, table);
-}
-
-
-// Builds the AST of 'ENGINE = Distributed(cluster, db, table, [sharding_key])'.
-// The sharding key is appended to the engine arguments only when it is not null.
-std::shared_ptr<ASTStorage> createASTStorageDistributed(
-        const String & cluster_name, const String & database, const String & table,
-        const ASTPtr & sharding_key_ast)
-{
-    auto engine_arguments = std::make_shared<ASTExpressionList>();
-    auto & argument_nodes = engine_arguments->children;
-    argument_nodes.emplace_back(std::make_shared<ASTLiteral>(cluster_name));
-    argument_nodes.emplace_back(std::make_shared<ASTIdentifier>(database));
-    argument_nodes.emplace_back(std::make_shared<ASTIdentifier>(table));
-    if (sharding_key_ast)
-        argument_nodes.emplace_back(sharding_key_ast);
-
-    auto engine_function = std::make_shared<ASTFunction>();
-    engine_function->name = "Distributed";
-    engine_function->arguments = engine_arguments;
-
-    auto storage_ast = std::make_shared<ASTStorage>();
-    storage_ast->set(storage_ast->engine, engine_function);
-    return storage_ast;
-}
-
-
-/// Executes the pipeline and returns the block obtained by a single pull from it.
-/// A squashing transform with unlimited row and byte thresholds is appended first,
-/// presumably so that the whole result arrives as one block - TODO confirm against SquashingChunksTransform.
-Block getBlockWithAllStreamData(QueryPipelineBuilder builder)
-{
-    builder.addTransform(std::make_shared<SquashingChunksTransform>(
-        builder.getHeader(),
-        std::numeric_limits<size_t>::max(),
-        std::numeric_limits<size_t>::max()));
-
-    auto cur_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder));
-    Block block;
-    PullingPipelineExecutor executor(cur_pipeline);
-    /// Only one pull is performed and its return value is ignored.
-    executor.pull(block);
-
-    return block;
-}
-
-/// A storage definition is "extended" when it has at least one of PARTITION BY, ORDER BY or SAMPLE BY.
-bool isExtendedDefinitionStorage(const ASTPtr & storage_ast)
-{
-    const auto & storage_def = storage_ast->as<ASTStorage &>();
-
-    if (storage_def.partition_by)
-        return true;
-    if (storage_def.order_by)
-        return true;
-    if (storage_def.sample_by)
-        return true;
-
-    return false;
-}
-
-/// Returns the AST of the partition key expression of a *MergeTree storage definition.
-///  - Extended definition: a clone of PARTITION BY, or the string literal 'all' when there is none.
-///  - Old-style definition: toYYYYMM(<date column argument of the engine>).
-/// Throws BAD_ARGUMENTS for engines other than *MergeTree and for old-style definitions
-/// with missing or too few engine arguments.
-ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
-{
-    String storage_str = queryToString(storage_ast);
-
-    const auto & storage = storage_ast->as<ASTStorage &>();
-    const auto & engine = storage.engine->as<ASTFunction &>();
-
-    if (!endsWith(engine.name, "MergeTree"))
-    {
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str);
-    }
-
-    if (isExtendedDefinitionStorage(storage_ast))
-    {
-        if (storage.partition_by)
-            return storage.partition_by->clone();
-
-        static const char * all = "all";
-        return std::make_shared<ASTLiteral>(Field(all, strlen(all)));
-    }
-    else
-    {
-        bool is_replicated = startsWith(engine.name, "Replicated");
-        /// In the old syntax the date column is the first engine argument; for Replicated* engines
-        /// it is preceded by two more arguments (ZooKeeper path and replica name).
-        size_t min_args = is_replicated ? 3 : 1;
-
-        if (!engine.arguments)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected arguments in {}", storage_str);
-
-        ASTPtr arguments_ast = engine.arguments->clone();
-        ASTs & arguments = arguments_ast->children;
-
-        if (arguments.size() < min_args)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected at least {} arguments in {}", min_args, storage_str);
-
-        /// The date column is the last of the required arguments, so the index is always covered by the check above.
-        /// Index 1 used to be read for non-replicated engines: out of bounds for a single argument
-        /// and not the date column otherwise.
-        ASTPtr & month_arg = arguments[min_args - 1];
-        return makeASTFunction("toYYYYMM", month_arg->clone());
-    }
-}
-
-/// Returns a clone of the PRIMARY KEY expression of an extended *MergeTree storage definition,
-/// or nullptr when the definition has no explicit PRIMARY KEY.
-/// Throws BAD_ARGUMENTS for engines other than *MergeTree and for non-extended definitions.
-ASTPtr extractPrimaryKey(const ASTPtr & storage_ast)
-{
-    const auto & storage = storage_ast->as<ASTStorage &>();
-    const auto & engine = storage.engine->as<ASTFunction &>();
-
-    /// The definition is formatted only on the error paths; the successful path does not need the string.
-    if (!endsWith(engine.name, "MergeTree"))
-    {
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", queryToString(storage_ast));
-    }
-
-    if (!isExtendedDefinitionStorage(storage_ast))
-    {
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended definition storage {} Will be fixed later.", queryToString(storage_ast));
-    }
-
-    if (storage.primary_key)
-        return storage.primary_key->clone();
-
-    return nullptr;
-}
-
-
-/// Returns a clone of the ORDER BY expression of an extended *MergeTree storage definition.
-/// Throws BAD_ARGUMENTS for engines other than *MergeTree, for non-extended definitions
-/// and when the definition has no ORDER BY.
-ASTPtr extractOrderBy(const ASTPtr & storage_ast)
-{
-    const auto & storage = storage_ast->as<ASTStorage &>();
-    const auto & engine = storage.engine->as<ASTFunction &>();
-
-    /// The definition is formatted only on the error paths; the successful path does not need the string.
-    if (!endsWith(engine.name, "MergeTree"))
-    {
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", queryToString(storage_ast));
-    }
-
-    if (!isExtendedDefinitionStorage(storage_ast))
-    {
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended definition storage {} Will be fixed later.", queryToString(storage_ast));
-    }
-
-    if (storage.order_by)
-        return storage.order_by->clone();
-
-    throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty");
-}
-
-/// Formats an expression made of identifiers, functions and expression lists as a string,
-/// wrapping only the identifiers with backticks:
-///  - identifier      -> `name`
-///  - function        -> name(<formatted arguments>)
-///  - expression list -> elements joined with ", "
-/// Throws BAD_ARGUMENTS for any other node (including a null one).
-std::string wrapIdentifiersWithBackticks(const ASTPtr & root)
-{
-    if (auto identifier = std::dynamic_pointer_cast<ASTIdentifier>(root))
-        return backQuote(identifier->name());
-
-    if (auto function = std::dynamic_pointer_cast<ASTFunction>(root))
-        return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')';
-
-    if (auto expression_list = std::dynamic_pointer_cast<ASTExpressionList>(root))
-    {
-        Names function_arguments;
-        function_arguments.reserve(expression_list->children.size());
-        /// Every element has to be formatted (the first child used to be repeated for each position).
-        for (const auto & child : expression_list->children)
-            function_arguments.push_back(wrapIdentifiersWithBackticks(child));
-        return boost::algorithm::join(function_arguments, ", ");
-    }
-
-    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key could be represented only as columns or functions from columns.");
-}
-
-
-/// Returns the primary key expressions of an extended *MergeTree definition as strings
-/// with back-quoted identifiers. Without an explicit PRIMARY KEY the sorting key is used.
-/// Throws BAD_ARGUMENTS when the primary key is not a prefix of the sorting key
-/// or contains the same column twice.
-Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
-{
-    const auto order_by_ast = extractOrderBy(storage_ast);
-    const auto primary_ast = extractPrimaryKey(storage_ast);
-
-    const auto order_by_list = extractKeyExpressionList(order_by_ast);
-    const auto primary_list = primary_ast
-                           ? extractKeyExpressionList(primary_ast) : order_by_list->clone();
-
-    /// Maybe we have to handle VersionedCollapsing engine separately. But in our case in looks pointless.
-
-    const auto & order_by_nodes = order_by_list->children;
-    const auto & primary_nodes = primary_list->children;
-
-    size_t primary_key_size = primary_nodes.size();
-    size_t sorting_key_size = order_by_nodes.size();
-
-    if (primary_key_size > sorting_key_size)
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key must be a prefix of the sorting key, but its length: "
-                        "{} is greater than the sorting key length: {}",
-                        primary_key_size, sorting_key_size);
-
-    Names result_columns;
-    NameSet seen_columns;
-
-    for (size_t pos = 0; pos < sorting_key_size; ++pos)
-    {
-        /// A key element may be a nested function call f_1(f_2(...f_n(column_name))),
-        /// where every f_i may take several parameters.
-        /// Identifiers are back-quoted so that non-standard names are allowed.
-        String sorting_column = order_by_nodes[pos]->getColumnName();
-
-        /// Positions beyond the primary key do not contribute to the result.
-        if (pos >= primary_key_size)
-            continue;
-
-        String primary_column = primary_nodes[pos]->getColumnName();
-        if (primary_column != sorting_column)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                            "Primary key must be a prefix of the sorting key, "
-                            "but the column in the position {} is {}, not {}", pos, sorting_column, primary_column);
-
-        const bool inserted = seen_columns.emplace(primary_column).second;
-        if (!inserted)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns");
-
-        result_columns.push_back(wrapIdentifiersWithBackticks(primary_nodes[pos]));
-    }
-
-    return result_columns;
-}
-
-/// Tells whether the engine of a *MergeTree storage definition is a Replicated* one.
-/// Throws BAD_ARGUMENTS for engines other than *MergeTree.
-bool isReplicatedTableEngine(const ASTPtr & storage_ast)
-{
-    const auto & storage_def = storage_ast->as<ASTStorage &>();
-    const auto & engine_name = storage_def.engine->as<ASTFunction &>().name;
-
-    if (endsWith(engine_name, "MergeTree"))
-        return startsWith(engine_name, "Replicated");
-
-    String storage_str = queryToString(storage_ast);
-    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str);
-}
-
-/// Computes the priority of a shard from its replicas:
-///  - is_remote is 0 when the first resolved address of some replica is local, otherwise 1;
-///  - hostname_difference is the smallest prefix distance between local_hostname and a replica host name;
-///  - random is stored as given.
-/// For an empty replica list a default-constructed ShardPriority is returned.
-ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random)
-{
-    ShardPriority priority;
-
-    if (replicas.empty())
-        return priority;
-
-    bool has_local_replica = false;
-    for (const auto & replica : replicas)
-    {
-        if (isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(replica.host_name).front()))
-        {
-            /// One local replica is enough, the remaining ones are not resolved.
-            has_local_replica = true;
-            break;
-        }
-    }
-    priority.is_remote = has_local_replica ? 0 : 1;
-
-    size_t closest_hostname = std::numeric_limits<size_t>::max();
-    for (const auto & replica : replicas)
-        closest_hostname = std::min(getHostNamePrefixDistance(local_hostname, replica.host_name), closest_hostname);
-    priority.hostname_difference = closest_hostname;
-
-    priority.random = random;
-    return priority;
-}
-
-}
--- a/programs/copier/Internals.h
+++ b/programs/copier/Internals.h
@ -1,198 +0,0 @@
-#pragma once
-
-#include <chrono>
-#include <optional>
-#include <Poco/Util/XMLConfiguration.h>
-#include <Poco/Logger.h>
-#include <Poco/ConsoleChannel.h>
-#include <Poco/FormattingChannel.h>
-#include <Poco/PatternFormatter.h>
-#include <Poco/UUIDGenerator.h>
-#include <Poco/Process.h>
-#include <Poco/FileChannel.h>
-#include <Poco/SplitterChannel.h>
-#include <Poco/Util/HelpFormatter.h>
-#include <boost/algorithm/string.hpp>
-#include <Common/logger_useful.h>
-#include <Common/ThreadPool.h>
-#include <Common/Exception.h>
-#include <Common/ZooKeeper/ZooKeeper.h>
-#include <Common/ZooKeeper/KeeperException.h>
-#include <base/getFQDNOrHostName.h>
-#include <Common/isLocalAddress.h>
-#include <Common/typeid_cast.h>
-#include <Common/ClickHouseRevision.h>
-#include <Common/formatReadable.h>
-#include <Common/DNSResolver.h>
-#include <Common/CurrentThread.h>
-#include <Common/getNumberOfPhysicalCPUCores.h>
-#include <Common/ThreadStatus.h>
-#include <Client/Connection.h>
-#include <Interpreters/Context.h>
-#include <Interpreters/Cluster.h>
-#include <Interpreters/InterpreterFactory.h>
-#include <Interpreters/InterpreterExistsQuery.h>
-#include <Interpreters/InterpreterShowCreateQuery.h>
-#include <Interpreters/InterpreterDropQuery.h>
-#include <Interpreters/InterpreterCreateQuery.h>
-#include <Columns/ColumnString.h>
-#include <Columns/ColumnsNumber.h>
-#include <DataTypes/DataTypeString.h>
-#include <DataTypes/NestedUtils.h>
-#include <Parsers/ParserCreateQuery.h>
-#include <Parsers/parseQuery.h>
-#include <Parsers/ParserQuery.h>
-#include <Parsers/ASTCreateQuery.h>
-#include <Parsers/queryToString.h>
-#include <Parsers/ASTDropQuery.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTExpressionList.h>
-#include <Formats/FormatSettings.h>
-#include <QueryPipeline/RemoteQueryExecutor.h>
-#include <IO/ConnectionTimeouts.h>
-#include <IO/Operators.h>
-#include <IO/ReadBufferFromString.h>
-#include <IO/ReadBufferFromFile.h>
-#include <Functions/registerFunctions.h>
-#include <TableFunctions/registerTableFunctions.h>
-#include <AggregateFunctions/registerAggregateFunctions.h>
-#include <Storages/registerStorages.h>
-#include <Storages/StorageDistributed.h>
-#include <Dictionaries/registerDictionaries.h>
-#include <Disks/registerDisks.h>
-#include <Databases/DatabaseMemory.h>
-
-#include "Aliases.h"
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
-
-
-ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data);
-
-String getQuotedTable(const String & database, const String & table);
-
-String getQuotedTable(const DatabaseAndTableName & db_and_table);
-
-
-/// State of a task as stored by TaskStateWithOwner (serialized as its numeric value).
-/// Unknown is the default state of TaskStateWithOwner and is rejected by TaskStateWithOwner::fromString,
-/// so it has to stay the last value.
-enum class TaskState
-{
-    Started = 0,
-    Finished,
-    Unknown
-};
-
-/// Used to mark status of shard partition tasks
-/// Serialized form: "<numeric state>\n<escaped owner>".
-struct TaskStateWithOwner
-{
-    TaskStateWithOwner() = default;
-
-    TaskStateWithOwner(TaskState state_, const String & owner_) : state(state_), owner(owner_) {}
-
-    TaskState state{TaskState::Unknown};
-    String owner;
-
-    /// Shortcut for TaskStateWithOwner(state, owner).toString().
-    static String getData(TaskState state, const String &owner)
-    {
-        return TaskStateWithOwner(state, owner).toString();
-    }
-
-    /// Serializes the state as a number, a line feed and the escaped owner.
-    String toString() const
-    {
-        WriteBufferFromOwnString wb;
-        wb << static_cast<UInt32>(state) << "\n" << escape << owner;
-        return wb.str();
-    }
-
-    /// Parses the format written by toString().
-    /// Throws LOGICAL_ERROR when the numeric state is not below TaskState::Unknown.
-    static TaskStateWithOwner fromString(const String & data)
-    {
-        ReadBufferFromString rb(data);
-        TaskStateWithOwner res;
-        UInt32 state;
-
-        rb >> state >> "\n" >> escape >> res.owner;
-
-        /// Compare in the unsigned type of the parsed value to avoid a mixed-sign comparison.
-        if (state >= static_cast<UInt32>(TaskState::Unknown))
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown state {}", data);
-
-        res.state = static_cast<TaskState>(state);
-        return res;
-    }
-};
-
-
-/// Sort key of a shard: the (is_remote, hostname_difference, random) triple, compared lexicographically.
-struct ShardPriority
-{
-    UInt8 is_remote = 1;
-    size_t hostname_difference = 0;
-    UInt8 random = 0;
-
-    /// True when the triple of `current` is strictly less than the triple of `other`.
-    static bool greaterPriority(const ShardPriority & current, const ShardPriority & other)
-    {
-        const auto current_key = std::tie(current.is_remote, current.hostname_difference, current.random);
-        const auto other_key = std::tie(other.is_remote, other.hostname_difference, other.random);
-        return current_key < other_key;
-    }
-};
-
-/// Execution status of a task.
-/// Is used for: partition copying task status, partition piece copying task status, partition moving task status.
-/// Not to be confused with TaskState, which is the value serialized by TaskStateWithOwner.
-enum class TaskStatus
-{
-    Active,
-    Finished,
-    Error,
-};
-
-/// Bundles a list of requests with the list of responses and a result code.
-/// NOTE(review): presumably describes one ZooKeeper multi-request (the types come from the Coordination namespace) -
-/// confirm against the code that fills this struct. `code` has no default initializer.
-struct MultiTransactionInfo
-{
-    int32_t code;
-    Coordination::Requests requests;
-    Coordination::Responses responses;
-};
-
-// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key])
-std::shared_ptr<ASTStorage> createASTStorageDistributed(
-        const String & cluster_name, const String & database, const String & table,
-        const ASTPtr & sharding_key_ast = nullptr);
-
-Block getBlockWithAllStreamData(QueryPipelineBuilder builder);
-
-bool isExtendedDefinitionStorage(const ASTPtr & storage_ast);
-
-ASTPtr extractPartitionKey(const ASTPtr & storage_ast);
-
-/*
-* Choosing a Primary Key that Differs from the Sorting Key
-* It is possible to specify a primary key (an expression with values that are written in the index file for each mark)
-* that is different from the sorting key (an expression for sorting the rows in data parts).
-* In this case the primary key expression tuple must be a prefix of the sorting key expression tuple.
-* This feature is helpful when using the SummingMergeTree and AggregatingMergeTree table engines.
-* In a common case when using these engines, the table has two types of columns: dimensions and measures.
-* Typical queries aggregate values of measure columns with arbitrary GROUP BY and filtering by dimensions.
-* Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key,
-* it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns
-* and this list must be frequently updated with newly added dimensions.
-* In this case it makes sense to leave only a few columns in the primary key that will provide efficient
-* range scans and add the remaining dimension columns to the sorting key tuple.
-* ALTER of the sorting key is a lightweight operation because when a new column is simultaneously added
-* to the table and to the sorting key, existing data parts don't need to be changed.
-* Since the old sorting key is a prefix of the new sorting key and there is no data in the newly added column,
-* the data is sorted by both the old and new sorting keys at the moment of table modification.
-*
-* */
-ASTPtr extractPrimaryKey(const ASTPtr & storage_ast);
-
-ASTPtr extractOrderBy(const ASTPtr & storage_ast);
-
-Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast);
-
-bool isReplicatedTableEngine(const ASTPtr & storage_ast);
-
-ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random);
-
-}
--- a/programs/copier/ShardPartition.cpp
+++ b/programs/copier/ShardPartition.cpp
@ -1,70 +0,0 @@
-#include "ShardPartition.h"
-
-#include "TaskShard.h"
-#include "TaskTable.h"
-
-namespace DB
-{
-
-ShardPartition::ShardPartition(TaskShard & parent, String name_quoted_, size_t number_of_splits)
-    : task_shard(parent)
-    , name(std::move(name_quoted_))
-{
-    pieces.reserve(number_of_splits);
-}
-
-String ShardPartition::getPartitionCleanStartPath() const
-{
-    return getPartitionPath() + "/clean_start";
-}
-
-String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const
-{
-    assert(current_piece_number < task_shard.task_table.number_of_splits);
-    return getPartitionPiecePath(current_piece_number) + "/clean_start";
-}
-
-String ShardPartition::getPartitionPath() const
-{
-    return task_shard.task_table.getPartitionPath(name);
-}
-
-String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const
-{
-    assert(current_piece_number < task_shard.task_table.number_of_splits);
-    return task_shard.task_table.getPartitionPiecePath(name, current_piece_number);
-}
-
-String ShardPartition::getShardStatusPath() const
-{
-    // schema: /<root...>/tables/<table>/<partition>/shards/<shard>
-    // e.g. /root/table_test.hits/201701/shards/1
-    return getPartitionShardsPath() + "/" + toString(task_shard.numberInCluster());
-}
-
-String ShardPartition::getPartitionShardsPath() const
-{
-    return getPartitionPath() + "/shards";
-}
-
-String ShardPartition::getPartitionActiveWorkersPath() const
-{
-    return getPartitionPath() + "/partition_active_workers";
-}
-
-String ShardPartition::getActiveWorkerPath() const
-{
-    return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster());
-}
-
-String ShardPartition::getCommonPartitionIsDirtyPath() const
-{
-    return getPartitionPath() + "/is_dirty";
-}
-
-String ShardPartition::getCommonPartitionIsCleanedPath() const
-{
-    return getCommonPartitionIsDirtyPath() + "/cleaned";
-}
-
-}
--- a/programs/copier/ShardPartition.h
+++ b/programs/copier/ShardPartition.h
@ -1,54 +0,0 @@
-#pragma once
-
-#include "ShardPartitionPiece.h"
-
-#include <base/types.h>
-
-#include <map>
-
-namespace DB
-{
-
-struct TaskShard;
-
-/// Just destination partition of a shard
-/// I don't know what this comment means.
-/// In short, when we discovered what shards contain currently processing partition,
-/// This class describes a partition (name) that is stored on the shard (parent).
-struct ShardPartition
-{
-    ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10);
-
-    String getPartitionPath() const;
-
-    String getPartitionPiecePath(size_t current_piece_number) const;
-
-    String getPartitionCleanStartPath() const;
-
-    String getPartitionPieceCleanStartPath(size_t current_piece_number) const;
-
-    String getCommonPartitionIsDirtyPath() const;
-
-    String getCommonPartitionIsCleanedPath() const;
-
-    String getPartitionActiveWorkersPath() const;
-
-    String getActiveWorkerPath() const;
-
-    String getPartitionShardsPath() const;
-
-    String getShardStatusPath() const;
-
-    /// What partition pieces are present in current shard.
-    /// FYI: Piece is a part of partition which has modulo equals to concrete constant (less than number_of_splits obliously)
-    /// For example SELECT ... from ... WHERE partition=current_partition AND cityHash64(*) == const;
-    /// Absent pieces have field is_absent_piece equals to true.
-    PartitionPieces pieces;
-
-    TaskShard & task_shard;
-    String name;
-};
-
-using TasksPartition = std::map<String, ShardPartition, std::greater<>>;
-
-}
--- a/programs/copier/ShardPartitionPiece.cpp
+++ b/programs/copier/ShardPartitionPiece.cpp
@ -1,64 +0,0 @@
-#include "ShardPartitionPiece.h"
-
-#include "ShardPartition.h"
-#include "TaskShard.h"
-
-#include <IO/WriteHelpers.h>
-
-namespace DB
-{
-
-ShardPartitionPiece::ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_)
-    : is_absent_piece(!is_present_piece_)
-    , current_piece_number(current_piece_number_)
-    , shard_partition(parent)
-{
-}
-
-String ShardPartitionPiece::getPartitionPiecePath() const
-{
-    return shard_partition.getPartitionPath() + "/piece_" + toString(current_piece_number);
-}
-
-String ShardPartitionPiece::getPartitionPieceCleanStartPath() const
-{
-    return getPartitionPiecePath() + "/clean_start";
-}
-
-String ShardPartitionPiece::getPartitionPieceIsDirtyPath() const
-{
-    return getPartitionPiecePath() + "/is_dirty";
-}
-
-String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const
-{
-    return getPartitionPieceIsDirtyPath() + "/cleaned";
-}
-
-String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const
-{
-    return getPartitionPiecePath() + "/partition_piece_active_workers";
-}
-
-String ShardPartitionPiece::getActiveWorkerPath() const
-{
-    return getPartitionPieceActiveWorkersPath() + "/" + toString(shard_partition.task_shard.numberInCluster());
-}
-
-/// On what shards do we have current partition.
-String ShardPartitionPiece::getPartitionPieceShardsPath() const
-{
-    return getPartitionPiecePath() + "/shards";
-}
-
-String ShardPartitionPiece::getShardStatusPath() const
-{
-    return getPartitionPieceShardsPath() + "/" + toString(shard_partition.task_shard.numberInCluster());
-}
-
-String ShardPartitionPiece::getPartitionPieceCleanerPath() const
-{
-    return getPartitionPieceIsDirtyPath() + "/cleaner";
-}
-
-}
--- a/programs/copier/ShardPartitionPiece.h
+++ b/programs/copier/ShardPartitionPiece.h
@ -1,43 +0,0 @@
-#pragma once
-
-#include <base/types.h>
-
-#include <vector>
-
-namespace DB
-{
-
-struct ShardPartition;
-
-struct ShardPartitionPiece
-{
-    ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_);
-
-    String getPartitionPiecePath() const;
-
-    String getPartitionPieceCleanStartPath() const;
-
-    String getPartitionPieceIsDirtyPath() const;
-
-    String getPartitionPieceIsCleanedPath() const;
-
-    String getPartitionPieceActiveWorkersPath() const;
-
-    String getActiveWorkerPath() const ;
-
-    /// On what shards do we have current partition.
-    String getPartitionPieceShardsPath() const;
-
-    String getShardStatusPath() const;
-
-    String getPartitionPieceCleanerPath() const;
-
-    bool is_absent_piece;
-    const size_t current_piece_number;
-
-    ShardPartition & shard_partition;
-};
-
-using PartitionPieces = std::vector<ShardPartitionPiece>;
-
-}
--- a/programs/copier/StatusAccumulator.cpp
+++ b/programs/copier/StatusAccumulator.cpp
@ -1,48 +0,0 @@
-#include "StatusAccumulator.h"
-
-#include <Poco/JSON/Parser.h>
-#include <Poco/JSON/JSON.h>
-#include <Poco/JSON/Object.h>
-#include <Poco/JSON/Stringifier.h>
-
-#include <iostream>
-
-namespace DB
-{
-
-StatusAccumulator::MapPtr StatusAccumulator::fromJSON(String state_json)
-{
-    Poco::JSON::Parser parser;
-    auto state = parser.parse(state_json).extract<Poco::JSON::Object::Ptr>();
-    MapPtr result_ptr = std::make_shared<Map>();
-    for (const auto & table_name : state->getNames())
-    {
-        auto table_status_json = state->getValue<String>(table_name);
-        auto table_status = parser.parse(table_status_json).extract<Poco::JSON::Object::Ptr>();
-        /// Map entry will be created if it is absent
-        auto & map_table_status = (*result_ptr)[table_name];
-        map_table_status.all_partitions_count += table_status->getValue<size_t>("all_partitions_count");
-        map_table_status.processed_partitions_count += table_status->getValue<size_t>("processed_partitions_count");
-    }
-    return result_ptr;
-}
-
-String StatusAccumulator::serializeToJSON(MapPtr statuses)
-{
-    Poco::JSON::Object result_json;
-    for (const auto & [table_name, table_status] : *statuses)
-    {
-        Poco::JSON::Object status_json;
-        status_json.set("all_partitions_count", table_status.all_partitions_count);
-        status_json.set("processed_partitions_count", table_status.processed_partitions_count);
-
-        result_json.set(table_name, status_json);
-    }
-    std::ostringstream oss;     // STYLE_CHECK_ALLOW_STD_STRING_STREAM
-    oss.exceptions(std::ios::failbit);
-    Poco::JSON::Stringifier::stringify(result_json, oss);
-    auto result = oss.str();
-    return result;
-}
-
-}
--- a/programs/copier/StatusAccumulator.h
+++ b/programs/copier/StatusAccumulator.h
@ -1,27 +0,0 @@
-#pragma once
-
-#include <base/types.h>
-
-#include <memory>
-#include <unordered_map>
-
-namespace DB
-{
-
-class StatusAccumulator
-{
-public:
-    struct TableStatus
-    {
-        size_t all_partitions_count;
-        size_t processed_partitions_count;
-    };
-
-    using Map = std::unordered_map<String, TableStatus>;
-    using MapPtr = std::shared_ptr<Map>;
-
-    static MapPtr fromJSON(String state_json);
-    static String serializeToJSON(MapPtr statuses);
-};
-
-}
--- a/programs/copier/TaskCluster.cpp
+++ b/programs/copier/TaskCluster.cpp
@ -1,74 +0,0 @@
-#include "TaskCluster.h"
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-}
-
-TaskCluster::TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_)
-    : task_zookeeper_path(task_zookeeper_path_)
-    , default_local_database(default_local_database_)
-{}
-
-void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key)
-{
-    String prefix = base_key.empty() ? "" : base_key + ".";
-
-    clusters_prefix = prefix + "remote_servers";
-    if (!config.has(clusters_prefix))
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "You should specify list of clusters in {}", clusters_prefix);
-
-    Poco::Util::AbstractConfiguration::Keys tables_keys;
-    config.keys(prefix + "tables", tables_keys);
-
-    for (const auto & table_key : tables_keys)
-    {
-        table_tasks.emplace_back(*this, config, prefix + "tables", table_key);
-    }
-}
-
-void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key)
-{
-    String prefix = base_key.empty() ? "" : base_key + ".";
-
-    max_workers = config.getUInt64(prefix + "max_workers");
-
-    settings_common = Settings();
-    if (config.has(prefix + "settings"))
-        settings_common.loadSettingsFromConfig(prefix + "settings", config);
-
-    settings_common.prefer_localhost_replica = false;
-
-    settings_pull = settings_common;
-    if (config.has(prefix + "settings_pull"))
-        settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config);
-
-    settings_push = settings_common;
-    if (config.has(prefix + "settings_push"))
-        settings_push.loadSettingsFromConfig(prefix + "settings_push", config);
-
-    auto set_default_value = [] (auto && setting, auto && default_value)
-    {
-        setting = setting.changed ? setting.value : default_value;
-    };
-
-    /// Override important settings
-    settings_pull.readonly = 1;
-    settings_pull.prefer_localhost_replica = false;
-    settings_push.distributed_foreground_insert = true;
-    settings_push.prefer_localhost_replica = false;
-
-    set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME);
-    set_default_value(settings_pull.max_threads, 1);
-    set_default_value(settings_pull.max_block_size, 8192UL);
-    set_default_value(settings_pull.preferred_block_size_bytes, 0);
-
-    set_default_value(settings_push.distributed_background_insert_timeout, 0);
-    set_default_value(settings_push.alter_sync, 2);
-}
-
-}
-
--- a/programs/copier/TaskCluster.h
+++ b/programs/copier/TaskCluster.h
@ -1,51 +0,0 @@
-#pragma once
-
-#include "TaskTable.h"
-
-#include <Core/Settings.h>
-#include <base/types.h>
-
-#include <Poco/Util/AbstractConfiguration.h>
-
-#include <pcg_random.hpp>
-
-namespace DB
-{
-
-struct TaskCluster
-{
-    TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_);
-
-    void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");
-
-    /// Set (or update) settings and max_workers param
-    void reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key = "");
-
-    /// Base node for all tasks. Its structure:
-    ///  workers/ - directory with active workers (amount of them is less or equal max_workers)
-    ///  description - node with task configuration
-    ///  table_table1/ - directories with per-partition copying status
-    String task_zookeeper_path;
-
-    /// Database used to create temporary Distributed tables
-    String default_local_database;
-
-    /// Limits number of simultaneous workers
-    UInt64 max_workers = 0;
-
-    /// Base settings for pull and push
-    Settings settings_common;
-    /// Settings used to fetch data
-    Settings settings_pull;
-    /// Settings used to insert data
-    Settings settings_push;
-
-    String clusters_prefix;
-
-    /// Subtasks
-    TasksTable table_tasks;
-
-    pcg64 random_engine;
-};
-
-}
--- a/programs/copier/TaskShard.cpp
+++ b/programs/copier/TaskShard.cpp
@ -1,37 +0,0 @@
-#include "TaskShard.h"
-
-#include "TaskTable.h"
-
-namespace DB
-{
-
-TaskShard::TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_)
-    : task_table(parent)
-    , info(info_)
-{
-    list_of_split_tables_on_shard.assign(task_table.number_of_splits, DatabaseAndTableName());
-}
-
-UInt32 TaskShard::numberInCluster() const
-{
-    return info.shard_num;
-}
-
-UInt32 TaskShard::indexInCluster() const
-{
-    return info.shard_num - 1;
-}
-
-String DB::TaskShard::getDescription() const
-{
-    return fmt::format("N{} (having a replica {}, pull table {} of cluster {}",
-                       numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name);
-}
-
-String DB::TaskShard::getHostNameExample() const
-{
-    const auto & replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster());
-    return replicas.at(0).readableString();
-}
-
-}
--- a/programs/copier/TaskShard.h
+++ b/programs/copier/TaskShard.h
@ -1,56 +0,0 @@
-#pragma once
-
-#include "Aliases.h"
-#include "Internals.h"
-#include "ClusterPartition.h"
-#include "ShardPartition.h"
-
-
-namespace DB
-{
-
-struct TaskTable;
-
-struct TaskShard
-{
-    TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_);
-
-    TaskTable & task_table;
-
-    Cluster::ShardInfo info;
-
-    UInt32 numberInCluster() const;
-
-    UInt32 indexInCluster() const;
-
-    String getDescription() const;
-
-    String getHostNameExample() const;
-
-    /// Used to sort clusters by their proximity
-    ShardPriority priority;
-
-    /// Column with unique destination partitions (computed from engine_push_partition_key expr.) in the shard
-    ColumnWithTypeAndName partition_key_column;
-
-    /// There is a task for each destination partition
-    TasksPartition partition_tasks;
-
-    /// Which partitions have been checked for existence
-    /// If some partition from this lists is exists, it is in partition_tasks
-    std::set<String> checked_partitions;
-
-    /// Last CREATE TABLE query of the table of the shard
-    ASTPtr current_pull_table_create_query;
-    ASTPtr current_push_table_create_query;
-
-    /// Internal distributed tables
-    DatabaseAndTableName table_read_shard;
-    DatabaseAndTableName main_table_split_shard;
-    ListOfDatabasesAndTableNames list_of_split_tables_on_shard;
-};
-
-using TaskShardPtr = std::shared_ptr<TaskShard>;
-using TasksShard = std::vector<TaskShardPtr>;
-
-}
--- a/programs/copier/TaskTable.cpp
+++ b/programs/copier/TaskTable.cpp
@ -1,222 +0,0 @@
-#include "TaskTable.h"
-
-#include "ClusterPartition.h"
-#include "TaskCluster.h"
-
-#include <Parsers/ASTFunction.h>
-#include <Common/escapeForFileName.h>
-
-#include <boost/algorithm/string/join.hpp>
-
-
-namespace DB
-{
-namespace ErrorCodes
-{
-    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
-    extern const int LOGICAL_ERROR;
-}
-
-TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config,
-                     const String & prefix_, const String & table_key)
-        : task_cluster(parent)
-{
-    String table_prefix = prefix_ + "." + table_key + ".";
-
-    name_in_config = table_key;
-
-    number_of_splits = config.getUInt64(table_prefix + "number_of_splits", 3);
-
-    allow_to_copy_alias_and_materialized_columns = config.getBool(table_prefix + "allow_to_copy_alias_and_materialized_columns", false);
-    allow_to_drop_target_partitions = config.getBool(table_prefix + "allow_to_drop_target_partitions", false);
-
-    cluster_pull_name = config.getString(table_prefix + "cluster_pull");
-    cluster_push_name = config.getString(table_prefix + "cluster_push");
-
-    table_pull.first = config.getString(table_prefix + "database_pull");
-    table_pull.second = config.getString(table_prefix + "table_pull");
-
-    table_push.first = config.getString(table_prefix + "database_push");
-    table_push.second = config.getString(table_prefix + "table_push");
-
-    /// Used as node name in ZooKeeper
-    table_id = escapeForFileName(cluster_push_name)
-               + "." + escapeForFileName(table_push.first)
-               + "." + escapeForFileName(table_push.second);
-
-    engine_push_str = config.getString(table_prefix + "engine", "rand()");
-
-    {
-        ParserStorage parser_storage{ParserStorage::TABLE_ENGINE};
-        engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-        engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
-        primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", ");
-        is_replicated_table = isReplicatedTableEngine(engine_push_ast);
-    }
-
-    sharding_key_str = config.getString(table_prefix + "sharding_key");
-
-    auxiliary_engine_split_asts.reserve(number_of_splits);
-    {
-        ParserExpressionWithOptionalAlias parser_expression(false);
-        sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-        main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second,
-                                                            sharding_key_ast);
-
-        for (const auto piece_number : collections::range(0, number_of_splits))
-        {
-            auxiliary_engine_split_asts.emplace_back
-                    (
-                            createASTStorageDistributed(cluster_push_name, table_push.first,
-                                                        table_push.second + "_piece_" + toString(piece_number), sharding_key_ast)
-                    );
-        }
-    }
-
-    where_condition_str = config.getString(table_prefix + "where_condition", "");
-    if (!where_condition_str.empty())
-    {
-        ParserExpressionWithOptionalAlias parser_expression(false);
-        where_condition_ast = parseQuery(parser_expression, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-
-        // Will use canonical expression form
-        where_condition_str = queryToString(where_condition_ast);
-    }
-
-    String enabled_partitions_prefix = table_prefix + "enabled_partitions";
-    has_enabled_partitions = config.has(enabled_partitions_prefix);
-
-    if (has_enabled_partitions)
-    {
-        Strings keys;
-        config.keys(enabled_partitions_prefix, keys);
-
-        if (keys.empty())
-        {
-            /// Parse list of partition from space-separated string
-            String partitions_str = config.getString(table_prefix + "enabled_partitions");
-            boost::trim_if(partitions_str, isWhitespaceASCII);
-            boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on);
-        }
-        else
-        {
-            /// Parse sequence of <partition>...</partition>
-            for (const String &key : keys)
-            {
-                if (!startsWith(key, "partition"))
-                    throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown key {} in {}", key, enabled_partitions_prefix);
-
-                enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key));
-            }
-        }
-
-        std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin()));
-    }
-}
-
-
-String TaskTable::getPartitionPath(const String & partition_name) const
-{
-    return task_cluster.task_zookeeper_path             // root
-           + "/tables/" + table_id                      // tables/dst_cluster.merge.hits
-           + "/" + escapeForFileName(partition_name);   // 201701
-}
-
-String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const
-{
-    return getPartitionPath(partition_name) + "/attach_active";
-}
-
-String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const
-{
-    return getPartitionPath(partition_name) + "/attach_is_done";
-}
-
-String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const
-{
-    assert(piece_number < number_of_splits);
-    return getPartitionPath(partition_name) + "/piece_" + toString(piece_number);  // 1...number_of_splits
-}
-
-String TaskTable::getCertainPartitionIsDirtyPath(const String &partition_name) const
-{
-    return getPartitionPath(partition_name) + "/is_dirty";
-}
-
-String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const
-{
-    return getPartitionPiecePath(partition_name, piece_number) + "/is_dirty";
-}
-
-String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const
-{
-    return getCertainPartitionIsDirtyPath(partition_name) + "/cleaned";
-}
-
-String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const
-{
-    return getCertainPartitionPieceIsDirtyPath(partition_name, piece_number) + "/cleaned";
-}
-
-String TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const
-{
-    return getPartitionPath(partition_name) + "/shards";
-}
-
-String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const
-{
-    return getPartitionPiecePath(partition_name, piece_number) + "/shards";
-}
-
-bool TaskTable::isReplicatedTable() const
-{
-    return is_replicated_table;
-}
-
-String TaskTable::getStatusAllPartitionCount() const
-{
-    return task_cluster.task_zookeeper_path + "/status/all_partitions_count";
-}
-
-String TaskTable::getStatusProcessedPartitionsCount() const
-{
-    return task_cluster.task_zookeeper_path + "/status/processed_partitions_count";
-}
-
-ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const
-{
-    ASTPtr prev_engine_push_ast = engine_push_ast->clone();
-
-    auto & new_storage_ast = prev_engine_push_ast->as<ASTStorage &>();
-    auto & new_engine_ast = new_storage_ast.engine->as<ASTFunction &>();
-
-    /// Remove "Replicated" from name
-    new_engine_ast.name = new_engine_ast.name.substr(10);
-
-    if (new_engine_ast.arguments)
-    {
-        auto & replicated_table_arguments = new_engine_ast.arguments->children;
-
-
-        /// In some cases of Atomic database engine usage ReplicatedMergeTree tables
-        /// could be created without arguments.
-        if (!replicated_table_arguments.empty())
-        {
-            /// Delete first two arguments of Replicated...MergeTree() table.
-            replicated_table_arguments.erase(replicated_table_arguments.begin());
-            replicated_table_arguments.erase(replicated_table_arguments.begin());
-        }
-    }
-
-    return new_storage_ast.clone();
-}
-
-ClusterPartition & TaskTable::getClusterPartition(const String & partition_name)
-{
-    auto it = cluster_partitions.find(partition_name);
-    if (it == cluster_partitions.end())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no cluster partition {} in {}", partition_name, table_id);
-    return it->second;
-}
-
-}
--- a/programs/copier/TaskTable.h
+++ b/programs/copier/TaskTable.h
@ -1,173 +0,0 @@
-#pragma once
-
-#include "Aliases.h"
-#include "TaskShard.h"
-
-
-namespace DB
-{
-
-struct ClusterPartition;
-struct TaskCluster;
-
-struct TaskTable
-{
-    TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix, const String & table_key);
-
-    TaskCluster & task_cluster;
-
-    /// These functions used in checkPartitionIsDone() or checkPartitionPieceIsDone()
-    /// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc.
-
-    String getPartitionPath(const String & partition_name) const;
-
-    String getPartitionAttachIsActivePath(const String & partition_name) const;
-
-    String getPartitionAttachIsDonePath(const String & partition_name) const;
-
-    String getPartitionPiecePath(const String & partition_name, size_t piece_number) const;
-
-    String getCertainPartitionIsDirtyPath(const String & partition_name) const;
-
-    String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const;
-
-    String getCertainPartitionIsCleanedPath(const String & partition_name) const;
-
-    String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const;
-
-    String getCertainPartitionTaskStatusPath(const String & partition_name) const;
-
-    String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const;
-
-    bool isReplicatedTable() const;
-
-    /// These nodes are used for check-status option
-    String getStatusAllPartitionCount() const;
-    String getStatusProcessedPartitionsCount() const;
-
-    /// Partitions will be split into number-of-splits pieces.
-    /// Each piece will be copied independently. (10 by default)
-    size_t number_of_splits;
-
-    bool allow_to_copy_alias_and_materialized_columns{false};
-    bool allow_to_drop_target_partitions{false};
-
-    String name_in_config;
-
-    /// Used as task ID
-    String table_id;
-
-    /// Column names in primary key
-    String primary_key_comma_separated;
-
-    /// Source cluster and table
-    String cluster_pull_name;
-    DatabaseAndTableName table_pull;
-
-    /// Destination cluster and table
-    String cluster_push_name;
-    DatabaseAndTableName table_push;
-
-    /// Storage of destination table
-    /// (tables that are stored on each shard of target cluster)
-    String engine_push_str;
-    ASTPtr engine_push_ast;
-    ASTPtr engine_push_partition_key_ast;
-
-    /// First argument of Replicated...MergeTree()
-    String engine_push_zk_path;
-    bool is_replicated_table;
-
-    ASTPtr rewriteReplicatedCreateQueryToPlain() const;
-
-    /*
-     * A Distributed table definition used to split data
-     * Distributed table will be created on each shard of default
-     * cluster to perform data copying and resharding
-     * */
-    String sharding_key_str;
-    ASTPtr sharding_key_ast;
-    ASTPtr main_engine_split_ast;
-
-    /*
-     * To copy partition piece form one cluster to another we have to use Distributed table.
-     * In case of usage separate table (engine_push) for each partition piece,
-     * we have to use many Distributed tables.
-     * */
-    ASTs auxiliary_engine_split_asts;
-
-    /// Additional WHERE expression to filter input data
-    String where_condition_str;
-    ASTPtr where_condition_ast;
-
-    /// Resolved clusters
-    ClusterPtr cluster_pull;
-    ClusterPtr cluster_push;
-
-    /// Filter partitions that should be copied
-    bool has_enabled_partitions = false;
-    Strings enabled_partitions;
-    NameSet enabled_partitions_set;
-
-    /**
-     * Prioritized list of shards
-     * all_shards contains information about all shards in the table.
-     * So we have to check whether particular shard have current partition or not while processing.
-     */
-    TasksShard all_shards;
-    TasksShard local_shards;
-
-    /// All partitions of the current table.
-    ClusterPartitions cluster_partitions;
-    NameSet finished_cluster_partitions;
-
-    /// Partition names to process in user-specified order
-    Strings ordered_partition_names;
-
-    ClusterPartition & getClusterPartition(const String & partition_name);
-
-    Stopwatch watch;
-    UInt64 bytes_copied = 0;
-    UInt64 rows_copied = 0;
-
-    template <typename RandomEngine>
-    void initShards(RandomEngine &&random_engine);
-};
-
-using TasksTable = std::list<TaskTable>;
-
-
-template<typename RandomEngine>
-inline void TaskTable::initShards(RandomEngine && random_engine)
-{
-    const String & fqdn_name = getFQDNOrHostName();
-    std::uniform_int_distribution<uint8_t> get_urand(0, std::numeric_limits<UInt8>::max());
-
-    // Compute the priority
-    for (const auto & shard_info : cluster_pull->getShardsInfo())
-    {
-        TaskShardPtr task_shard = std::make_shared<TaskShard>(*this, shard_info);
-        const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster());
-        task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine));
-
-        all_shards.emplace_back(task_shard);
-    }
-
-    // Sort by priority
-    std::sort(all_shards.begin(), all_shards.end(),
-              [](const TaskShardPtr & lhs, const TaskShardPtr & rhs)
-              {
-                  return ShardPriority::greaterPriority(lhs->priority, rhs->priority);
-              });
-
-    // Cut local shards
-    auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1,
-                                            [](const TaskShardPtr & lhs, UInt8 is_remote)
-                                            {
-                                                return lhs->priority.is_remote < is_remote;
-                                            });
-
-    local_shards.assign(all_shards.begin(), it_first_remote);
-}
-
-}
--- a/programs/copier/ZooKeeperStaff.h
+++ b/programs/copier/ZooKeeperStaff.h
@ -1,221 +0,0 @@
-#pragma once
-
-/** Allows to compare two incremental counters of type UInt32 in presence of possible overflow.
-  * We assume that we compare values that are not too far away.
-  * For example, when we increment 0xFFFFFFFF, we get 0. So, 0xFFFFFFFF is less than 0.
-  */
-class WrappingUInt32
-{
-public:
-    UInt32 value;
-
-    explicit WrappingUInt32(UInt32 _value)
-            : value(_value)
-    {}
-
-    bool operator<(const WrappingUInt32 & other) const
-    {
-        return value != other.value && *this <= other;
-    }
-
-    bool operator<=(const WrappingUInt32 & other) const
-    {
-        const UInt32 HALF = static_cast<UInt32>(1) << 31;
-        return (value <= other.value && other.value - value < HALF)
-               || (value > other.value && value - other.value > HALF);
-    }
-
-    bool operator==(const WrappingUInt32 & other) const
-    {
-        return value == other.value;
-    }
-};
-
-/** Conforming Zxid definition.
-  * cf. https://github.com/apache/zookeeper/blob/631d1b284f0edb1c4f6b0fb221bf2428aec71aaa/zookeeper-docs/src/main/resources/markdown/zookeeperInternals.md#guarantees-properties-and-definitions
-  *
-  * But it is better to read this: https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html
-  *
-  * Actually here is the definition of Zxid.
-  * Every change to the ZooKeeper state receives a stamp in the form of a zxid (ZooKeeper Transaction Id).
-  * This exposes the total ordering of all changes to ZooKeeper. Each change will have a unique zxid
-  * and if zxid1 is smaller than zxid2 then zxid1 happened before zxid2.
-  */
-class Zxid
-{
-public:
-    WrappingUInt32 epoch;
-    WrappingUInt32 counter;
-    explicit Zxid(UInt64 _zxid)
-            : epoch(static_cast<UInt32>(_zxid >> 32))
-            , counter(static_cast<UInt32>(_zxid))
-    {}
-
-    bool operator<=(const Zxid & other) const
-    {
-        return (epoch < other.epoch)
-               || (epoch == other.epoch && counter <= other.counter);
-    }
-
-    bool operator==(const Zxid & other) const
-    {
-        return epoch == other.epoch && counter == other.counter;
-    }
-};
-
-/* When multiple ClusterCopiers discover that the target partition is not empty,
- * they will attempt to clean up this partition before proceeding to copying.
- *
- * Instead of purging is_dirty, the history of cleaning work is preserved and partition hygiene is established
- * based on a happens-before relation between the events.
- * This relation is encoded by LogicalClock based on the mzxid of the is_dirty ZNode and is_dirty/cleaned.
- * The fact of the partition hygiene is encoded by CleanStateClock.
- *
- * For you to know what mzxid means:
- *
- * ZooKeeper Stat Structure:
- * The Stat structure for each znode in ZooKeeper is made up of the following fields:
- *
- * -- czxid
- * The zxid of the change that caused this znode to be created.
- *
- * -- mzxid
- * The zxid of the change that last modified this znode.
- *
- * -- ctime
- * The time in milliseconds from epoch when this znode was created.
- *
- * -- mtime
- * The time in milliseconds from epoch when this znode was last modified.
- *
- * -- version
- * The number of changes to the data of this znode.
- *
- * -- cversion
- * The number of changes to the children of this znode.
- *
- * -- aversion
- * The number of changes to the ACL of this znode.
- *
- * -- ephemeralOwner
- * The session id of the owner of this znode if the znode is an ephemeral node.
- * If it is not an ephemeral node, it will be zero.
- *
- * -- dataLength
- * The length of the data field of this znode.
- *
- * -- numChildren
- * The number of children of this znode.
- * */
-
-class LogicalClock
-{
-public:
-    std::optional<Zxid> zxid;
-
-    LogicalClock() = default;
-
-    explicit LogicalClock(UInt64 _zxid)
-            : zxid(_zxid)
-    {}
-
-    bool hasHappened() const
-    {
-        return bool(zxid);
-    }
-
-    /// happens-before relation with a reasonable time bound
-    bool happensBefore(const LogicalClock & other) const
-    {
-        return !zxid
-               || (other.zxid && *zxid <= *other.zxid);
-    }
-
-    bool operator<=(const LogicalClock & other) const
-    {
-        return happensBefore(other);
-    }
-
-    /// strict equality check
-    bool operator==(const LogicalClock & other) const
-    {
-        return zxid == other.zxid;
-    }
-};
-
-
-class CleanStateClock
-{
-public:
-    LogicalClock discovery_zxid;
-    std::optional<UInt32> discovery_version;
-
-    LogicalClock clean_state_zxid;
-    std::optional<UInt32> clean_state_version;
-
-    std::shared_ptr<std::atomic_bool> stale;
-
-    bool is_clean() const
-    {
-        return !is_stale()
-            && (!discovery_zxid.hasHappened() || (clean_state_zxid.hasHappened() && discovery_zxid <= clean_state_zxid));
-    }
-
-    bool is_stale() const
-    {
-        return stale->load();
-    }
-
-    CleanStateClock(
-            const zkutil::ZooKeeperPtr & zookeeper,
-            const String & discovery_path,
-            const String & clean_state_path)
-            : stale(std::make_shared<std::atomic_bool>(false))
-    {
-        Coordination::Stat stat{};
-        String _some_data;
-        auto watch_callback =
-                [my_stale = stale] (const Coordination::WatchResponse & rsp)
-                {
-                    auto logger = getLogger("ClusterCopier");
-                    if (rsp.error == Coordination::Error::ZOK)
-                    {
-                        switch (rsp.type) /// NOLINT(bugprone-switch-missing-default-case)
-                        {
-                            case Coordination::CREATED:
-                                LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path);
-                                my_stale->store(true);
-                                break;
-                            case Coordination::CHANGED:
-                                LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path);
-                                my_stale->store(true);
-                        }
-                    }
-                };
-        if (zookeeper->tryGetWatch(discovery_path, _some_data, &stat, watch_callback))
-        {
-            discovery_zxid = LogicalClock(stat.mzxid);
-            discovery_version = stat.version;
-        }
-        if (zookeeper->tryGetWatch(clean_state_path, _some_data, &stat, watch_callback))
-        {
-            clean_state_zxid = LogicalClock(stat.mzxid);
-            clean_state_version = stat.version;
-        }
-    }
-
-    bool operator==(const CleanStateClock & other) const
-    {
-        return !is_stale()
-               && !other.is_stale()
-               && discovery_zxid == other.discovery_zxid
-               && discovery_version == other.discovery_version
-               && clean_state_zxid == other.clean_state_zxid
-               && clean_state_version == other.clean_state_version;
-    }
-
-    bool operator!=(const CleanStateClock & other) const
-    {
-        return !(*this == other);
-    }
-};
--- a/programs/copier/clickhouse-copier.cpp
+++ b/programs/copier/clickhouse-copier.cpp
@ -1 +0,0 @@
-int mainEntryClickHouseClusterCopier(int argc, char ** argv);
--- a/programs/diagnostics/testdata/configs/xml/config.xml
+++ b/programs/diagnostics/testdata/configs/xml/config.xml
@ -94,7 +94,7 @@
    <http_port>8123</http_port>

    <!-- Port for interaction by native protocol with:
-         - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark, clickhouse-copier);
+         - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark);
         - clickhouse-server with other clickhouse-servers for distributed query processing;
         - ClickHouse drivers and applications supporting native protocol
         (this protocol is also informally called as "the TCP protocol");
--- a/programs/diagnostics/testdata/configs/yaml/config.yaml
+++ b/programs/diagnostics/testdata/configs/yaml/config.yaml
@ -56,7 +56,7 @@ logger:
 http_port: 8123

 # Port for interaction by native protocol with:
-# - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark, clickhouse-copier);
+# - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark);
 # - clickhouse-server with other clickhouse-servers for distributed query processing;
 # - ClickHouse drivers and applications supporting native protocol
 # (this protocol is also informally called as "the TCP protocol");
--- a/programs/diagnostics/testdata/configs/yandex_xml/config.xml
+++ b/programs/diagnostics/testdata/configs/yandex_xml/config.xml
@ -94,7 +94,7 @@
    <http_port>8123</http_port>

    <!-- Port for interaction by native protocol with:
-         - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark, clickhouse-copier);
+         - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark);
         - clickhouse-server with other clickhouse-servers for distributed query processing;
         - ClickHouse drivers and applications supporting native protocol
         (this protocol is also informally called as "the TCP protocol");
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@ -433,7 +433,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
            "clickhouse-client",
            "clickhouse-local",
            "clickhouse-benchmark",
-            "clickhouse-copier",
            "clickhouse-obfuscator",
            "clickhouse-git-import",
            "clickhouse-compressor",
--- a/programs/main.cpp
+++ b/programs/main.cpp
@ -31,7 +31,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv);
 int mainEntryClickHouseExtractFromConfig(int argc, char ** argv);
 int mainEntryClickHouseCompressor(int argc, char ** argv);
 int mainEntryClickHouseFormat(int argc, char ** argv);
-int mainEntryClickHouseClusterCopier(int argc, char ** argv);
 int mainEntryClickHouseObfuscator(int argc, char ** argv);
 int mainEntryClickHouseGitImport(int argc, char ** argv);
 int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv);
@ -80,7 +79,6 @@ std::pair<std::string_view, MainFunc> clickhouse_applications[] =
    {"extract-from-config", mainEntryClickHouseExtractFromConfig},
    {"compressor", mainEntryClickHouseCompressor},
    {"format", mainEntryClickHouseFormat},
-    {"copier", mainEntryClickHouseClusterCopier},
    {"obfuscator", mainEntryClickHouseObfuscator},
    {"git-import", mainEntryClickHouseGitImport},
    {"static-files-disk-uploader", mainEntryClickHouseStaticFilesDiskUploader},
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@ -135,7 +135,7 @@
    <http_port>8123</http_port>

    <!-- Port for interaction by native protocol with:
-         - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark, clickhouse-copier);
+         - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark);
         - clickhouse-server with other clickhouse-servers for distributed query processing;
         - ClickHouse drivers and applications supporting native protocol
         (this protocol is also informally called as "the TCP protocol");
--- a/programs/server/config.yaml.example
+++ b/programs/server/config.yaml.example
@ -55,7 +55,7 @@ logger:
 http_port: 8123

 # Port for interaction by native protocol with:
-# - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark, clickhouse-copier);
+# - clickhouse-client and other native ClickHouse tools (clickhouse-benchmark);
 # - clickhouse-server with other clickhouse-servers for distributed query processing;
 # - ClickHouse drivers and applications supporting native protocol
 # (this protocol is also informally called as "the TCP protocol");
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@ -86,6 +86,7 @@
 namespace ProfileEvents
 {
    extern const Event ScalarSubqueriesGlobalCacheHit;
+    extern const Event ScalarSubqueriesLocalCacheHit;
    extern const Event ScalarSubqueriesCacheMiss;
 }

@ -1444,7 +1445,8 @@ private:
    std::unordered_map<QueryTreeNodePtr, size_t> node_to_tree_size;

    /// Global scalar subquery to scalar value map
-    std::unordered_map<QueryTreeNodePtrWithHash, Block> scalar_subquery_to_scalar_value;
+    std::unordered_map<QueryTreeNodePtrWithHash, Block> scalar_subquery_to_scalar_value_local;
+    std::unordered_map<QueryTreeNodePtrWithHash, Block> scalar_subquery_to_scalar_value_global;

    const bool only_analyze;
 };
@ -1951,6 +1953,24 @@ QueryTreeNodePtr QueryAnalyzer::tryGetLambdaFromSQLUserDefinedFunctions(const st
    return result_node;
 }

+bool subtreeHasViewSource(const IQueryTreeNode * node, const Context & context)
+{
+    if (!node)
+        return false;
+
+    if (const auto * table_node = node->as<TableNode>())
+    {
+        if (table_node->getStorageID().getFullNameNotQuoted() == context.getViewSource()->getStorageID().getFullNameNotQuoted())
+            return true;
+    }
+
+    for (const auto & child : node->getChildren())
+        if (subtreeHasViewSource(child.get(), context))
+            return true;
+
+    return false;
+}
+
 /// Evaluate scalar subquery and perform constant folding if scalar subquery does not have constant value
 void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, IdentifierResolveScope & scope)
 {
@ -1970,12 +1990,26 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
    node_without_alias->removeAlias();

    QueryTreeNodePtrWithHash node_with_hash(node_without_alias);
-    auto scalar_value_it = scalar_subquery_to_scalar_value.find(node_with_hash);
+    auto str_hash = DB::toString(node_with_hash.hash);

-    if (scalar_value_it != scalar_subquery_to_scalar_value.end())
+    bool can_use_global_scalars = !only_analyze && !(context->getViewSource() && subtreeHasViewSource(node_without_alias.get(), *context));
+
+    auto & scalars_cache = can_use_global_scalars ? scalar_subquery_to_scalar_value_global : scalar_subquery_to_scalar_value_local;
+
+    if (scalars_cache.contains(node_with_hash))
    {
+        if (can_use_global_scalars)
+            ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit);
+        else
+            ProfileEvents::increment(ProfileEvents::ScalarSubqueriesLocalCacheHit);
+
+        scalar_block = scalars_cache.at(node_with_hash);
+    }
+    else if (context->hasQueryContext() && can_use_global_scalars && context->getQueryContext()->hasScalar(str_hash))
+    {
+        scalar_block = context->getQueryContext()->getScalar(str_hash);
+        scalar_subquery_to_scalar_value_global.emplace(node_with_hash, scalar_block);
        ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit);
-        scalar_block = scalar_value_it->second;
    }
    else
    {
@ -2087,7 +2121,9 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden
            }
        }

-        scalar_subquery_to_scalar_value.emplace(node_with_hash, scalar_block);
+        scalars_cache.emplace(node_with_hash, scalar_block);
+        if (can_use_global_scalars && context->hasQueryContext())
+            context->getQueryContext()->addScalar(str_hash, scalar_block);
    }

    const auto & scalar_column_with_type = scalar_block.safeGetByPosition(0);
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@ -20,12 +20,12 @@ namespace DB

 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
    extern const int ILLEGAL_COLUMN;
    extern const int DUPLICATE_COLUMN;
    extern const int NUMBER_OF_DIMENSIONS_MISMATCHED;
    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
    extern const int ARGUMENT_OUT_OF_BOUND;
+    extern const int EXPERIMENTAL_FEATURE_ERROR;
 }

 namespace
@ -247,7 +247,7 @@ void ColumnObject::Subcolumn::checkTypes() const
        prefix_types.push_back(current_type);
        auto prefix_common_type = getLeastSupertype(prefix_types);
        if (!prefix_common_type->equals(*current_type))
-            throw Exception(ErrorCodes::LOGICAL_ERROR,
+            throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
                "Data type {} of column at position {} cannot represent all columns from i-th prefix",
                current_type->getName(), i);
    }
@ -635,7 +635,7 @@ void ColumnObject::checkConsistency() const
    {
        if (num_rows != leaf->data.size())
        {
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Sizes of subcolumns are inconsistent in ColumnObject."
+            throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Sizes of subcolumns are inconsistent in ColumnObject."
                " Subcolumn '{}' has {} rows, but expected size is {}",
                leaf->path.getPath(), leaf->data.size(), num_rows);
        }
@ -919,7 +919,7 @@ void ColumnObject::addSubcolumn(const PathInData & key, size_t new_size)
 void ColumnObject::addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size)
 {
    if (!key.hasNested())
-        throw Exception(ErrorCodes::LOGICAL_ERROR,
+        throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
            "Cannot add Nested subcolumn, because path doesn't contain Nested");

    bool inserted = false;
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@ -226,7 +226,6 @@
    M(PartsDeleteOnDestroy, "Part was moved to another disk and should be deleted in own destructor.") \
    M(PartsWide, "Wide parts.") \
    M(PartsCompact, "Compact parts.") \
-    M(PartsInMemory, "In-memory parts.") \
    M(MMappedFiles, "Total number of mmapped files.") \
    M(MMappedFileBytes, "Sum size of mmapped file regions.") \
    M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@ -598,6 +598,7 @@
    M(714, UNEXPECTED_CLUSTER) \
    M(715, CANNOT_DETECT_FORMAT) \
    M(716, CANNOT_FORGET_PARTITION) \
+    M(717, EXPERIMENTAL_FEATURE_ERROR) \
    \
    M(999, KEEPER_EXCEPTION) \
    M(1000, POCO_EXCEPTION) \
--- a/src/Common/LRUResourceCache.h
+++ b/src/Common/LRUResourceCache.h
@ -221,7 +221,7 @@ private:
        {
            std::lock_guard lock(mutex);
            auto it = cells.find(key);
-            if (it != cells.end() && !it->second.expired)
+            if (it != cells.end())
            {
                if (!it->second.expired)
                {
--- a/src/Common/tests/gtest_lru_resource_cache.cpp
+++ b/src/Common/tests/gtest_lru_resource_cache.cpp
@ -45,6 +45,33 @@ struct MyWeight
    size_t operator()(const int & x) const { return static_cast<size_t>(x); }
 };

+TEST(LRUResourceCache, remove2)
+{
+    using MyCache = DB::LRUResourceCache<int, int, MyWeight>;
+    auto mcache = MyCache(10, 10);
+    for (int i = 1; i < 5; ++i)
+    {
+        auto load_int = [&] { return std::make_shared<int>(i); };
+        mcache.getOrSet(i, load_int);
+    }
+
+    auto n = mcache.size();
+    ASSERT_EQ(n, 4);
+    auto w = mcache.weight();
+    ASSERT_EQ(w, 10);
+    auto holder4 = mcache.get(4);
+    ASSERT_TRUE(holder4 != nullptr);
+    mcache.tryRemove(4);
+    auto holder_reget_4 = mcache.get(4);
+    ASSERT_TRUE(holder_reget_4 == nullptr);
+    mcache.getOrSet(4, [&]() { return std::make_shared<int>(4); });
+    holder4.reset();
+    auto holder1 = mcache.getOrSet(1, [&]() { return std::make_shared<int>(1); });
+    ASSERT_TRUE(holder1 != nullptr);
+    auto holder7 = mcache.getOrSet(7, [&] { return std::make_shared<int>(7); });
+    ASSERT_TRUE(holder7 != nullptr);
+}
+
 TEST(LRUResourceCache, evictOnWweight)
 {
    using MyCache = DB::LRUResourceCache<int, int, MyWeight>;
--- a/src/Core/Field.h
+++ b/src/Core/Field.h
@ -497,7 +497,7 @@ public:

        switch (which)
        {
-            case Types::Null:    return false;
+            case Types::Null:    return get<Null>() < rhs.get<Null>();
            case Types::Bool:    [[fallthrough]];
            case Types::UInt64:  return get<UInt64>()  < rhs.get<UInt64>();
            case Types::UInt128: return get<UInt128>() < rhs.get<UInt128>();
@ -541,7 +541,7 @@ public:

        switch (which)
        {
-            case Types::Null:    return true;
+            case Types::Null:    return get<Null>() <= rhs.get<Null>();
            case Types::Bool: [[fallthrough]];
            case Types::UInt64:  return get<UInt64>()  <= rhs.get<UInt64>();
            case Types::UInt128: return get<UInt128>() <= rhs.get<UInt128>();
@ -590,7 +590,7 @@ public:

        switch (which)
        {
-            case Types::Null: return true;
+            case Types::Null: return get<Null>() == rhs.get<Null>();
            case Types::Bool: [[fallthrough]];
            case Types::UInt64: return get<UInt64>() == rhs.get<UInt64>();
            case Types::Int64:   return get<Int64>() == rhs.get<Int64>();
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@ -112,7 +112,6 @@ namespace DB
    M(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \
    M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
    M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
-    \
    M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \
    M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \
    M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \
--- a/src/Core/Types.h
+++ b/src/Core/Types.h
@ -23,9 +23,9 @@ struct Null
 {
    enum class Value
    {
-        Null,
-        PositiveInfinity,
-        NegativeInfinity,
+        NegativeInfinity = -1,
+        Null = 0,
+        PositiveInfinity = 1,
    };

    Value value{Value::Null};
@ -34,15 +34,12 @@ struct Null
    bool isPositiveInfinity() const { return value == Value::PositiveInfinity; }
    bool isNegativeInfinity() const { return value == Value::NegativeInfinity; }

-    bool operator==(const Null & other) const
+    auto operator<=>(const Null & other) const
    {
-        return value == other.value;
+        return static_cast<int>(value) <=> static_cast<int>(other.value);
    }

-    bool operator!=(const Null & other) const
-    {
-        return !(*this == other);
-    }
+    bool operator==(const Null &) const = default;
 };

 using UInt128 = ::UInt128;
--- a/src/DataTypes/ObjectUtils.cpp
+++ b/src/DataTypes/ObjectUtils.cpp
@ -28,9 +28,9 @@ namespace DB
 namespace ErrorCodes
 {
    extern const int TYPE_MISMATCH;
-    extern const int LOGICAL_ERROR;
    extern const int INCOMPATIBLE_COLUMNS;
    extern const int NOT_IMPLEMENTED;
+    extern const int EXPERIMENTAL_FEATURE_ERROR;
 }

 size_t getNumberOfDimensions(const IDataType & type)
@ -92,7 +92,7 @@ ColumnPtr createArrayOfColumn(ColumnPtr column, size_t num_dimensions)
 Array createEmptyArrayField(size_t num_dimensions)
 {
    if (num_dimensions == 0)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create array field with 0 dimensions");
+        throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Cannot create array field with 0 dimensions");

    Array array;
    Array * current_array = &array;
@ -231,7 +231,7 @@ static std::pair<ColumnPtr, DataTypePtr> recursivlyConvertDynamicColumnToTuple(
        };
    }

-    throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type->getName());
+    throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Type {} unexpectedly has dynamic columns", type->getName());
 }

 void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & storage_snapshot)
@ -247,7 +247,7 @@ void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & sto
        GetColumnsOptions options(GetColumnsOptions::AllPhysical);
        auto storage_column = storage_snapshot->tryGetColumn(options, column.name);
        if (!storage_column)
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", column.name);
+            throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Column '{}' not found in storage", column.name);

        auto storage_column_concrete = storage_snapshot->getColumn(options.withExtendedObjects(), column.name);

@ -315,7 +315,7 @@ static DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool che
    {
        const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get());
        if (!type_tuple)
-            throw Exception(ErrorCodes::LOGICAL_ERROR,
+            throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
                "Least common type for object can be deduced only from tuples, but {} given", type->getName());

        auto [tuple_paths, tuple_types] = flattenTuple(type);
@ -427,7 +427,7 @@ static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl(
    if (const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type_in_storage.get()))
        return getLeastCommonTypeForTuple(*type_tuple, concrete_types, check_ambiguos_paths);

-    throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
+    throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
 }

 DataTypePtr getLeastCommonTypeForDynamicColumns(
@ -481,7 +481,7 @@ DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage
        return recreateTupleWithElements(*type_tuple, new_elements);
    }

-    throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
+    throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName());
 }

 bool hasDynamicSubcolumns(const ColumnsDescription & columns)
@ -613,7 +613,7 @@ DataTypePtr reduceNumberOfDimensions(DataTypePtr type, size_t dimensions_to_redu
    {
        const auto * type_array = typeid_cast<const DataTypeArray *>(type.get());
        if (!type_array)
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough dimensions to reduce");
+            throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Not enough dimensions to reduce");

        type = type_array->getNestedType();
    }
@ -627,7 +627,7 @@ ColumnPtr reduceNumberOfDimensions(ColumnPtr column, size_t dimensions_to_reduce
    {
        const auto * column_array = typeid_cast<const ColumnArray *>(column.get());
        if (!column_array)
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough dimensions to reduce");
+            throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Not enough dimensions to reduce");

        column = column_array->getDataPtr();
    }
@ -653,7 +653,7 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node)
    auto collect_tuple_elemets = [](const auto & children)
    {
        if (children.empty())
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create type from empty Tuple or Nested node");
+            throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Cannot create type from empty Tuple or Nested node");

        std::vector<std::tuple<String, ColumnWithTypeAndDimensions>> tuple_elements;
        tuple_elements.reserve(children.size());
@ -705,6 +705,7 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node)
        size_t num_elements = tuple_columns.size();
        Columns tuple_elements_columns(num_elements);
        DataTypes tuple_elements_types(num_elements);
+        size_t last_offset = assert_cast<const ColumnArray::ColumnOffsets &>(*offsets_columns.back()).getData().back();

        /// Reduce extra array dimensions to get columns and types of Nested elements.
        for (size_t i = 0; i < num_elements; ++i)
@ -712,6 +713,14 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node)
            assert(tuple_columns[i].array_dimensions == tuple_columns[0].array_dimensions);
            tuple_elements_columns[i] = reduceNumberOfDimensions(tuple_columns[i].column, tuple_columns[i].array_dimensions);
            tuple_elements_types[i] = reduceNumberOfDimensions(tuple_columns[i].type, tuple_columns[i].array_dimensions);
+            if (tuple_elements_columns[i]->size() != last_offset)
+                throw Exception(
+                    ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
+                    "Cannot create a type for subcolumn {} in Object data type: offsets_column has data inconsistent with nested_column. "
+                    "Data size: {}, last offset: {}",
+                    node.path.getPath(),
+                    tuple_elements_columns[i]->size(),
+                    last_offset);
        }

        auto result_column = ColumnArray::create(ColumnTuple::create(tuple_elements_columns), offsets_columns.back());
@ -720,6 +729,16 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node)
        /// Recreate result Array type and Array column.
        for (auto it = offsets_columns.rbegin() + 1; it != offsets_columns.rend(); ++it)
        {
+            last_offset = assert_cast<const ColumnArray::ColumnOffsets &>((**it)).getData().back();
+            if (result_column->size() != last_offset)
+                throw Exception(
+                    ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
+                    "Cannot create a type for subcolumn {} in Object data type: offsets_column has data inconsistent with nested_column. "
+                    "Data size: {}, last offset: {}",
+                    node.path.getPath(),
+                    result_column->size(),
+                    last_offset);
+
            result_column = ColumnArray::create(result_column, *it);
            result_type = std::make_shared<DataTypeArray>(result_type);
        }
@ -822,7 +841,7 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
    assert(paths.size() == tuple_columns.size());

    if (paths.empty())
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unflatten empty Tuple");
+        throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Cannot unflatten empty Tuple");

    /// We add all paths to the subcolumn tree and then create a type from it.
    /// The tree stores column, type and number of array dimensions
@ -841,7 +860,7 @@ std::pair<ColumnPtr, DataTypePtr> unflattenTuple(
        tree.add(paths[i], [&](Node::Kind kind, bool exists) -> std::shared_ptr<Node>
            {
                if (pos >= num_parts)
-                    throw Exception(ErrorCodes::LOGICAL_ERROR,
+                    throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
                        "Not enough name parts for path {}. Expected at least {}, got {}",
                            paths[i].getPath(), pos + 1, num_parts);

--- a/src/DataTypes/Serializations/SerializationObject.cpp
+++ b/src/DataTypes/Serializations/SerializationObject.cpp
@ -29,7 +29,7 @@ namespace ErrorCodes
    extern const int INCORRECT_DATA;
    extern const int CANNOT_READ_ALL_DATA;
    extern const int ARGUMENT_OUT_OF_BOUND;
-    extern const int LOGICAL_ERROR;
+    extern const int EXPERIMENTAL_FEATURE_ERROR;
 }

 template <typename Parser>
@ -177,7 +177,7 @@ void SerializationObject<Parser>::serializeBinaryBulkStatePrefix(
    auto * stream = settings.getter(settings.path);

    if (!stream)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for kind of binary serialization");
+        throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Missing stream for kind of binary serialization");

    auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object);

@ -288,7 +288,7 @@ void SerializationObject<Parser>::serializeBinaryBulkWithMultipleStreams(

    if (!state_object->nested_type->equals(*tuple_type))
    {
-        throw Exception(ErrorCodes::LOGICAL_ERROR,
+        throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
            "Types of internal column of Object mismatched. Expected: {}, Got: {}",
            state_object->nested_type->getName(), tuple_type->getName());
    }
--- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
+++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
@ -17,16 +17,15 @@ using namespace DB;

 namespace
 {
-bool withFileCache(const ReadSettings & settings)
-{
-    return settings.remote_fs_cache && settings.enable_filesystem_cache
-        && (!CurrentThread::getQueryId().empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache
-            || !settings.avoid_readthrough_cache_outside_query_context);
-}
-bool withPageCache(const ReadSettings & settings, bool with_file_cache)
-{
-    return settings.page_cache && !with_file_cache && settings.use_page_cache_for_disks_without_file_cache;
-}
+    bool withFileCache(const ReadSettings & settings)
+    {
+        return settings.remote_fs_cache && settings.enable_filesystem_cache;
+    }
+
+    bool withPageCache(const ReadSettings & settings, bool with_file_cache)
+    {
+        return settings.page_cache && !with_file_cache && settings.use_page_cache_for_disks_without_file_cache;
+    }
 }

 namespace DB
--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
@ -43,10 +43,6 @@ ReadSettings CachedObjectStorage::patchSettings(const ReadSettings & read_settin
 {
    ReadSettings modified_settings{read_settings};
    modified_settings.remote_fs_cache = cache;
-
-    if (!canUseReadThroughCache(read_settings))
-        modified_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true;
-
    return object_storage->patchSettings(modified_settings);
 }

@ -206,14 +202,4 @@ String CachedObjectStorage::getObjectsNamespace() const
    return object_storage->getObjectsNamespace();
 }

-bool CachedObjectStorage::canUseReadThroughCache(const ReadSettings & settings)
-{
-    if (!settings.avoid_readthrough_cache_outside_query_context)
-        return true;
-
-    return CurrentThread::isInitialized()
-        && CurrentThread::get().getQueryContext()
-        && !CurrentThread::getQueryId().empty();
-}
-
 }
--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h
@ -119,8 +119,6 @@ public:

    const FileCacheSettings & getCacheSettings() const { return cache_settings; }

-    static bool canUseReadThroughCache(const ReadSettings & settings);
-
 #if USE_AZURE_BLOB_STORAGE
    std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> getAzureBlobStorageClient() override
    {
--- a/src/IO/ReadSettings.h
+++ b/src/IO/ReadSettings.h
@ -99,8 +99,6 @@ struct ReadSettings
    bool enable_filesystem_cache = true;
    bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false;
    bool enable_filesystem_cache_log = false;
-    /// Don't populate cache when the read is not part of query execution (e.g. background thread).
-    bool avoid_readthrough_cache_outside_query_context = true;
    size_t filesystem_cache_segments_batch_size = 20;

    bool use_page_cache_for_disks_without_file_cache = false;
--- a/src/Interpreters/AddDefaultDatabaseVisitor.h
+++ b/src/Interpreters/AddDefaultDatabaseVisitor.h
@ -275,13 +275,7 @@ private:
        if (only_replace_current_database_function)
            return;

-        for (ASTRenameQuery::Element & elem : node.elements)
-        {
-            if (!elem.from.database)
-                elem.from.database = std::make_shared<ASTIdentifier>(database_name);
-            if (!elem.to.database)
-                elem.to.database = std::make_shared<ASTIdentifier>(database_name);
-        }
+        node.setDatabaseIfNotExists(database_name);
    }

    void visitDDL(ASTAlterQuery & node, ASTPtr &) const
--- a/src/Interpreters/Cache/FileSegment.cpp
+++ b/src/Interpreters/Cache/FileSegment.cpp
@ -10,6 +10,7 @@
 #include <Common/logger_useful.h>
 #include <Common/scope_guard_safe.h>
 #include <Common/ElapsedTimeProfileEventIncrement.h>
+#include <Common/setThreadName.h>

 #include <magic_enum.hpp>

@ -195,7 +196,7 @@ bool FileSegment::isDownloaded() const
 String FileSegment::getCallerId()
 {
    if (!CurrentThread::isInitialized() || CurrentThread::getQueryId().empty())
-        return "None:" + toString(getThreadId());
+        return fmt::format("None:{}:{}", getThreadName(), toString(getThreadId()));

    return std::string(CurrentThread::getQueryId()) + ":" + toString(getThreadId());
 }
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@ -1612,7 +1612,6 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
        executeTrivialBlockIO(fill_io, getContext());

        /// Replace target table with created one
-        auto ast_rename = std::make_shared<ASTRenameQuery>();
        ASTRenameQuery::Element elem
        {
            ASTRenameQuery::Table
@ -1627,7 +1626,7 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create,
            }
        };

-        ast_rename->elements.push_back(std::move(elem));
+        auto ast_rename = std::make_shared<ASTRenameQuery>(ASTRenameQuery::Elements{std::move(elem)});
        ast_rename->dictionary = create.is_dictionary;
        if (create.create_or_replace)
        {
--- a/src/Interpreters/InterpreterRenameQuery.cpp
+++ b/src/Interpreters/InterpreterRenameQuery.cpp
@ -47,12 +47,12 @@ BlockIO InterpreterRenameQuery::execute()
      */

    RenameDescriptions descriptions;
-    descriptions.reserve(rename.elements.size());
+    descriptions.reserve(rename.getElements().size());

    /// Don't allow to drop tables (that we are renaming); don't allow to create tables in places where tables will be renamed.
    TableGuards table_guards;

-    for (const auto & elem : rename.elements)
+    for (const auto & elem : rename.getElements())
    {
        descriptions.emplace_back(elem, current_database);
        const auto & description = descriptions.back();
@ -186,7 +186,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename
 {
    AccessRightsElements required_access;
    const auto & rename = query_ptr->as<const ASTRenameQuery &>();
-    for (const auto & elem : rename.elements)
+    for (const auto & elem : rename.getElements())
    {
        if (type == RenameType::RenameTable)
        {
@ -214,7 +214,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename
 void InterpreterRenameQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const
 {
    const auto & rename = ast->as<const ASTRenameQuery &>();
-    for (const auto & element : rename.elements)
+    for (const auto & element : rename.getElements())
    {
        {
            String database = backQuoteIfNeed(!element.from.database ? getContext()->getCurrentDatabase() : element.from.getDatabase());
--- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp
+++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp
@ -579,7 +579,7 @@ ASTs InterpreterRenameImpl::getRewrittenQueries(
    const InterpreterRenameImpl::TQuery & rename_query, ContextPtr context, const String & mapped_to_database, const String & mysql_database)
 {
    ASTRenameQuery::Elements elements;
-    for (const auto & rename_element : rename_query.elements)
+    for (const auto & rename_element : rename_query.getElements())
    {
        const auto & to_database = resolveDatabase(rename_element.to.getDatabase(), mysql_database, mapped_to_database, context);
        const auto & from_database = resolveDatabase(rename_element.from.getDatabase(), mysql_database, mapped_to_database, context);
@ -600,8 +600,7 @@ ASTs InterpreterRenameImpl::getRewrittenQueries(
    if (elements.empty())
        return ASTs{};

-    auto rewritten_query = std::make_shared<ASTRenameQuery>();
-    rewritten_query->elements = elements;
+    auto rewritten_query = std::make_shared<ASTRenameQuery>(std::move(elements));
    return ASTs{rewritten_query};
 }

@ -616,7 +615,8 @@ ASTs InterpreterAlterImpl::getRewrittenQueries(
        return {};

    auto rewritten_alter_query = std::make_shared<ASTAlterQuery>();
-    auto rewritten_rename_query = std::make_shared<ASTRenameQuery>();
+    ASTRenameQuery::Elements rename_elements;
+
    rewritten_alter_query->setDatabase(mapped_to_database);
    rewritten_alter_query->setTable(alter_query.table);
    rewritten_alter_query->alter_object = ASTAlterQuery::AlterObjectType::TABLE;
@ -749,13 +749,13 @@ ASTs InterpreterAlterImpl::getRewrittenQueries(

            /// For ALTER TABLE table_name RENAME TO new_table_name_1, RENAME TO new_table_name_2;
            /// We just need to generate RENAME TABLE table_name TO new_table_name_2;
-            if (rewritten_rename_query->elements.empty())
-                rewritten_rename_query->elements.push_back(ASTRenameQuery::Element());
+            if (rename_elements.empty())
+                rename_elements.push_back(ASTRenameQuery::Element());

-            rewritten_rename_query->elements.back().from.database = std::make_shared<ASTIdentifier>(mapped_to_database);
-            rewritten_rename_query->elements.back().from.table = std::make_shared<ASTIdentifier>(alter_query.table);
-            rewritten_rename_query->elements.back().to.database = std::make_shared<ASTIdentifier>(mapped_to_database);
-            rewritten_rename_query->elements.back().to.table = std::make_shared<ASTIdentifier>(alter_command->new_table_name);
+            rename_elements.back().from.database = std::make_shared<ASTIdentifier>(mapped_to_database);
+            rename_elements.back().from.table = std::make_shared<ASTIdentifier>(alter_query.table);
+            rename_elements.back().to.database = std::make_shared<ASTIdentifier>(mapped_to_database);
+            rename_elements.back().to.table = std::make_shared<ASTIdentifier>(alter_command->new_table_name);
        }
    }

@ -765,8 +765,11 @@ ASTs InterpreterAlterImpl::getRewrittenQueries(
    if (!rewritten_alter_query->command_list->children.empty())
        rewritten_queries.push_back(rewritten_alter_query);

-    if (!rewritten_rename_query->elements.empty())
+    if (!rename_elements.empty())
+    {
+        auto rewritten_rename_query = std::make_shared<ASTRenameQuery>(std::move(rename_elements));
        rewritten_queries.push_back(rewritten_rename_query);
+    }

    return rewritten_queries;
 }
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@ -563,7 +563,6 @@ void SystemLog<LogElement>::prepareTable()
                {table_id.database_name, table_id.table_name + "_" + toString(suffix)}, getContext()))
                ++suffix;

-            auto rename = std::make_shared<ASTRenameQuery>();
            ASTRenameQuery::Element elem
            {
                ASTRenameQuery::Table
@ -586,7 +585,7 @@ void SystemLog<LogElement>::prepareTable()
                old_create_query,
                create_query);

-            rename->elements.emplace_back(std::move(elem));
+            auto rename = std::make_shared<ASTRenameQuery>(ASTRenameQuery::Elements{std::move(elem)});

            ActionLock merges_lock;
            if (DatabaseCatalog::instance().getDatabase(table_id.database_name)->getUUID() == UUIDHelpers::Nil)
--- a/src/Parsers/ASTRenameQuery.h
+++ b/src/Parsers/ASTRenameQuery.h
@ -45,7 +45,6 @@ public:
    };

    using Elements = std::vector<Element>;
-    Elements elements;

    bool exchange{false};   /// For EXCHANGE TABLES
    bool database{false};   /// For RENAME DATABASE
@ -54,12 +53,48 @@ public:
    /// Special flag for CREATE OR REPLACE. Do not throw if the second table does not exist.
    bool rename_if_cannot_exchange{false};

+    explicit ASTRenameQuery(Elements elements_ = {})
+        : elements(std::move(elements_))
+    {
+        for (const auto & elem : elements)
+        {
+            if (elem.from.database)
+                children.push_back(elem.from.database);
+            if (elem.from.table)
+                children.push_back(elem.from.table);
+            if (elem.to.database)
+                children.push_back(elem.to.database);
+            if (elem.to.table)
+                children.push_back(elem.to.table);
+        }
+    }
+
+    void setDatabaseIfNotExists(const String & database_name)
+    {
+        for (auto & elem : elements)
+        {
+            if (!elem.from.database)
+            {
+                elem.from.database = std::make_shared<ASTIdentifier>(database_name);
+                children.push_back(elem.from.database);
+            }
+            if (!elem.to.database)
+            {
+                elem.to.database = std::make_shared<ASTIdentifier>(database_name);
+                children.push_back(elem.to.database);
+            }
+        }
+    }
+
+    const Elements & getElements() const { return elements; }
+
    /** Get the text that identifies this element. */
    String getID(char) const override { return "Rename"; }

    ASTPtr clone() const override
    {
        auto res = std::make_shared<ASTRenameQuery>(*this);
+        res->cloneChildren();
        cloneOutputOptions(*res);
        return res;
    }
@ -145,6 +180,8 @@ protected:

        formatOnCluster(settings);
    }
+
+    Elements elements;
 };

 }
--- a/src/Parsers/ParserRenameQuery.cpp
+++ b/src/Parsers/ParserRenameQuery.cpp
@ -44,15 +44,14 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
            if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
                return false;
        }
+        ASTRenameQuery::Elements rename_elements;
+        rename_elements.emplace_back();
+        rename_elements.back().if_exists = if_exists;
+        rename_elements.back().from.database = from_db;
+        rename_elements.back().to.database = to_db;

-        auto query = std::make_shared<ASTRenameQuery>();
+        auto query = std::make_shared<ASTRenameQuery>(std::move(rename_elements));
        query->database = true;
-        query->elements.emplace({});
-        query->elements.front().if_exists = if_exists;
-        query->elements.front().from.database = from_db;
-        query->elements.front().to.database = to_db;
-        query->children.push_back(std::move(from_db));
-        query->children.push_back(std::move(to_db));
        query->cluster = cluster_str;
        node = query;
        return true;
@ -75,9 +74,8 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)

    const auto ignore_delim = [&] { return exchange ? s_and.ignore(pos) : s_to.ignore(pos); };

-    auto query = std::make_shared<ASTRenameQuery>();

-    ASTRenameQuery::Elements & elements = query->elements;
+    ASTRenameQuery::Elements elements;

    while (true)
    {
@ -93,15 +91,6 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
            || !ignore_delim()
            || !parseDatabaseAndTableAsAST(pos, expected, ref.to.database, ref.to.table))
            return false;
-
-        if (ref.from.database)
-            query->children.push_back(ref.from.database);
-        if (ref.from.table)
-            query->children.push_back(ref.from.table);
-        if (ref.to.database)
-            query->children.push_back(ref.to.database);
-        if (ref.to.table)
-            query->children.push_back(ref.to.table);
    }

    String cluster_str;
@ -111,6 +100,7 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
            return false;
    }

+    auto query = std::make_shared<ASTRenameQuery>(std::move(elements));
    query->cluster = cluster_str;
    query->exchange = exchange;
    query->dictionary = dictionary;
--- a/src/Parsers/tests/gtest_Parser.cpp
+++ b/src/Parsers/tests/gtest_Parser.cpp
@ -9,6 +9,7 @@
 #include <Parsers/ParserAlterQuery.h>
 #include <Parsers/ParserCreateQuery.h>
 #include <Parsers/ParserOptimizeQuery.h>
+#include <Parsers/ParserRenameQuery.h>
 #include <Parsers/ParserQueryWithOutput.h>
 #include <Parsers/ParserAttachAccessEntity.h>
 #include <Parsers/formatAST.h>
@ -62,10 +63,29 @@ TEST_P(ParserTest, parseQuery)
            if (std::string("CREATE USER or ALTER USER query") != parser->getName()
                    && std::string("ATTACH access entity query") != parser->getName())
            {
-                WriteBufferFromOwnString buf;
-                formatAST(*ast->clone(), buf, false, false);
-                String formatted_ast = buf.str();
-                EXPECT_EQ(expected_ast, formatted_ast);
+                ASTPtr ast_clone = ast->clone();
+                {
+                    WriteBufferFromOwnString buf;
+                    formatAST(*ast_clone, buf, false, false);
+                    String formatted_ast = buf.str();
+                    EXPECT_EQ(expected_ast, formatted_ast);
+                }
+
+
+                ASTPtr ast_clone2 = ast_clone->clone();
+                /// Break `ast_clone2`; it should not affect `ast_clone` if `clone()` is implemented properly
+                for (auto & child : ast_clone2->children)
+                {
+                    if (auto * identifier = dynamic_cast<ASTIdentifier *>(child.get()))
+                        identifier->setShortName("new_name");
+                }
+
+                {
+                    WriteBufferFromOwnString buf;
+                    formatAST(*ast_clone, buf, false, false);
+                    String formatted_ast = buf.str();
+                    EXPECT_EQ(expected_ast, formatted_ast);
+                }
            }
            else
            {
@ -299,6 +319,16 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest,
        }
 })));

+INSTANTIATE_TEST_SUITE_P(ParserRenameQuery, ParserTest,
+    ::testing::Combine(
+        ::testing::Values(std::make_shared<ParserRenameQuery>()),
+        ::testing::ValuesIn(std::initializer_list<ParserTestCase>{
+        {
+            "RENAME TABLE eligible_test TO eligible_test2",
+            "RENAME TABLE eligible_test TO eligible_test2"
+        }
+})));
+
 INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserKQLTest,
    ::testing::Combine(
        ::testing::Values(std::make_shared<ParserKQLQuery>()),
--- a/src/Processors/Executors/ExecutingGraph.h
+++ b/src/Processors/Executors/ExecutingGraph.h
@ -3,6 +3,7 @@
 #include <Processors/Port.h>
 #include <Processors/IProcessor.h>
 #include <Common/SharedMutex.h>
+#include <Common/AllocatorWithMemoryTracking.h>
 #include <mutex>
 #include <queue>
 #include <stack>
@ -117,7 +118,11 @@ public:
        }
    };

-    using Queue = std::queue<Node *>;
+    /// This queue can grow a lot and lead to OOM. That is why we use a non-default
+    /// allocator for the container: one that throws exceptions in operator new
+    using DequeWithMemoryTracker = std::deque<ExecutingGraph::Node *, AllocatorWithMemoryTracking<ExecutingGraph::Node *>>;
+    using Queue = std::queue<ExecutingGraph::Node *, DequeWithMemoryTracker>;
+
    using NodePtr = std::unique_ptr<Node>;
    using Nodes = std::vector<NodePtr>;
    Nodes nodes;
--- a/src/Processors/Executors/ExecutorTasks.h
+++ b/src/Processors/Executors/ExecutorTasks.h
@ -47,7 +47,10 @@ class ExecutorTasks

 public:
    using Stack = std::stack<UInt64>;
-    using Queue = std::queue<ExecutingGraph::Node *>;
+    /// This queue can grow a lot and lead to OOM. That is why we use a non-default
+    /// allocator for the container: one that throws exceptions in operator new
+    using DequeWithMemoryTracker = std::deque<ExecutingGraph::Node *, AllocatorWithMemoryTracking<ExecutingGraph::Node *>>;
+    using Queue = std::queue<ExecutingGraph::Node *, DequeWithMemoryTracker>;

    void finish();
    bool isFinished() const { return finished; }
--- a/src/Processors/Executors/PipelineExecutor.h
+++ b/src/Processors/Executors/PipelineExecutor.h
@ -5,7 +5,9 @@
 #include <Common/EventCounter.h>
 #include <Common/ThreadPool_fwd.h>
 #include <Common/ConcurrencyControl.h>
+#include <Common/AllocatorWithMemoryTracking.h>

+#include <deque>
 #include <queue>
 #include <mutex>
 #include <memory>
@ -90,7 +92,10 @@ private:

    ReadProgressCallbackPtr read_progress_callback;

-    using Queue = std::queue<ExecutingGraph::Node *>;
+    /// This queue can grow a lot and lead to OOM. That is why we use a non-default
+    /// allocator for the container: one that throws exceptions in operator new
+    using DequeWithMemoryTracker = std::deque<ExecutingGraph::Node *, AllocatorWithMemoryTracking<ExecutingGraph::Node *>>;
+    using Queue = std::queue<ExecutingGraph::Node *, DequeWithMemoryTracker>;

    void initializeExecution(size_t num_threads, bool concurrency_control); /// Initialize executor contexts and task_queue.
    void finalizeExecution(); /// Check all processors are finished.
--- a/src/Processors/QueryPlan/PartsSplitter.cpp
+++ b/src/Processors/QueryPlan/PartsSplitter.cpp
@ -101,9 +101,9 @@ bool isSafePrimaryKey(const KeyDescription & primary_key)

 int compareValues(const Values & lhs, const Values & rhs)
 {
-    chassert(lhs.size() == rhs.size());
+    size_t size = std::min(lhs.size(), rhs.size());

-    for (size_t i = 0; i < lhs.size(); ++i)
+    for (size_t i = 0; i < size; ++i)
    {
        if (applyVisitor(FieldVisitorAccurateLess(), lhs[i], rhs[i]))
            return -1;
@ -124,8 +124,9 @@ public:
    Values getValue(size_t part_idx, size_t mark) const
    {
        const auto & index = parts[part_idx].data_part->getIndex();
-        Values values(index.size());
-        for (size_t i = 0; i < values.size(); ++i)
+        size_t size = index.size();
+        Values values(size);
+        for (size_t i = 0; i < size; ++i)
        {
            index[i]->get(mark, values[i]);
            if (values[i].isNull())
--- a/src/Storages/MergeTree/CMakeLists.txt
+++ b/src/Storages/MergeTree/CMakeLists.txt
@ -1,3 +0,0 @@
-if(ENABLE_EXAMPLES)
-    add_subdirectory(examples)
-endif()
--- a/src/Storages/MergeTree/ColumnSizeEstimator.h
+++ b/src/Storages/MergeTree/ColumnSizeEstimator.h
@ -1,7 +1,6 @@
 #pragma once

 #include <Storages/MergeTree/IMergeTreeDataPart.h>
-#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>


 namespace DB
@ -10,7 +9,7 @@ namespace DB
 /* Allow to compute more accurate progress statistics */
 class ColumnSizeEstimator
 {
-    using ColumnToSize = MergeTreeDataPartInMemory::ColumnToSize;
+    using ColumnToSize = std::map<String, UInt64>;
    ColumnToSize map;
 public:

--- a/src/Storages/MergeTree/DataPartsExchange.cpp
+++ b/src/Storages/MergeTree/DataPartsExchange.cpp
@ -10,7 +10,6 @@
 #include <IO/S3Common.h>
 #include <Server/HTTP/HTMLForm.h>
 #include <Server/HTTP/HTTPServerResponse.h>
-#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
 #include <Storages/MergeTree/MergedBlockOutputStream.h>
 #include <Storages/MergeTree/ReplicatedFetchList.h>
 #include <Storages/StorageReplicatedMergeTree.h>
@ -44,10 +43,8 @@ namespace ErrorCodes
    extern const int CANNOT_WRITE_TO_OSTREAM;
    extern const int CHECKSUM_DOESNT_MATCH;
    extern const int INSECURE_PATH;
-    extern const int CORRUPTED_DATA;
    extern const int LOGICAL_ERROR;
    extern const int S3_ERROR;
-    extern const int INCORRECT_PART_TYPE;
    extern const int ZERO_COPY_REPLICATION_ERROR;
 }

@ -191,8 +188,6 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
        }

        if (data_settings->allow_remote_fs_zero_copy_replication &&
-            /// In memory data part does not have metadata yet.
-            !isInMemoryPart(part) &&
            client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY)
        {
            auto disk_type = part->getDataPartStorage().getDiskType();
@ -205,11 +200,7 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
            }
        }

-        if (isInMemoryPart(part))
-            sendPartFromMemory(part, out, send_projections);
-        else
-            sendPartFromDisk(part, out, client_protocol_version, false, send_projections);
-
+        sendPartFromDisk(part, out, client_protocol_version, false, send_projections);
        data.addLastSentPart(part->info);
    }
    catch (const NetException &)
@ -231,36 +222,6 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write
    }
 }

-void Service::sendPartFromMemory(
-    const MergeTreeData::DataPartPtr & part, WriteBuffer & out, bool send_projections)
-{
-    auto metadata_snapshot = data.getInMemoryMetadataPtr();
-    if (send_projections)
-    {
-        for (const auto & [name, projection] : part->getProjectionParts())
-        {
-            auto projection_sample_block = metadata_snapshot->projections.get(name).sample_block;
-            auto part_in_memory = asInMemoryPart(projection);
-            if (!part_in_memory)
-                throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection {} of part {} is not stored in memory", name, part->name);
-
-            writeStringBinary(name, out);
-            projection->checksums.write(out);
-            NativeWriter block_out(out, 0, projection_sample_block);
-            block_out.write(part_in_memory->block);
-        }
-    }
-
-    auto part_in_memory = asInMemoryPart(part);
-    if (!part_in_memory)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} is not stored in memory", part->name);
-
-    NativeWriter block_out(out, 0, metadata_snapshot->getSampleBlock());
-    part->checksums.write(out);
-    block_out.write(part_in_memory->block);
-
-    data.getSendsThrottler()->add(part_in_memory->block.bytes());
-}

 MergeTreeData::DataPart::Checksums Service::sendPartFromDisk(
    const MergeTreeData::DataPartPtr & part,
@ -642,8 +603,6 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> Fetcher::fetchSelected
                            remote_fs_metadata, fmt::join(capability, ", "));
        if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie with old protocol version {}", server_protocol_version);
-        if (part_type == PartType::InMemory)
-            throw Exception(ErrorCodes::INCORRECT_PART_TYPE, "Got 'remote_fs_metadata' cookie for in-memory part");

        try
        {
@ -702,7 +661,7 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> Fetcher::fetchSelected
    }

    auto storage_id = data.getStorageID();
-    String new_part_path = part_type == PartType::InMemory ? "memory" : fs::path(data.getFullPathOnDisk(disk)) / part_name / "";
+    String new_part_path = fs::path(data.getFullPathOnDisk(disk)) / part_name / "";
    auto entry = data.getContext()->getReplicatedFetchList().insert(
        storage_id.getDatabaseName(), storage_id.getTableName(),
        part_info.partition_id, part_name, new_part_path,
@ -710,22 +669,6 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> Fetcher::fetchSelected

    in->setNextCallback(ReplicatedFetchReadCallback(*entry));

-    if (part_type == PartType::InMemory)
-    {
-        auto volume = std::make_shared<SingleDiskVolume>("volume_" + part_name, disk, 0);
-
-        auto data_part_storage = std::make_shared<DataPartStorageOnDiskFull>(
-            volume,
-            data.getRelativeDataPath(),
-            part_name);
-
-        return std::make_pair(downloadPartToMemory(
-            data_part_storage, part_name,
-            MergeTreePartInfo::fromPartName(part_name, data.format_version),
-            part_uuid, metadata_snapshot, context, *in,
-            projections, false, throttler), std::move(temporary_directory_lock));
-    }
-
    auto output_buffer_getter = [](IDataPartStorage & part_storage, const String & file_name, size_t file_size)
    {
        return part_storage.writeFile(file_name, std::min<UInt64>(file_size, DBMS_DEFAULT_BUFFER_SIZE), {});
@ -737,65 +680,6 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> Fetcher::fetchSelected
        projections, throttler, sync),std::move(temporary_directory_lock));
 }

-MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory(
-    MutableDataPartStoragePtr data_part_storage,
-    const String & part_name,
-    const MergeTreePartInfo & part_info,
-    const UUID & part_uuid,
-    const StorageMetadataPtr & metadata_snapshot,
-    ContextPtr context,
-    ReadWriteBufferFromHTTP & in,
-    size_t projections,
-    bool is_projection,
-    ThrottlerPtr throttler)
-{
-    auto new_data_part = std::make_shared<MergeTreeDataPartInMemory>(data, part_name, part_info, data_part_storage);
-
-    for (size_t i = 0; i < projections; ++i)
-    {
-        String projection_name;
-        readStringBinary(projection_name, in);
-
-        MergeTreePartInfo new_part_info("all", 0, 0, 0);
-        auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj");
-
-        auto new_projection_part = downloadPartToMemory(
-            projection_part_storage, projection_name,
-            new_part_info, part_uuid, metadata_snapshot,
-            context, in, 0, true, throttler);
-
-        new_data_part->addProjectionPart(projection_name, std::move(new_projection_part));
-    }
-
-    MergeTreeData::DataPart::Checksums checksums;
-    if (!checksums.read(in))
-        throw Exception(ErrorCodes::CORRUPTED_DATA, "Cannot deserialize checksums");
-
-    NativeReader block_in(in, 0);
-    auto block = block_in.read();
-    throttler->add(block.bytes());
-
-    new_data_part->setColumns(block.getNamesAndTypesList(), {}, metadata_snapshot->getMetadataVersion());
-
-    if (!is_projection)
-    {
-        new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr);
-        new_data_part->uuid = part_uuid;
-        new_data_part->is_temp = true;
-        new_data_part->minmax_idx->update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey()));
-        new_data_part->partition.create(metadata_snapshot, block, 0, context);
-    }
-
-    MergedBlockOutputStream part_out(
-        new_data_part, metadata_snapshot, block.getNamesAndTypesList(), {}, {},
-        CompressionCodecFactory::instance().get("NONE", {}), NO_TRANSACTION_PTR);
-
-    part_out.write(block);
-    part_out.finalizePart(new_data_part, false);
-    new_data_part->checksums.checkEqual(checksums, /* have_uncompressed = */ true);
-
-    return new_data_part;
-}

 void Fetcher::downloadBaseOrProjectionPartToDisk(
    const String & replica_path,
--- a/src/Storages/MergeTree/DataPartsExchange.h
+++ b/src/Storages/MergeTree/DataPartsExchange.h
@ -40,10 +40,6 @@ public:

 private:
    MergeTreeData::DataPartPtr findPart(const String & name);
-    void sendPartFromMemory(
-        const MergeTreeData::DataPartPtr & part,
-        WriteBuffer & out,
-        bool send_projections);

    MergeTreeData::DataPart::Checksums sendPartFromDisk(
        const MergeTreeData::DataPartPtr & part,
@ -113,18 +109,6 @@ private:
        ThrottlerPtr throttler,
        bool sync);

-    MergeTreeData::MutableDataPartPtr downloadPartToMemory(
-       MutableDataPartStoragePtr data_part_storage,
-       const String & part_name,
-       const MergeTreePartInfo & part_info,
-       const UUID & part_uuid,
-       const StorageMetadataPtr & metadata_snapshot,
-       ContextPtr context,
-       ReadWriteBufferFromHTTP & in,
-       size_t projections,
-       bool is_projection,
-       ThrottlerPtr throttler);
-
    MergeTreeData::MutableDataPartPtr downloadPartToDiskRemoteMeta(
       const String & part_name,
       const String & replica_path,
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@ -51,7 +51,6 @@ namespace CurrentMetrics

    extern const Metric PartsWide;
    extern const Metric PartsCompact;
-    extern const Metric PartsInMemory;
 }

 namespace DB
@ -278,9 +277,6 @@ static void incrementTypeMetric(MergeTreeDataPartType type)
        case MergeTreeDataPartType::Compact:
            CurrentMetrics::add(CurrentMetrics::PartsCompact);
            return;
-        case MergeTreeDataPartType::InMemory:
-            CurrentMetrics::add(CurrentMetrics::PartsInMemory);
-            return;
        case MergeTreeDataPartType::Unknown:
            return;
    }
@ -296,9 +292,6 @@ static void decrementTypeMetric(MergeTreeDataPartType type)
        case MergeTreeDataPartType::Compact:
            CurrentMetrics::sub(CurrentMetrics::PartsCompact);
            return;
-        case MergeTreeDataPartType::InMemory:
-            CurrentMetrics::sub(CurrentMetrics::PartsInMemory);
-            return;
        case MergeTreeDataPartType::Unknown:
            return;
    }
@ -844,6 +837,27 @@ void IMergeTreeDataPart::loadIndex() const
            for (size_t j = 0; j < key_size; ++j)
                key_serializations[j]->deserializeBinary(*loaded_index[j], *index_file, {});

+        /// Cut useless suffix columns, if necessary.
+        Float64 ratio_to_drop_suffix_columns = storage.getSettings()->primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns;
+        if (key_size > 1 && ratio_to_drop_suffix_columns > 0 && ratio_to_drop_suffix_columns < 1)
+        {
+            chassert(marks_count > 0);
+            for (size_t j = 0; j < key_size - 1; ++j)
+            {
+                size_t num_changes = 0;
+                for (size_t i = 1; i < marks_count; ++i)
+                    if (0 != loaded_index[j]->compareAt(i, i - 1, *loaded_index[j], 0))
+                        ++num_changes;
+
+                if (static_cast<Float64>(num_changes) / marks_count >= ratio_to_drop_suffix_columns)
+                {
+                    key_size = j + 1;
+                    loaded_index.resize(key_size);
+                    break;
+                }
+            }
+        }
+
        for (size_t i = 0; i < key_size; ++i)
        {
            loaded_index[i]->shrinkToFit();
@ -2207,11 +2221,6 @@ bool isWidePart(const MergeTreeDataPartPtr & data_part)
    return (data_part && data_part->getType() == MergeTreeDataPartType::Wide);
 }

-bool isInMemoryPart(const MergeTreeDataPartPtr & data_part)
-{
-    return (data_part && data_part->getType() == MergeTreeDataPartType::InMemory);
-}
-
 std::optional<std::string> getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage)
 {
    if (data_part_storage.exists())
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@ -710,7 +710,6 @@ using MergeTreeMutableDataPartPtr = std::shared_ptr<IMergeTreeDataPart>;

 bool isCompactPart(const MergeTreeDataPartPtr & data_part);
 bool isWidePart(const MergeTreeDataPartPtr & data_part);
-bool isInMemoryPart(const MergeTreeDataPartPtr & data_part);

 inline String getIndexExtension(bool is_compressed_primary_key) { return is_compressed_primary_key ? ".cidx" : ".idx"; }
 std::optional<String> getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage);
--- a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h
@ -34,8 +34,6 @@ public:

    virtual bool isWidePart() const = 0;

-    virtual bool isInMemoryPart() const = 0;
-
    virtual bool isProjectionPart() const = 0;

    virtual DataPartStoragePtr getDataPartStorage() const = 0;
--- a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h
+++ b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h
@ -22,8 +22,6 @@ public:

    bool isWidePart() const override { return DB::isWidePart(data_part); }

-    bool isInMemoryPart() const override { return DB::isInMemoryPart(data_part); }
-
    bool isProjectionPart() const override { return data_part->isProjectionPart(); }

    DataPartStoragePtr getDataPartStorage() const override { return data_part->getDataPartStoragePtr(); }
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@ -309,7 +309,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
            ctx->rows_sources_uncompressed_write_buf = ctx->tmp_disk->createRawStream();
            ctx->rows_sources_write_buf = std::make_unique<CompressedWriteBuffer>(*ctx->rows_sources_uncompressed_write_buf);

-            MergeTreeDataPartInMemory::ColumnToSize local_merged_column_to_size;
+            std::map<String, UInt64> local_merged_column_to_size;
            for (const MergeTreeData::DataPartPtr & part : global_ctx->future_part->parts)
                part->accumulateColumnSizes(local_merged_column_to_size);

--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@ -72,7 +72,6 @@
 #include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
 #include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
 #include <Storages/MergeTree/MergeTreeDataPartCompact.h>
-#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
 #include <Storages/Statistics/Estimator.h>
 #include <Storages/MergeTree/MergeTreeSelectProcessor.h>
 #include <Storages/MergeTree/checkDataPart.h>
@ -1707,8 +1706,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
            {
                /// Skip temporary directories, file 'format_version.txt' and directory 'detached'.
                if (startsWith(it->name(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME
-                    || it->name() == MergeTreeData::DETACHED_DIR_NAME
-                    || startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME))
+                    || it->name() == MergeTreeData::DETACHED_DIR_NAME)
                    continue;

                if (auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version))
@ -2261,7 +2259,6 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force)
            bool reached_removal_time = part_remove_time <= time_now && time_now - part_remove_time >= getSettings()->old_parts_lifetime.totalSeconds();
            if ((reached_removal_time && !has_skipped_mutation_parent(part))
                || force
-                || isInMemoryPart(part)     /// Remove in-memory parts immediately to not store excessive data in RAM
                || (part->version.creation_csn == Tx::RolledBackCSN && getSettings()->remove_rolled_back_parts_immediately))
            {
                part->removal_state.store(DataPartRemovalState::REMOVED, std::memory_order_relaxed);
@ -5225,14 +5222,14 @@ Pipe MergeTreeData::alterPartition(
            case PartitionCommand::FREEZE_PARTITION:
            {
                auto lock = lockForShare(query_context->getCurrentQueryId(), query_context->getSettingsRef().lock_acquire_timeout);
-                current_command_results = freezePartition(command.partition, metadata_snapshot, command.with_name, query_context, lock);
+                current_command_results = freezePartition(command.partition, command.with_name, query_context, lock);
            }
            break;

            case PartitionCommand::FREEZE_ALL_PARTITIONS:
            {
                auto lock = lockForShare(query_context->getCurrentQueryId(), query_context->getSettingsRef().lock_acquire_timeout);
-                current_command_results = freezeAll(command.with_name, metadata_snapshot, query_context, lock);
+                current_command_results = freezeAll(command.with_name, query_context, lock);
            }
            break;

@ -7103,27 +7100,6 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
    scope_guard src_flushed_tmp_dir_lock;
    MergeTreeData::MutableDataPartPtr src_flushed_tmp_part;

-    /// If source part is in memory, flush it to disk and clone it already in on-disk format
-    /// Protect tmp dir from removing by cleanup thread with src_flushed_tmp_dir_lock
-    /// Construct src_flushed_tmp_part in order to delete part with its directory at destructor
-    if (auto src_part_in_memory = asInMemoryPart(src_part))
-    {
-        auto flushed_part_path = *src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix);
-
-        auto tmp_src_part_file_name = fs::path(tmp_dst_part_name).filename();
-        src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name);
-
-        auto flushed_part_storage = src_part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot);
-
-        src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, src_part->name, flushed_part_storage)
-            .withPartInfo(src_part->info)
-            .withPartFormatFromDisk()
-            .build();
-
-        src_flushed_tmp_part->is_temp = true;
-        src_part_storage = flushed_part_storage;
-    }
-
    String with_copy;
    if (params.copy_instead_of_hardlink)
        with_copy = " (copying data)";
@ -7305,26 +7281,23 @@ MergeTreeData::MatcherFn MergeTreeData::getPartitionMatcher(const ASTPtr & parti

 PartitionCommandsResultInfo MergeTreeData::freezePartition(
    const ASTPtr & partition_ast,
-    const StorageMetadataPtr & metadata_snapshot,
    const String & with_name,
    ContextPtr local_context,
    TableLockHolder &)
 {
-    return freezePartitionsByMatcher(getPartitionMatcher(partition_ast, local_context), metadata_snapshot, with_name, local_context);
+    return freezePartitionsByMatcher(getPartitionMatcher(partition_ast, local_context), with_name, local_context);
 }

 PartitionCommandsResultInfo MergeTreeData::freezeAll(
    const String & with_name,
-    const StorageMetadataPtr & metadata_snapshot,
    ContextPtr local_context,
    TableLockHolder &)
 {
-    return freezePartitionsByMatcher([] (const String &) { return true; }, metadata_snapshot, with_name, local_context);
+    return freezePartitionsByMatcher([] (const String &) { return true; }, with_name, local_context);
 }

 PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(
    MatcherFn matcher,
-    const StorageMetadataPtr & metadata_snapshot,
    const String & with_name,
    ContextPtr local_context)
 {
@ -7376,22 +7349,6 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher(
        scope_guard src_flushed_tmp_dir_lock;
        MergeTreeData::MutableDataPartPtr src_flushed_tmp_part;

-        if (auto part_in_memory = asInMemoryPart(part))
-        {
-            auto flushed_part_path = *part_in_memory->getRelativePathForPrefix("tmp_freeze");
-            src_flushed_tmp_dir_lock = part->storage.getTemporaryPartDirectoryHolder("tmp_freeze" + part->name);
-
-            auto flushed_part_storage = part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot);
-
-            src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, part->name, flushed_part_storage)
-                .withPartInfo(part->info)
-                .withPartFormatFromDisk()
-                .build();
-
-            src_flushed_tmp_part->is_temp = true;
-            data_part_storage = flushed_part_storage;
-        }
-
        auto callback = [this, &part, &backup_part_path](const DiskPtr & disk)
        {
            // Store metadata for replicated table.
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@ -23,9 +23,7 @@
 #include <Storages/MergeTree/MergeList.h>
 #include <Storages/MergeTree/IMergeTreeDataPart.h>
 #include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
-#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
 #include <Storages/MergeTree/MergeTreePartsMover.h>
-#include <Storages/MergeTree/MergeTreeWriteAheadLog.h>
 #include <Storages/MergeTree/PinnedPartUUIDs.h>
 #include <Storages/MergeTree/ZeroCopyLock.h>
 #include <Storages/MergeTree/TemporaryParts.h>
@ -752,7 +750,6 @@ public:
      */
    PartitionCommandsResultInfo freezePartition(
        const ASTPtr & partition,
-        const StorageMetadataPtr & metadata_snapshot,
        const String & with_name,
        ContextPtr context,
        TableLockHolder & table_lock_holder);
@ -760,7 +757,6 @@ public:
    /// Freezes all parts.
    PartitionCommandsResultInfo freezeAll(
        const String & with_name,
-        const StorageMetadataPtr & metadata_snapshot,
        ContextPtr context,
        TableLockHolder & table_lock_holder);

@ -1307,7 +1303,7 @@ protected:
    bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node, const StorageMetadataPtr & metadata_snapshot) const;

    /// Common part for |freezePartition()| and |freezeAll()|.
-    PartitionCommandsResultInfo freezePartitionsByMatcher(MatcherFn matcher, const StorageMetadataPtr & metadata_snapshot, const String & with_name, ContextPtr context);
+    PartitionCommandsResultInfo freezePartitionsByMatcher(MatcherFn matcher, const String & with_name, ContextPtr context);
    PartitionCommandsResultInfo unfreezePartitionsByMatcher(MatcherFn matcher, const String & backup_name, ContextPtr context);

    // Partition helpers
--- a/src/Storages/MergeTree/MergeTreeDataPartBuilder.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartBuilder.cpp
@ -1,5 +1,4 @@
 #include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
-#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
 #include <Storages/MergeTree/MergeTreeDataPartCompact.h>
 #include <Storages/MergeTree/MergeTreeDataPartWide.h>
 #include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
@ -64,8 +63,6 @@ std::shared_ptr<IMergeTreeDataPart> MergeTreeDataPartBuilder::build()
            return std::make_shared<MergeTreeDataPartWide>(data, name, *part_info, part_storage, parent_part);
        case PartType::Compact:
            return std::make_shared<MergeTreeDataPartCompact>(data, name, *part_info, part_storage, parent_part);
-        case PartType::InMemory:
-            return std::make_shared<MergeTreeDataPartInMemory>(data, name, *part_info, part_storage, parent_part);
        default:
            throw Exception(ErrorCodes::UNKNOWN_PART_TYPE,
                "Unknown type of part {}", part_storage->getRelativePath());
--- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
@ -1,115 +0,0 @@
-#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
-#include <Storages/MergeTree/MergeTreeReaderInMemory.h>
-#include <Storages/MergeTree/MergedBlockOutputStream.h>
-#include <Storages/MergeTree/IMergeTreeReader.h>
-#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
-#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
-#include <DataTypes/NestedUtils.h>
-#include <Disks/createVolume.h>
-#include <Interpreters/Context.h>
-#include <Poco/Logger.h>
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int NOT_IMPLEMENTED;
-}
-
-MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
-        const MergeTreeData & storage_,
-        const String & name_,
-        const MergeTreePartInfo & info_,
-        const MutableDataPartStoragePtr & data_part_storage_,
-        const IMergeTreeDataPart * parent_part_)
-    : IMergeTreeDataPart(storage_, name_, info_, data_part_storage_, Type::InMemory, parent_part_)
-{
-    default_codec = CompressionCodecFactory::instance().get("NONE", {});
-}
-
-IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader(
-    const NamesAndTypesList & columns_to_read,
-    const StorageSnapshotPtr & storage_snapshot,
-    const MarkRanges & mark_ranges,
-    const VirtualFields & virtual_fields,
-    UncompressedCache * /* uncompressed_cache */,
-    MarkCache * /* mark_cache */,
-    const AlterConversionsPtr & alter_conversions,
-    const MergeTreeReaderSettings & reader_settings,
-    const ValueSizeMap & /* avg_value_size_hints */,
-    const ReadBufferFromFileBase::ProfileCallback & /* profile_callback */) const
-{
-    auto read_info = std::make_shared<LoadedMergeTreeDataPartInfoForReader>(shared_from_this(), alter_conversions);
-    auto ptr = std::static_pointer_cast<const MergeTreeDataPartInMemory>(shared_from_this());
-
-    return std::make_unique<MergeTreeReaderInMemory>(
-        read_info,
-        ptr,
-        columns_to_read,
-        virtual_fields,
-        storage_snapshot,
-        mark_ranges,
-        reader_settings);
-}
-
-IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter(
-    const NamesAndTypesList &,
-    const StorageMetadataPtr &,
-    const std::vector<MergeTreeIndexPtr> &,
-    const Statistics &,
-    const CompressionCodecPtr &,
-    const MergeTreeWriterSettings &,
-    const MergeTreeIndexGranularity &)
-{
-    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "In-memory data parts are obsolete and no longer supported for writing");
-}
-
-MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String &, const StorageMetadataPtr &) const
-{
-    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "In-memory data parts are obsolete and no longer supported for writing");
-}
-
-DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix,
-                                                                  const StorageMetadataPtr & metadata_snapshot,
-                                                                  const DiskTransactionPtr & disk_transaction) const
-{
-    if (disk_transaction)
-        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "InMemory parts are not compatible with disk transactions");
-    String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false);
-    return flushToDisk(detached_path, metadata_snapshot);
-}
-
-void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */)
-{
-    getDataPartStorage().setRelativePath(new_relative_path);
-}
-
-void MergeTreeDataPartInMemory::calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const
-{
-    auto it = checksums.files.find("data.bin");
-    if (it != checksums.files.end())
-        total_size.data_uncompressed += it->second.uncompressed_size;
-
-    for (const auto & column : columns)
-        each_columns_size[column.name].data_uncompressed += block.getByName(column.name).column->byteSize();
-}
-
-IMergeTreeDataPart::Checksum MergeTreeDataPartInMemory::calculateBlockChecksum() const
-{
-    SipHash hash;
-    IMergeTreeDataPart::Checksum checksum;
-    for (const auto & column : block)
-        column.column->updateHashFast(hash);
-
-    checksum.uncompressed_size = block.bytes();
-    checksum.uncompressed_hash = getSipHash128AsPair(hash);
-    return checksum;
-}
-
-DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part)
-{
-    return std::dynamic_pointer_cast<const MergeTreeDataPartInMemory>(part);
-}
-}
--- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
@ -1,70 +0,0 @@
-#pragma once
-
-#include <Storages/MergeTree/IMergeTreeDataPart.h>
-
-namespace DB
-{
-
-class UncompressedCache;
-
-class MergeTreeDataPartInMemory : public IMergeTreeDataPart
-{
-public:
-    MergeTreeDataPartInMemory(
-        const MergeTreeData & storage_,
-        const String & name_,
-        const MergeTreePartInfo & info_,
-        const MutableDataPartStoragePtr & data_part_storage_,
-        const IMergeTreeDataPart * parent_part_ = nullptr);
-
-    MergeTreeReaderPtr getReader(
-        const NamesAndTypesList & columns,
-        const StorageSnapshotPtr & storage_snapshot,
-        const MarkRanges & mark_ranges,
-        const VirtualFields & virtual_fields,
-        UncompressedCache * uncompressed_cache,
-        MarkCache * mark_cache,
-        const AlterConversionsPtr & alter_conversions,
-        const MergeTreeReaderSettings & reader_settings_,
-        const ValueSizeMap & avg_value_size_hints,
-        const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override;
-
-    MergeTreeWriterPtr getWriter(
-        const NamesAndTypesList & columns_list,
-        const StorageMetadataPtr & metadata_snapshot,
-        const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
-        const Statistics & stats_to_recalc_,
-        const CompressionCodecPtr & default_codec_,
-        const MergeTreeWriterSettings & writer_settings,
-        const MergeTreeIndexGranularity & computed_index_granularity) override;
-
-    bool isStoredOnDisk() const override { return false; }
-    bool isStoredOnRemoteDisk() const override { return false; }
-    bool isStoredOnRemoteDiskWithZeroCopySupport() const override { return false; }
-    bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); }
-    std::optional<String> getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; }
-    void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override;
-    DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot,
-                                           const DiskTransactionPtr & disk_transaction) const override;
-    std::optional<time_t> getColumnModificationTime(const String & /* column_name */) const override { return {}; }
-
-    MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const;
-
-    /// Returns hash of parts's block
-    Checksum calculateBlockChecksum() const;
-
-    mutable Block block;
-
-private:
-    mutable std::condition_variable is_merged;
-
-    /// Calculates uncompressed sizes in memory.
-    void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override;
-};
-
-using DataPartInMemoryPtr = std::shared_ptr<const MergeTreeDataPartInMemory>;
-using MutableDataPartInMemoryPtr = std::shared_ptr<MergeTreeDataPartInMemory>;
-
-DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part);
-
-}
--- a/src/Storages/MergeTree/MergeTreeDataPartType.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartType.h
@ -42,9 +42,6 @@ public:
        /// Data of all columns is stored in one file. Marks are also stored in single file.
        Compact,

-        /// Format with buffering data in RAM. Obsolete - new parts cannot be created in this format.
-        InMemory,
-
        Unknown,
    };

--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h
@ -1,5 +1,8 @@
 #pragma once
+
 #include <Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h>
+#include <Formats/MarkInCompressedFile.h>
+

 namespace DB
 {
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@ -1018,7 +1018,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
    DataTypes key_types;
    for (size_t i : key_indices)
    {
-        index_columns->emplace_back(ColumnWithTypeAndName{index[i], primary_key.data_types[i], primary_key.column_names[i]});
+        if (i < index.size())
+            index_columns->emplace_back(index[i], primary_key.data_types[i], primary_key.column_names[i]);
+        else
+            index_columns->emplace_back(); /// The column of the primary key was not loaded in memory - we'll skip it.
+
        key_types.emplace_back(primary_key.data_types[i]);
    }

@ -1027,7 +1031,6 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
    std::function<void(size_t, size_t, FieldRef &)> create_field_ref;
    if (key_condition.hasMonotonicFunctionsChain())
    {
-
        create_field_ref = [index_columns](size_t row, size_t column, FieldRef & field)
        {
            field = {index_columns.get(), row, column};
@ -1067,7 +1070,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
            {
                for (size_t i = 0; i < used_key_size; ++i)
                {
-                    create_field_ref(range.begin, i, index_left[i]);
+                    if ((*index_columns)[i].column)
+                        create_field_ref(range.begin, i, index_left[i]);
+                    else
+                        index_left[i] = NEGATIVE_INFINITY;
+
                    index_right[i] = POSITIVE_INFINITY;
                }
            }
@ -1078,8 +1085,17 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(

                for (size_t i = 0; i < used_key_size; ++i)
                {
-                    create_field_ref(range.begin, i, index_left[i]);
-                    create_field_ref(range.end, i, index_right[i]);
+                    if ((*index_columns)[i].column)
+                    {
+                        create_field_ref(range.begin, i, index_left[i]);
+                        create_field_ref(range.end, i, index_right[i]);
+                    }
+                    else
+                    {
+                        /// If the PK column was not loaded in memory - exclude it from the analysis.
+                        index_left[i] = NEGATIVE_INFINITY;
+                        index_right[i] = POSITIVE_INFINITY;
+                    }
                }
            }
            key_condition_maybe_true = key_condition.mayBeTrueInRange(used_key_size, index_left.data(), index_right.data(), key_types);
@ -1114,6 +1130,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
    bool part_offset_condition_exact_range
        = !part_offset_condition || part_offset_condition->alwaysUnknownOrTrue() || part_offset_condition->matchesExactContinuousRange();
    const String & part_name = part->isProjectionPart() ? fmt::format("{}.{}", part->name, part->getParentPart()->name) : part->name;
+
    if (!key_condition_exact_range || !part_offset_condition_exact_range)
    {
        // Do exclusion search, where we drop ranges that do not match
@ -1128,10 +1145,10 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
            part->index_granularity_info.index_granularity_bytes);

        /** There will always be disjoint suspicious segments on the stack, the leftmost one at the top (back).
-        * At each step, take the left segment and check if it fits.
-        * If fits, split it into smaller ones and put them on the stack. If not, discard it.
-        * If the segment is already of one mark length, add it to response and discard it.
-        */
+          * At each step, take the left segment and check if it fits.
+          * If fits, split it into smaller ones and put them on the stack. If not, discard it.
+          * If the segment is already of one mark length, add it to response and discard it.
+          */
        std::vector<MarkRange> ranges_stack = { {0, marks_count} };

        size_t steps = 0;
@ -1141,7 +1158,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
            MarkRange range = ranges_stack.back();
            ranges_stack.pop_back();

-            steps++;
+            ++steps;

            if (!may_be_true_in_range(range))
                continue;
--- a/Show More
+++ b/Show More
				`@ -1 +0,0 @@`
				`int mainEntryClickHouseClusterCopier(int argc, char ** argv);`