Merge branch 'async-loader-integration' of github.com:ClickHouse/ClickHouse into async-loader-integration

2024-11-24 16:42:05 +00:00 · 2023-06-03 10:53:03 +00:00 · 2023-06-03 10:53:03 +00:00 · 1f5b23898b
commit 1f5b23898b
parent 47d70db2de 6aadcffe59
72 changed files with 2264 additions and 773 deletions
--- a/base/base/getMemoryAmount.cpp
+++ b/base/base/getMemoryAmount.cpp
@ -28,6 +28,19 @@ uint64_t getMemoryAmountOrZero()

 #if defined(OS_LINUX)
    // Try to lookup at the Cgroup limit
+
+    // CGroups v2
+    std::ifstream cgroupv2_limit("/sys/fs/cgroup/memory.max");
+    if (cgroupv2_limit.is_open())
+    {
+        uint64_t memory_limit = 0;
+        cgroupv2_limit >> memory_limit;
+        if (memory_limit > 0 && memory_limit < memory_amount)
+            memory_amount = memory_limit;
+    }
+    else
+    {
+        // CGroups v1
        std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
        if (cgroup_limit.is_open())
        {
@ -36,6 +49,7 @@ uint64_t getMemoryAmountOrZero()
            if (memory_limit > 0 && memory_limit < memory_amount)
                memory_amount = memory_limit;
        }
+    }
 #endif

    return memory_amount;
--- a/docs/en/operations/utilities/clickhouse-keeper-client.md
+++ b/docs/en/operations/utilities/clickhouse-keeper-client.md
@ -0,0 +1,53 @@
+---
+slug: /en/operations/utilities/clickhouse-keeper-client
+sidebar_label: clickhouse-keeper-client
+---
+
+# clickhouse-keeper-client
+
+A client application for interacting with clickhouse-keeper using its native protocol.
+
+## Keys {#clickhouse-keeper-client}
+
+-   `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
+-   `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
+-   `-p N`, `--port=N` — Server port. Default value: `2181`.
+-   `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
+-   `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
+-   `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
+-   `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`.
+-   `--help` — Shows the help message.
+
+## Example {#clickhouse-keeper-client-example}
+
+```bash
+./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
+Connected to ZooKeeper at [::1]:2181 with session_id 137
+/ :) ls
+keeper foo bar
+/ :) cd keeper
+/keeper :) ls
+api_version
+/keeper :) cd api_version
+/keeper/api_version :) ls
+
+/keeper/api_version :) cd xyz
+Path /keeper/api_version/xyz does not exists
+/keeper/api_version :) cd ../../
+/ :) ls
+keeper foo bar
+/ :) get keeper/api_version
+2
+```
+
+## Commands {#clickhouse-keeper-client-commands}
+
+-   `ls [path]` -- Lists the nodes for the given path (default: cwd)
+-   `cd [path]` -- Changes the working path (default `.`)
+-   `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
+-   `create <path> <value>` -- Creates new node
+-   `get <path>` -- Returns the node's value
+-   `remove <path>` -- Removes the node
+-   `rmr <path>` -- Recursively deletes path. Confirmation required
+-   `flwc <command>` -- Executes four-letter-word command
+-   `help` -- Prints this message
--- a/docs/en/sql-reference/functions/geo/svg.md
+++ b/docs/en/sql-reference/functions/geo/svg.md
@ -0,0 +1,52 @@
+---
+slug: /en/sql-reference/functions/geo/svg
+sidebar_label: SVG
+title: "Functions for Generating SVG images from Geo data"
+---
+
+## Syntax
+
+``` sql
+SVG(geometry,[style])
+```
+
+### Parameters
+
+- `geometry` — Geo data
+- `style` — Optional style name
+
+### Returned value
+
+- The SVG representation of the geometry:
+  - SVG circle
+  - SVG polygon
+  - SVG path
+
+Type: String
+
+## Examples
+
+### Circle
+```sql
+SELECT SVG((0., 0.))
+```
+```response
+<circle cx="0" cy="0" r="5" style=""/>
+```
+
+### Polygon
+```sql
+SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)])
+```
+```response
+<polygon points="0,0 0,10 10,10 10,0 0,0" style=""/>
+```
+
+### Path
+```sql
+SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]])
+```
+```response
+<g fill-rule="evenodd"><path d="M 0,0 L 0,10 L 10,10 L 10,0 L 0,0M 4,4 L 5,4 L 5,5 L 4,5 L 4,4 z " style=""/></g>
+```
+
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@ -560,77 +560,6 @@ Result:
 └───────────────────────────┘
 ```

-## Entropy-learned hashing (experimental)
-
-Entropy-learned hashing is disabled by default, to enable: `SET allow_experimental_hash_functions=1`.
-
-Entropy-learned hashing is not a standalone hash function like `metroHash64`, `cityHash64`, `sipHash64` etc. Instead, it aims to preprocess
-the data to be hashed in a way that a standalone hash function can be computed more efficiently while not compromising the hash quality,
-i.e. the randomness of the hashes. For that, entropy-based hashing chooses a subset of the bytes in a training data set of Strings which has
-the same randomness (entropy) as the original Strings. For example, if the Strings are in average 100 bytes long, and we pick a subset of 5
-bytes, then a hash function will be 95% less expensive to evaluate. For details of the method, refer to [Entropy-Learned Hashing: Constant
-Time Hashing with Controllable Uniformity](https://doi.org/10.1145/3514221.3517894).
-
-Entropy-learned hashing has two phases:
-
-1. A training phase on a representative but typically small set of Strings to be hashed. Training consists of two steps:
-
-   - Function `prepareTrainEntropyLearnedHash(data, id)` caches the training data in a global state under a given `id`. It returns dummy
-     value `0` on every row.
-   - Function `trainEntropyLearnedHash(id)` computes a minimal partial sub-key of the training data stored stored under `id` in the global
-     state. The cached training data in the global state is replaced by the partial key. Dummy value `0` is returned on every row.
-
-2. An evaluation phase where hashes are computed using the previously calculated partial sub-keys. Function `entropyLearnedHash(data, id)`
-   hashes `data` using the partial subkey stored as `id`. CityHash64 is used as hash function.
-
-The reason that the training phase comprises two steps is that ClickHouse processes data at chunk granularity but entropy-learned hashing
-needs to process the entire training set at once.
-
-Since functions `prepareTrainEntropyLearnedHash()` and `trainEntropyLearnedHash()` access global state, they should not be called in
-parallel with the same `id`.
-
-**Syntax**
-
-``` sql
-prepareTrainEntropyLearnedHash(data, id);
-trainEntropyLearnedHash(id);
-entropyLearnedHash(data, id);
-```
-
-**Example**
-
-```sql
-SET allow_experimental_hash_functions=1;
-CREATE TABLE tab (col String) ENGINE=Memory;
-INSERT INTO tab VALUES ('aa'), ('ba'), ('ca');
-
-SELECT prepareTrainEntropyLearnedHash(col, 'id1') AS prepared FROM tab;
-SELECT trainEntropyLearnedHash('id1') AS trained FROM tab;
-SELECT entropyLearnedHash(col, 'id1') as hashes FROM tab;
-```
-
-Result:
-
-``` response
-┌─prepared─┐
-│        0 │
-│        0 │
-│        0 │
-└──────────┘
-
-┌─trained─┐
-│       0 │
-│       0 │
-│       0 │
-└─────────┘
-
-┌───────────────hashes─┐
-│  2603192927274642682 │
-│  4947675599669400333 │
-│ 10783339242466472992 │
-└──────────────────────┘
-```
-
 ## metroHash64

 Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/) hash value.
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@ -2480,3 +2480,75 @@ Result:
 │                      286 │
 └──────────────────────────┘
 ```
+
+## generateRandomStructure
+
+Generates a random table structure in the format `column1_name column1_type, column2_name column2_type, ...`.
+
+**Syntax**
+
+``` sql
+generateRandomStructure([number_of_columns, seed])
+```
+
+**Arguments**
+
+- `number_of_columns` — The desired number of columns in the result table structure. If set to 0 or `Null`, the number of columns will be random from 1 to 128. Default value: `Null`.
+- `seed` — Random seed to produce stable results. If the seed is not specified or is set to `Null`, it is randomly generated.
+
+All arguments must be constant.
+
+**Returned value**
+
+- Randomly generated table structure.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT generateRandomStructure()
+```
+
+Result:
+
+``` text
+┌─generateRandomStructure()─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
+│ c1 Decimal32(5), c2 Date, c3 Tuple(LowCardinality(String), Int128, UInt64, UInt16, UInt8, IPv6), c4 Array(UInt128), c5 UInt32, c6 IPv4, c7 Decimal256(64), c8 Decimal128(3), c9 UInt256, c10 UInt64, c11 DateTime │
+└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+Query:
+
+``` sql
+SELECT generateRandomStructure(1)
+```
+
+Result:
+
+``` text
+┌─generateRandomStructure(1)─┐
+│ c1 Map(UInt256, UInt16)    │
+└────────────────────────────┘
+```
+
+Query:
+
+``` sql
+SELECT generateRandomStructure(NULL, 33)
+```
+
+Result:
+
+``` text
+┌─generateRandomStructure(NULL, 33)─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
+│ c1 DateTime, c2 Enum8('c2V0' = 0, 'c2V1' = 1, 'c2V2' = 2, 'c2V3' = 3), c3 LowCardinality(Nullable(FixedString(30))), c4 Int16, c5 Enum8('c5V0' = 0, 'c5V1' = 1, 'c5V2' = 2, 'c5V3' = 3), c6 Nullable(UInt8), c7 String, c8 Nested(e1 IPv4, e2 UInt8, e3 UInt16, e4 UInt16, e5 Int32, e6 Map(Date, Decimal256(70))) │
+└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+**Note**: the maximum nesting depth of complex types (Array, Tuple, Map, Nested) is limited to 16.
+
+This function can be used together with [generateRandom](../../sql-reference/table-functions/generate.md) to generate completely random tables.
+
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@ -119,19 +119,35 @@ The compiled expression cache is enabled/disabled with the query/user/profile-le

 Resets the [query cache](../../operations/query-cache.md).

+```sql
+SYSTEM DROP QUERY CACHE [ON CLUSTER cluster_name]
+```
+
 ## FLUSH LOGS

 Flushes buffered log messages to system tables, e.g. system.query_log. Mainly useful for debugging since most system tables have a default flush interval of 7.5 seconds.
 This will also create system tables even if message queue is empty.

+```sql
+SYSTEM FLUSH LOGS [ON CLUSTER cluster_name]
+```
+
 ## RELOAD CONFIG

 Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper. Note that `SYSTEM RELOAD CONFIG` does not reload `USER` configuration stored in ZooKeeper, it only reloads `USER` configuration that is stored in `users.xml`.  To reload all `USER` config use `SYSTEM RELOAD USERS`

+```sql
+SYSTEM RELOAD CONFIG [ON CLUSTER cluster_name]
+```
+
 ## RELOAD USERS

 Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. 

+```sql
+SYSTEM RELOAD USERS [ON CLUSTER cluster_name]
+```
+
 ## SHUTDOWN

 Normally shuts down ClickHouse (like `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`)
@ -149,7 +165,7 @@ ClickHouse can manage [distributed](../../engines/table-engines/special/distribu
 Disables background data distribution when inserting data into distributed tables.

 ``` sql
-SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
+SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
 ```

 ### FLUSH DISTRIBUTED
@ -157,7 +173,7 @@ SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
 Forces ClickHouse to send data to cluster nodes synchronously. If any nodes are unavailable, ClickHouse throws an exception and stops query execution. You can retry the query until it succeeds, which will happen when all nodes are back online.

 ``` sql
-SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
+SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name> [ON CLUSTER cluster_name]
 ```

 ### START DISTRIBUTED SENDS
@ -165,7 +181,7 @@ SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
 Enables background data distribution when inserting data into distributed tables.

 ``` sql
-SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name>
+SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
 ```

 ## Managing MergeTree Tables
@ -177,7 +193,7 @@ ClickHouse can manage background processes in [MergeTree](../../engines/table-en
 Provides possibility to stop background merges for tables in the MergeTree family:

 ``` sql
-SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
+SYSTEM STOP MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
 ```

 :::note
@ -189,7 +205,7 @@ SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
 Provides possibility to start background merges for tables in the MergeTree family:

 ``` sql
-SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
+SYSTEM START MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
 ```

 ### STOP TTL MERGES
@ -198,7 +214,7 @@ Provides possibility to stop background delete old data according to [TTL expres
 Returns `Ok.` even if table does not exist or table has not MergeTree engine. Returns error when database does not exist:

 ``` sql
-SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
+SYSTEM STOP TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### START TTL MERGES
@ -207,7 +223,7 @@ Provides possibility to start background delete old data according to [TTL expre
 Returns `Ok.` even if table does not exist. Returns error when database does not exist:

 ``` sql
-SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
+SYSTEM START TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### STOP MOVES
@ -216,7 +232,7 @@ Provides possibility to stop background move data according to [TTL table expres
 Returns `Ok.` even if table does not exist. Returns error when database does not exist:

 ``` sql
-SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
+SYSTEM STOP MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### START MOVES
@ -225,7 +241,7 @@ Provides possibility to start background move data according to [TTL table expre
 Returns `Ok.` even if table does not exist. Returns error when database does not exist:

 ``` sql
-SYSTEM START MOVES [[db.]merge_tree_family_table_name]
+SYSTEM START MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### SYSTEM UNFREEZE {#query_language-system-unfreeze}
@ -241,7 +257,7 @@ SYSTEM UNFREEZE WITH NAME <backup_name>
 Wait until all asynchronously loading data parts of a table (outdated data parts) will became loaded.

 ``` sql
-SYSTEM WAIT LOADING PARTS [db.]merge_tree_family_table_name
+SYSTEM WAIT LOADING PARTS [ON CLUSTER cluster_name] [db.]merge_tree_family_table_name
 ```

 ## Managing ReplicatedMergeTree Tables
@ -254,7 +270,7 @@ Provides possibility to stop background fetches for inserted parts for tables in
 Always returns `Ok.` regardless of the table engine and even if table or database does not exist.

 ``` sql
-SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM STOP FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### START FETCHES
@ -263,7 +279,7 @@ Provides possibility to start background fetches for inserted parts for tables i
 Always returns `Ok.` regardless of the table engine and even if table or database does not exist.

 ``` sql
-SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM START FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### STOP REPLICATED SENDS
@ -271,7 +287,7 @@ SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
 Provides possibility to stop background sends to other replicas in cluster for new inserted parts for tables in the `ReplicatedMergeTree` family:

 ``` sql
-SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
+SYSTEM STOP REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### START REPLICATED SENDS
@ -279,7 +295,7 @@ SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
 Provides possibility to start background sends to other replicas in cluster for new inserted parts for tables in the `ReplicatedMergeTree` family:

 ``` sql
-SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
+SYSTEM START REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### STOP REPLICATION QUEUES
@ -287,7 +303,7 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
 Provides possibility to stop background fetch tasks from replication queues which stored in Zookeeper for tables in the `ReplicatedMergeTree` family. Possible background tasks types - merges, fetches, mutation, DDL statements with ON CLUSTER clause:

 ``` sql
-SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM STOP REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### START REPLICATION QUEUES
@ -295,7 +311,7 @@ SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
 Provides possibility to start background fetch tasks from replication queues which stored in Zookeeper for tables in the `ReplicatedMergeTree` family. Possible background tasks types - merges, fetches, mutation, DDL statements with ON CLUSTER clause:

 ``` sql
-SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### SYNC REPLICA
@ -318,7 +334,7 @@ Provides possibility to reinitialize Zookeeper session's state for `ReplicatedMe
 Initialization of replication queue based on ZooKeeper data happens in the same way as for `ATTACH TABLE` statement. For a short time, the table will be unavailable for any operations.

 ``` sql
-SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
+SYSTEM RESTART REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
 ```

 ### RESTORE REPLICA
@ -384,7 +400,7 @@ Provides possibility to reinitialize Zookeeper sessions state for all `Replicate
 Allows to drop filesystem cache.

 ```sql
-SYSTEM DROP FILESYSTEM CACHE
+SYSTEM DROP FILESYSTEM CACHE [ON CLUSTER cluster_name]
 ```

 ### SYNC FILE CACHE
@ -396,5 +412,5 @@ It's too heavy and has potential for misuse.
 Will do sync syscall.

 ```sql
-SYSTEM SYNC FILE CACHE
+SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name]
 ```
--- a/docs/en/sql-reference/table-functions/generate.md
+++ b/docs/en/sql-reference/table-functions/generate.md
@ -11,7 +11,7 @@ Allows to populate test tables with data.
 Not all types are supported.

 ``` sql
-generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]])
+generateRandom(['name TypeName[, name TypeName]...'[, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]])
 ```

 **Arguments**
@ -53,5 +53,49 @@ SELECT * FROM random;
 └──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘
 ```

+In combination with [generateRandomStructure](../../sql-reference/functions/other-functions.md#generateRandomStructure):
+
+```sql
+SELECT * FROM generateRandom(generateRandomStructure(4, 101), 101) LIMIT 3;
+```
+
+```text
+┌──────────────────c1─┬──────────────────c2─┬─c3─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─c4──────────────────────────────────────┐
+│ 1996-04-15 06:40:05 │ 33954608387.2844801 │ ['232.78.216.176','9.244.59.211','211.21.80.152','44.49.94.109','165.77.195.182','68.167.134.239','212.13.24.185','1.197.255.35','192.55.131.232'] │ 45d9:2b52:ab6:1c59:185b:515:c5b6:b781   │
+│ 2063-01-13 01:22:27 │ 36155064970.9514454 │ ['176.140.188.101']                                                                                                                                │ c65a:2626:41df:8dee:ec99:f68d:c6dd:6b30 │
+│ 2090-02-28 14:50:56 │  3864327452.3901373 │ ['155.114.30.32']                                                                                                                                  │ 57e9:5229:93ab:fbf3:aae7:e0e4:d1eb:86b  │
+└─────────────────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────┘
+```
+
+With the `structure` argument omitted (in this case the structure is random):
+
+```sql
+SELECT * FROM generateRandom() LIMIT 3;
+```
+
+```text
+┌───c1─┬─────────c2─┬─────────────────────c3─┬──────────────────────c4─┬─c5───────┐
+│ -128 │  317300854 │ 2030-08-16 08:22:20.65 │ 1994-08-16 12:08:56.745 │ R0qgiC46 │
+│   40 │ -744906827 │ 2059-04-16 06:31:36.98 │ 1975-07-16 16:28:43.893 │ PuH4M*MZ │
+│  -55 │  698652232 │ 2052-08-04 20:13:39.68 │ 1998-09-20 03:48:29.279 │          │
+└──────┴────────────┴────────────────────────┴─────────────────────────┴──────────┘
+```
+
+With a random seed used for both the random structure and the random data:
+
+```sql
+SELECT * FROM generateRandom(11) LIMIT 3;
+```
+
+```text
+┌───────────────────────────────────────c1─┬─────────────────────────────────────────────────────────────────────────────c2─┬─────────────────────────────────────────────────────────────────────────────c3─┬─────────c4─┬─────────────────────────────────────────────────────────────────────────────c5─┬──────────────────────c6─┬─c7──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─c8──────────────────────────────────────┬─────────c9─┐
+│  -77422512305044606600216318673365695785 │   636812099959807642229.503817849012019401335326013846687285151335352272727523 │ -34944452809785978175157829109276115789694605299387223845886143311647505037529 │  544473976 │ 111220388331710079615337037674887514156741572807049614590010583571763691328563 │       22016.22623506465 │ {'2052-01-31 20:25:33':4306400876908509081044405485378623663,'1993-04-16 15:58:49':164367354809499452887861212674772770279,'2101-08-19 03:07:18':-60676948945963385477105077735447194811,'2039-12-22 22:31:39':-59227773536703059515222628111999932330} │ a7b2:8f58:4d07:6707:4189:80cf:92f5:902d │ 1950-07-14 │
+│ -159940486888657488786004075627859832441 │  629206527868163085099.8195700356331771569105231840157308480121506729741348442 │ -53203761250367440823323469081755775164053964440214841464405368882783634063735 │ 2187136525 │  94881662451116595672491944222189810087991610568040618106057495823910493624275 │ 1.3095786748458954e-104 │ {}                                                                                                                                                                                                                                                      │ a051:e3da:2e0a:c69:7835:aed6:e8b:3817   │ 1943-03-25 │
+│   -5239084224358020595591895205940528518 │ -529937657954363597180.1709207212648004850138812370209091520162977548101577846 │  47490343304582536176125359129223180987770215457970451211489086575421345731671 │ 1637451978 │ 101899445785010192893461828129714741298630410942962837910400961787305271699002 │  2.4344456058391296e223 │ {'2013-12-22 17:42:43':80271108282641375975566414544777036006,'2041-03-08 10:28:17':169706054082247533128707458270535852845,'1986-08-31 23:07:38':-54371542820364299444195390357730624136,'2094-04-23 21:26:50':7944954483303909347454597499139023465}  │ 1293:a726:e899:9bfc:8c6f:2aa1:22c9:b635 │ 1924-11-20 │
+└──────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────┴─────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────┴────────────┘
+```
+
+**Note:** `generateRandom(generateRandomStructure(), [random seed], max_string_length, max_array_length)` with a large enough `max_array_length` can generate a very large output because complex types (`Array`, `Tuple`, `Map`, `Nested`) may be nested deeply (up to 16 levels).
+
 ## Related content
 - Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse)
--- a/docs/ru/sql-reference/statements/system.md
+++ b/docs/ru/sql-reference/statements/system.md
@ -39,7 +39,7 @@ SELECT name, status FROM system.dictionaries;
 **Синтаксис**

 ```sql
-SYSTEM RELOAD MODELS
+SYSTEM RELOAD MODELS [ON CLUSTER cluster_name]
 ```

 ## RELOAD MODEL {#query_language-system-reload-model}
@ -49,7 +49,7 @@ SYSTEM RELOAD MODELS
 **Синтаксис**

 ```sql
-SYSTEM RELOAD MODEL <model_path>
+SYSTEM RELOAD MODEL [ON CLUSTER cluster_name] <model_path>
 ```

 ## RELOAD FUNCTIONS {#query_language-system-reload-functions}
@ -59,8 +59,8 @@ SYSTEM RELOAD MODEL <model_path>
 **Синтаксис**

 ```sql
-RELOAD FUNCTIONS
-RELOAD FUNCTION function_name
+SYSTEM RELOAD FUNCTIONS [ON CLUSTER cluster_name]
+SYSTEM RELOAD FUNCTION function_name [ON CLUSTER cluster_name]
 ```

 ## DROP DNS CACHE {#query_language-system-drop-dns-cache}
@ -106,10 +106,18 @@ Cкомпилированные выражения используются ко
 Записывает буферы логов в системные таблицы (например system.query_log). Позволяет не ждать 7.5 секунд при отладке.
 Если буфер логов пустой, то этот запрос просто создаст системные таблицы.

+```sql
+SYSTEM FLUSH LOGS [ON CLUSTER cluster_name]
+```
+
 ## RELOAD CONFIG {#query_language-system-reload-config}

 Перечитывает конфигурацию настроек ClickHouse. Используется при хранении конфигурации в zookeeper.

+```sql
+SYSTEM RELOAD CONFIG [ON CLUSTER cluster_name]
+```
+
 ## SHUTDOWN {#query_language-system-shutdown}

 Штатно завершает работу ClickHouse (аналог `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`)
@ -127,7 +135,7 @@ ClickHouse может оперировать [распределёнными](..
 Отключает фоновую отправку при вставке данных в распределённые таблицы.

 ``` sql
-SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
+SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
 ```

 ### FLUSH DISTRIBUTED {#query_language-system-flush-distributed}
@ -135,7 +143,7 @@ SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
 В синхронном режиме отправляет все данные на узлы кластера. Если какие-либо узлы недоступны, ClickHouse генерирует исключение и останавливает выполнение запроса. Такой запрос можно повторять до успешного завершения, что будет означать возвращение связанности с остальными узлами кластера.

 ``` sql
-SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
+SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name> [ON CLUSTER cluster_name]
 ```

 ### START DISTRIBUTED SENDS {#query_language-system-start-distributed-sends}
@ -143,7 +151,7 @@ SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
 Включает фоновую отправку при вставке данных в распределенные таблицы.

 ``` sql
-SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name>
+SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
 ```

 ## Managing MergeTree Tables {#query-language-system-mergetree}
@ -155,7 +163,7 @@ ClickHouse может управлять фоновыми процессами
 Позволяет остановить фоновые мержи для таблиц семейства MergeTree:

 ``` sql
-SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
+SYSTEM STOP MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
 ```

    :::note
@ -166,7 +174,7 @@ SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
 Включает фоновые мержи для таблиц семейства MergeTree:

 ``` sql
-SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
+SYSTEM START MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
 ```

 ### STOP TTL MERGES {#query_language-stop-ttl-merges}
@ -175,7 +183,7 @@ SYSTEM START MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name
 Возвращает `Ok.` даже если указана несуществующая таблица или таблица имеет тип отличный от MergeTree. Возвращает ошибку если указана не существующая база данных:

 ``` sql
-SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
+SYSTEM STOP TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### START TTL MERGES {#query_language-start-ttl-merges}
@ -184,7 +192,7 @@ SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
 Возвращает `Ok.` даже если указана несуществующая таблица или таблица имеет тип отличный от MergeTree. Возвращает ошибку если указана не существующая база данных:

 ``` sql
-SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
+SYSTEM START TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### STOP MOVES {#query_language-stop-moves}
@ -193,7 +201,7 @@ SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
 Возвращает `Ok.` даже если указана несуществующая таблица или таблица имеет тип отличный от MergeTree. Возвращает ошибку если указана не существующая база данных:

 ``` sql
-SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
+SYSTEM STOP MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### START MOVES {#query_language-start-moves}
@ -202,7 +210,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
 Возвращает `Ok.` даже если указана несуществующая таблица или таблица имеет тип отличный от MergeTree. Возвращает ошибку если указана не существующая база данных:

 ``` sql
-SYSTEM START MOVES [[db.]merge_tree_family_table_name]
+SYSTEM START MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### SYSTEM UNFREEZE {#query_language-system-unfreeze}
@ -223,7 +231,7 @@ ClickHouse может управлять фоновыми процессами
 Всегда возвращает `Ok.` вне зависимости от типа таблицы и даже если таблица или база данных не существет.

 ``` sql
-SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM STOP FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### START FETCHES {#query_language-system-start-fetches}
@ -232,7 +240,7 @@ SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
 Всегда возвращает `Ok.` вне зависимости от типа таблицы и даже если таблица или база данных не существет.

 ``` sql
-SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM START FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends}
@ -240,7 +248,7 @@ SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
 Позволяет остановить фоновые процессы отсылки новых вставленных кусков данных другим репликам в кластере для таблиц семейства `ReplicatedMergeTree`:

 ``` sql
-SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
+SYSTEM STOP REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
@ -248,7 +256,7 @@ SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
 Позволяет запустить фоновые процессы отсылки новых вставленных кусков данных другим репликам в кластере для таблиц семейства `ReplicatedMergeTree`:

 ``` sql
-SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
+SYSTEM START REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
@ -256,7 +264,7 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
 Останавливает фоновые процессы разбора заданий из очереди репликации которая хранится в Zookeeper для таблиц семейства `ReplicatedMergeTree`. Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER:

 ``` sql
-SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM STOP REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
@ -264,7 +272,7 @@ SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
 Запускает фоновые процессы разбора заданий из очереди репликации которая хранится в Zookeeper для таблиц семейства `ReplicatedMergeTree`. Возможные типы заданий - merges, fetches, mutation, DDL запросы с ON CLUSTER:

 ``` sql
-SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### SYNC REPLICA {#query_language-system-sync-replica}
@ -287,7 +295,7 @@ SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHT
 Инициализация очереди репликации на основе данных ZooKeeper происходит так же, как при `ATTACH TABLE`. Некоторое время таблица будет недоступна для любых операций.

 ``` sql
-SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
+SYSTEM RESTART REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
 ```

 ### RESTORE REPLICA {#query_language-system-restore-replica}
--- a/docs/zh/sql-reference/statements/system.md
+++ b/docs/zh/sql-reference/statements/system.md
@ -71,10 +71,18 @@ SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';

 将日志信息缓冲数据刷入系统表（例如system.query_log）。调试时允许等待不超过7.5秒。当信息队列为空时，会创建系统表。

+```sql
+SYSTEM FLUSH LOGS [ON CLUSTER cluster_name]
+```
+
 ## RELOAD CONFIG {#query_language-system-reload-config}

 重新加载ClickHouse的配置。用于当配置信息存放在ZooKeeper时。

+```sql
+SYSTEM RELOAD CONFIG [ON CLUSTER cluster_name]
+```
+
 ## SHUTDOWN {#query_language-system-shutdown}

 关闭ClickHouse服务（类似于 `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`）
@ -93,7 +101,7 @@ ClickHouse可以管理 [distribute](../../engines/table-engines/special/distribu
 当向分布式表插入数据时，禁用后台的分布式数据分发。

 ``` sql
-SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
+SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
 ```

 ### FLUSH DISTRIBUTED {#query_language-system-flush-distributed}
@ -101,7 +109,7 @@ SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
 强制让ClickHouse同步向集群节点同步发送数据。如果有节点失效，ClickHouse抛出异常并停止插入操作。当所有节点都恢复上线时，你可以重试之前的操作直到成功执行。

 ``` sql
-SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
+SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name> [ON CLUSTER cluster_name]
 ```

 ### START DISTRIBUTED SENDS {#query_language-system-start-distributed-sends}
@ -109,7 +117,7 @@ SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
 当向分布式表插入数据时，允许后台的分布式数据分发。

 ``` sql
-SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name>
+SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name> [ON CLUSTER cluster_name]
 ```

 ## Managing MergeTree Tables {#query-language-system-mergetree}
@ -121,7 +129,7 @@ ClickHouse可以管理 [MergeTree](../../engines/table-engines/mergetree-family/
 为MergeTree系列引擎表停止后台合并操作。

 ``` sql
-SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
+SYSTEM STOP MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
 ```

 :::note
@ -133,7 +141,7 @@ SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
 为MergeTree系列引擎表启动后台合并操作。

 ``` sql
-SYSTEM START MERGES [[db.]merge_tree_family_table_name]
+SYSTEM START MERGES [ON CLUSTER cluster_name] [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
 ```

 ### STOP TTL MERGES {#query_language-stop-ttl-merges}
@ -142,7 +150,7 @@ SYSTEM START MERGES [[db.]merge_tree_family_table_name]
 不管表存在与否，都返回 `OK.`。当数据库不存在时返回错误。

 ``` sql
-SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
+SYSTEM STOP TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### START TTL MERGES {#query_language-start-ttl-merges}
@ -151,7 +159,7 @@ SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]


 ``` sql
-SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
+SYSTEM START TTL MERGES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### STOP MOVES {#query_language-stop-moves}
@ -160,7 +168,7 @@ SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]


 ``` sql
-SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
+SYSTEM STOP MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### START MOVES {#query_language-start-moves}
@ -169,7 +177,7 @@ SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]


 ``` sql
-SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
+SYSTEM START MOVES [ON CLUSTER cluster_name] [[db.]merge_tree_family_table_name]
 ```

 ### SYSTEM UNFREEZE {#query_language-system-unfreeze}
@ -190,7 +198,7 @@ SYSTEM UNFREEZE WITH NAME <backup_name>
 不管表引擎类型如何或表/数据库是否存，都返回 `OK.`。

 ``` sql
-SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM STOP FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### START FETCHES {#query_language-system-start-fetches}
@ -199,7 +207,7 @@ SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
 不管表引擎类型如何或表/数据库是否存，都返回 `OK.`。

 ``` sql
-SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM START FETCHES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends}
@ -207,7 +215,7 @@ SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
 停止通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。

 ``` sql
-SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
+SYSTEM STOP REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
@ -215,7 +223,7 @@ SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
 启动通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。

 ``` sql
-SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
+SYSTEM START REPLICATED SENDS [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
@ -224,7 +232,7 @@ SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
 停止从Zookeeper中获取 `ReplicatedMergeTree`系列表的复制队列的后台任务。可能的后台任务类型包含：merges, fetches, mutation，带有 `ON CLUSTER`的ddl语句

 ``` sql
-SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM STOP REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
@ -232,7 +240,7 @@ SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
 启动从Zookeeper中获取 `ReplicatedMergeTree`系列表的复制队列的后台任务。可能的后台任务类型包含：merges, fetches, mutation，带有 `ON CLUSTER`的ddl语句

 ``` sql
-SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
+SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name]
 ```

 ### SYNC REPLICA {#query_language-system-sync-replica}
@ -250,7 +258,7 @@ SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHT


 ``` sql
-SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
+SYSTEM RESTART REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
 ```

 ### RESTART REPLICAS {#query_language-system-restart-replicas}
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@ -55,6 +55,8 @@ option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_

 option (ENABLE_CLICKHOUSE_KEEPER_CONVERTER "Util allows to convert ZooKeeper logs and snapshots into clickhouse-keeper snapshot" ${ENABLE_CLICKHOUSE_ALL})

+option (ENABLE_CLICKHOUSE_KEEPER_CLIENT "ClickHouse Keeper Client" ${ENABLE_CLICKHOUSE_ALL})
+
 option (ENABLE_CLICKHOUSE_SU "A tool similar to 'su'" ${ENABLE_CLICKHOUSE_ALL})

 option (ENABLE_CLICKHOUSE_DISKS "A tool to manage disks" ${ENABLE_CLICKHOUSE_ALL})
@ -169,6 +171,13 @@ else()
    message(STATUS "ClickHouse keeper-converter mode: OFF")
 endif()

+if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
+    message(STATUS "ClickHouse keeper-client mode: ON")
+else()
+    message(STATUS "ClickHouse keeper-client mode: OFF")
+endif()
+
+
 if (ENABLE_CLICKHOUSE_DISKS)
    message(STATUS "Clickhouse disks mode: ON")
 else()
@ -237,6 +246,10 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
    add_subdirectory (keeper-converter)
 endif()

+if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
+    add_subdirectory (keeper-client)
+endif()
+
 if (ENABLE_CLICKHOUSE_ODBC_BRIDGE)
    add_subdirectory (odbc-bridge)
 endif ()
@ -301,6 +314,9 @@ endif()
 if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
    clickhouse_target_link_split_lib(clickhouse keeper-converter)
 endif()
+if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
+    clickhouse_target_link_split_lib(clickhouse keeper-client)
+endif()
 if (ENABLE_CLICKHOUSE_INSTALL)
    clickhouse_target_link_split_lib(clickhouse install)
 endif ()
@ -392,6 +408,11 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER)
    install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-converter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
    list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter)
 endif ()
+if (ENABLE_CLICKHOUSE_KEEPER_CLIENT)
+    add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse)
+    install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+    list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-client)
+endif ()
 if (ENABLE_CLICKHOUSE_DISKS)
    add_custom_target (clickhouse-disks ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-disks DEPENDS clickhouse)
    install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-disks" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
--- a/programs/config_tools.h.in
+++ b/programs/config_tools.h.in
@ -17,6 +17,7 @@
 #cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE
 #cmakedefine01 ENABLE_CLICKHOUSE_LIBRARY_BRIDGE
 #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER
+#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CLIENT
 #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CONVERTER
 #cmakedefine01 ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER
 #cmakedefine01 ENABLE_CLICKHOUSE_SU
--- a/programs/keeper-client/CMakeLists.txt
+++ b/programs/keeper-client/CMakeLists.txt
@ -0,0 +1,9 @@
+# Translation units that make up the keeper-client program.
+set (CLICKHOUSE_KEEPER_CLIENT_SOURCES
+    KeeperClient.cpp
+    Parser.cpp
+    Commands.cpp
+)
+
+# Libraries the keeper-client program links against.
+set (CLICKHOUSE_KEEPER_CLIENT_LINK
+    PRIVATE
+        boost::program_options
+        dbms
+)
+
+clickhouse_program_add(keeper-client)
--- a/programs/keeper-client/Commands.cpp
+++ b/programs/keeper-client/Commands.cpp
@ -0,0 +1,196 @@
+
+#include "Commands.h"
+#include "KeeperClient.h"
+
+
+namespace DB
+{
+
+/// Parses the optional path argument of `ls`.
+/// Always succeeds: a path is stored in `node->args` only if one could be parsed.
+bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
+{
+    String path;
+    if (parseKeeperPath(pos, expected, path))
+        node->args.push_back(std::move(path));
+
+    return true;
+}
+
+/// Prints the children of the requested node on one line, each followed by a space.
+/// Without an argument the client's current working path is listed.
+void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+    String target;
+    if (query->args.empty())
+        target = client->cwd;
+    else
+        target = client->getAbsolutePath(query->args[0].safeGet<String>());
+
+    const auto children = client->zookeeper->getChildren(target);
+    for (const auto & name : children)
+        std::cout << name << " ";
+    std::cout << "\n";
+}
+
+/// Parses the optional path argument of `cd`.
+/// Always succeeds: a path is stored in `node->args` only if one could be parsed.
+bool CDCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
+{
+    String path;
+    if (parseKeeperPath(pos, expected, path))
+        node->args.push_back(std::move(path));
+
+    return true;
+}
+
+/// Changes the client's working path to the given node.
+/// Does nothing when no path was parsed; reports an error on stderr (and keeps the
+/// current working path) when the target node does not exist.
+void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+    if (query->args.empty())
+        return;
+
+    auto new_path = client->getAbsolutePath(query->args[0].safeGet<String>());
+    if (!client->zookeeper->exists(new_path))
+        std::cerr << "Path " << new_path << " does not exist\n";
+    else
+        client->cwd = new_path;
+}
+
+/// Parses `set <path> <value> [version]`.
+/// Path and value are mandatory (returns false if either is missing); a trailing number,
+/// if present, is stored as the third argument.
+bool SetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
+{
+    String token;
+
+    /// <path>
+    if (!parseKeeperPath(pos, expected, token))
+        return false;
+    node->args.push_back(std::move(token));
+
+    /// <value>
+    if (!parseKeeperArg(pos, expected, token))
+        return false;
+    node->args.push_back(std::move(token));
+
+    /// [version]
+    ParserNumber version_parser;
+    ASTPtr version_ast;
+    if (version_parser.parse(pos, version_ast, expected))
+        node->args.push_back(version_ast->as<ASTLiteral &>().value);
+
+    return true;
+}
+
+/// Writes a new value to the node. When a third argument (version) was parsed it is
+/// forwarded to the Keeper `set` call; otherwise the two-argument form is used.
+void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+    const String path = client->getAbsolutePath(query->args[0].safeGet<String>());
+    const String & value = query->args[1].safeGet<String>();
+
+    if (query->args.size() != 2)
+    {
+        const auto version = static_cast<Int32>(query->args[2].safeGet<Int64>());
+        client->zookeeper->set(path, value, version);
+        return;
+    }
+
+    client->zookeeper->set(path, value);
+}
+
+/// Parses `create <path> <value> [mode]`.
+/// Path and value are mandatory (returns false if either is missing). The optional mode is one of
+/// PERSISTENT, EPHEMERAL, PERSISTENT SEQUENTIAL, EPHEMERAL SEQUENTIAL and defaults to PERSISTENT.
+/// The resulting mode is always stored as the third argument.
+bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
+{
+    String arg;
+    if (!parseKeeperPath(pos, expected, arg))
+        return false;
+    node->args.push_back(std::move(arg));
+
+    if (!parseKeeperArg(pos, expected, arg))
+        return false;
+    node->args.push_back(std::move(arg));
+
+    int mode = zkutil::CreateMode::Persistent;
+
+    /// The two-word modes have to be tried first: the single-word keywords are their prefixes,
+    /// so checking PERSISTENT / EPHEMERAL first would consume the first word and leave a
+    /// dangling SEQUENTIAL token, making the sequential modes unreachable.
+    if (ParserKeyword{"PERSISTENT SEQUENTIAL"}.ignore(pos, expected))
+        mode = zkutil::CreateMode::PersistentSequential;
+    else if (ParserKeyword{"EPHEMERAL SEQUENTIAL"}.ignore(pos, expected))
+        mode = zkutil::CreateMode::EphemeralSequential;
+    else if (ParserKeyword{"PERSISTENT"}.ignore(pos, expected))
+        mode = zkutil::CreateMode::Persistent;
+    else if (ParserKeyword{"EPHEMERAL"}.ignore(pos, expected))
+        mode = zkutil::CreateMode::Ephemeral;
+
+    node->args.push_back(mode);
+
+    return true;
+}
+
+/// Creates a node at the given path with the given value, using the mode stored as the third argument.
+void CreateCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+    const String path = client->getAbsolutePath(query->args[0].safeGet<String>());
+    const String & value = query->args[1].safeGet<String>();
+    const auto mode = static_cast<int>(query->args[2].safeGet<Int64>());
+
+    client->zookeeper->create(path, value, mode);
+}
+
+/// Parses the mandatory path argument of `get`; fails when no path can be parsed.
+bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
+{
+    String path;
+    const bool parsed = parseKeeperPath(pos, expected, path);
+    if (parsed)
+        node->args.push_back(std::move(path));
+
+    return parsed;
+}
+
+/// Prints the value of the requested node followed by a newline.
+void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+    const String path = client->getAbsolutePath(query->args[0].safeGet<String>());
+    std::cout << client->zookeeper->get(path) << "\n";
+}
+
+/// Parses the mandatory path argument of `rm`; fails when no path can be parsed.
+bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
+{
+    String path;
+    const bool parsed = parseKeeperPath(pos, expected, path);
+    if (parsed)
+        node->args.push_back(std::move(path));
+
+    return parsed;
+}
+
+/// Removes the single node at the requested path.
+void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+    const String path = client->getAbsolutePath(query->args[0].safeGet<String>());
+    client->zookeeper->remove(path);
+}
+
+/// Parses the mandatory path argument of `rmr`; fails when no path can be parsed.
+bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
+{
+    String path;
+    const bool parsed = parseKeeperPath(pos, expected, path);
+    if (parsed)
+        node->args.push_back(std::move(path));
+
+    return parsed;
+}
+
+/// Registers a recursive removal of the requested path with the client's confirmation
+/// mechanism; the removal itself is performed by the callback handed to `askConfirmation`.
+void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+    String path = client->getAbsolutePath(query->args[0].safeGet<String>());
+    auto remove_subtree = [client, path] { client->zookeeper->removeRecursive(path); };
+
+    client->askConfirmation("You are going to recursively delete path " + path, std::move(remove_subtree));
+}
+
+/// `help` takes no arguments, so there is nothing to parse and parsing always succeeds.
+bool HelpCommand::parse(
+    IParser::Pos & /* pos */,
+    std::shared_ptr<ASTKeeperQuery> & /* node */,
+    Expected & /* expected */) const
+{
+    return true;
+}
+
+/// Prints the help message of every registered command, one per line.
+void HelpCommand::execute(const ASTKeeperQuery * /* query */, KeeperClient * /* client */) const
+{
+    for (const auto & [command_name, command] : KeeperClient::commands)
+        std::cout << command->getHelpMessage() << "\n";
+}
+
+/// Accepts a single bare word of exactly four characters and stores it as the only argument.
+bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
+{
+    expected.add(pos, "four-letter-word command");
+
+    const bool is_four_letter_word = pos->type == TokenType::BareWord && (pos->end - pos->begin) == 4;
+    if (!is_four_letter_word)
+        return false;
+
+    node->args.push_back(String(pos->begin, pos->end));
+    ++pos;
+    return true;
+}
+
+/// Sends the four-letter-word command through the client and prints the reply followed by a newline.
+void FourLetterWordCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
+{
+    const String & word = query->args[0].safeGet<String>();
+    std::cout << client->executeFourLetterCommand(word) << "\n";
+}
+
+}
--- a/programs/keeper-client/Commands.h
+++ b/programs/keeper-client/Commands.h
@ -0,0 +1,131 @@
+#pragma once
+
+#include "Parser.h"
+
+namespace DB
+{
+
+class KeeperClient;
+
+/// Interface implemented by every keeper-client command.
+class IKeeperClientCommand
+{
+public:
+    virtual ~IKeeperClientCommand() = default;
+
+    static const String name;
+
+    /// Name under which the command is registered in the client.
+    virtual String getName() const = 0;
+
+    /// One-line usage description.
+    virtual String getHelpMessage() const = 0;
+
+    /// Parses the command's arguments starting at `pos` and appends them to `node->args`.
+    /// Returns false if the arguments are not valid for this command.
+    virtual bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const = 0;
+
+    /// Runs the command using the arguments stored in `query` against `client`.
+    virtual void execute(const ASTKeeperQuery * query, KeeperClient * client) const = 0;
+};
+
+using Command = std::shared_ptr<IKeeperClientCommand>;
+
+
+/// `ls [path]` -- lists child nodes.
+class LSCommand : public IKeeperClientCommand
+{
+    String getName() const override { return "ls"; }
+
+    String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+};
+
+/// `cd [path]` -- changes the client's working path.
+class CDCommand : public IKeeperClientCommand
+{
+    String getName() const override { return "cd"; }
+
+    String getHelpMessage() const override { return "cd [path] -- Change the working path (default `.`)"; }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+};
+
+/// `set <path> <value> [version]` -- updates a node's value.
+class SetCommand : public IKeeperClientCommand
+{
+    String getName() const override { return "set"; }
+
+    String getHelpMessage() const override
+    {
+        return "set <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
+    }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+};
+
+/// `create <path> <value> [mode]` -- creates a node.
+/// The help text lists the optional mode argument because the parser accepts one.
+class CreateCommand : public IKeeperClientCommand
+{
+    String getName() const override { return "create"; }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+
+    String getHelpMessage() const override { return "create <path> <value> [mode] -- Creates new node with the set value"; }
+};
+
+/// `get <path>` -- prints a node's value.
+class GetCommand : public IKeeperClientCommand
+{
+    String getName() const override { return "get"; }
+
+    String getHelpMessage() const override { return "get <path> -- Returns the node's value"; }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+};
+
+/// `rm <path>` -- removes a single node.
+/// The help text uses the registered command name (`rm`), matching what the user has to type.
+class RMCommand : public IKeeperClientCommand
+{
+    String getName() const override { return "rm"; }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+
+    String getHelpMessage() const override { return "rm <path> -- Remove the node"; }
+};
+
+/// `rmr <path>` -- recursive removal, guarded by a confirmation prompt.
+class RMRCommand : public IKeeperClientCommand
+{
+    String getName() const override { return "rmr"; }
+
+    String getHelpMessage() const override { return "rmr <path> -- Recursively deletes path. Confirmation required"; }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+};
+
+/// `help` -- prints the usage line of every registered command.
+class HelpCommand : public IKeeperClientCommand
+{
+    String getName() const override { return "help"; }
+
+    String getHelpMessage() const override { return "help -- Prints this message"; }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+};
+
+/// `flwc <command>` -- sends a four-letter-word command to the server.
+class FourLetterWordCommand : public IKeeperClientCommand
+{
+    String getName() const override { return "flwc"; }
+
+    String getHelpMessage() const override { return "flwc <command> -- Executes four-letter-word command"; }
+
+    bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
+
+    void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
+};
+
+}
--- a/programs/keeper-client/KeeperClient.cpp
+++ b/programs/keeper-client/KeeperClient.cpp
@ -0,0 +1,343 @@
+#include "KeeperClient.h"
+#include "Commands.h"
+#include <Client/ReplxxLineReader.h>
+#include <Client/ClientBase.h>
+#include <Common/EventNotifier.h>
+#include <Common/filesystemHelpers.h>
+#include <Common/ZooKeeper/ZooKeeper.h>
+#include <Parsers/parseQuery.h>
+#include <Poco/Util/HelpFormatter.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+/// Sends `command` over a fresh TCP connection to the first configured host and
+/// returns everything that can be read from the socket until EOF.
+String KeeperClient::executeFourLetterCommand(const String & command)
+{
+    /// We need to create a new socket every time because ZooKeeper forcefully shuts down the connection after a four-letter-word command.
+    Poco::Net::StreamSocket socket;
+
+    /// NOTE(review): the *_ms settings are multiplied by 1000 before being handed to Poco,
+    /// presumably to convert milliseconds to microseconds -- confirm against Poco::Timespan.
+    const auto connection_timeout = zk_args.connection_timeout_ms * 1000;
+    const auto operation_timeout = zk_args.operation_timeout_ms * 1000;
+
+    socket.connect(Poco::Net::SocketAddress{zk_args.hosts[0]}, connection_timeout);
+    socket.setReceiveTimeout(operation_timeout);
+    socket.setSendTimeout(operation_timeout);
+    socket.setNoDelay(true);
+
+    ReadBufferFromPocoSocket in(socket);
+    WriteBufferFromPocoSocket out(socket);
+
+    /// Request.
+    out.write(command.data(), command.size());
+    out.next();
+
+    /// Response.
+    String response;
+    readStringUntilEOF(response, in);
+    in.next();
+    return response;
+}
+
+/// Produces completion candidates for the text typed so far.
+/// While the first word is still being entered, all registered command names and four-letter
+/// words are returned. Afterwards the sorted children of the Keeper node that the typed path
+/// points into are returned; nothing is suggested when more tokens follow the path.
+std::vector<String> KeeperClient::getCompletions(const String & prefix) const
+{
+    Tokens tokens(prefix.data(), prefix.data() + prefix.size(), 0, false);
+    IParser::Pos pos(tokens, 0);
+
+    /// No command word yet.
+    if (pos->type != TokenType::BareWord)
+        return registered_commands_and_four_letter_words;
+
+    /// The command word is the last token.
+    ++pos;
+    if (pos->isEnd())
+        return registered_commands_and_four_letter_words;
+
+    ParserToken{TokenType::Whitespace}.ignore(pos);
+
+    Expected expected;
+    String typed_path;
+    if (!parseKeeperPath(pos, expected, typed_path))
+        typed_path = cwd;
+
+    if (!pos->isEnd())
+        return {};
+
+    /// A trailing slash means "complete inside this node"; otherwise complete inside its parent.
+    const String parent_path = typed_path.ends_with("/")
+        ? getAbsolutePath(typed_path)
+        : getAbsolutePath(fs::path(typed_path).parent_path());
+
+    std::vector<String> candidates;
+    try
+    {
+        const auto children = zookeeper->getChildren(parent_path);
+        candidates.assign(children.begin(), children.end());
+    }
+    catch (Coordination::Exception &) {} /// Keeper errors simply yield no suggestions.
+
+    std::sort(candidates.begin(), candidates.end());
+    return candidates;
+}
+
+/// Prints `prompt` followed by " Continue?" and remembers `callback`.
+/// The callback is not run here: it is stored and the client is switched into
+/// confirmation mode, so the next processed input decides whether it is invoked.
+void KeeperClient::askConfirmation(const String & prompt, std::function<void()> && callback)
+{
+    std::cout << prompt << " Continue?\n";
+    need_confirmation = true;
+    /// `callback` is an rvalue reference: move it instead of copying the stored closure.
+    confirmation_callback = std::move(callback);
+}
+
+/// Resolves `relative` against the current working path (inputs starting with `/` are taken
+/// as-is) and normalizes the result: `.` and `..` components are collapsed and a trailing
+/// slash is dropped, except for the root `/`.
+///
+/// Normalization is purely lexical. Keeper paths are unrelated to the local filesystem, so
+/// `fs::weakly_canonical` must not be used here: it resolves symlinks of any leading path
+/// components that happen to exist on the client machine (e.g. `/bin` -> `/usr/bin`) and
+/// would silently address a different Keeper node.
+fs::path KeeperClient::getAbsolutePath(const String & relative) const
+{
+    String result;
+    if (relative.starts_with('/'))
+        result = fs::path(relative).lexically_normal();
+    else
+        result = (cwd / relative).lexically_normal();
+
+    if (result.ends_with('/') && result.size() > 1)
+        result.pop_back();
+
+    return result;
+}
+
+/// Registers the given commands under their names and rebuilds the sorted list of
+/// completion words (command names plus the known four-letter words).
+void KeeperClient::loadCommands(std::vector<Command> && new_commands)
+{
+    auto & words = registered_commands_and_four_letter_words;
+
+    for (const auto & command : new_commands)
+    {
+        String command_name = command->getName();
+        commands.insert({command_name, command});
+        words.push_back(std::move(command_name));
+    }
+
+    words.insert(words.end(), four_letter_word_commands.begin(), four_letter_word_commands.end());
+
+    std::sort(words.begin(), words.end());
+}
+
+/// Declares the command-line options of the client. Every option is bound to a
+/// configuration key equal to its full name.
+void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
+{
+    Poco::Util::Application::defineOptions(options);
+
+    /// Adds an option that takes a value; argument name and binding both equal the full option name.
+    const auto add_option_with_value = [&options](const String & full_name, const String & short_name, const String & description)
+    {
+        options.addOption(
+            Poco::Util::Option(full_name, short_name, description)
+                .argument(full_name)
+                .binding(full_name));
+    };
+
+    /// The only flag without a value.
+    options.addOption(
+        Poco::Util::Option("help", "", "show help and exit")
+            .binding("help"));
+
+    add_option_with_value("host", "h", "server hostname. default `localhost`");
+    add_option_with_value("port", "p", "server port. default `2181`");
+    add_option_with_value("query", "q", "will execute given query, then exit.");
+    add_option_with_value("connection-timeout", "", "set connection timeout in seconds. default 10s.");
+    add_option_with_value("session-timeout", "", "set session timeout in seconds. default 10s.");
+    add_option_with_value("operation-timeout", "", "set operation timeout in seconds. default 10s.");
+    add_option_with_value("history-file", "", "set path of history file. default `~/.keeper-client-history`");
+    add_option_with_value("log-level", "", "set log level");
+}
+
+/// Application start-up: wires completion, registers the commands, resolves (and if needed
+/// creates) the history file, applies the log level and initializes the event notifier.
+void KeeperClient::initialize(Poco::Util::Application & /* self */)
+{
+    suggest.setCompletionsCallback(
+        [&](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); });
+
+    loadCommands({
+        std::make_shared<LSCommand>(),
+        std::make_shared<CDCommand>(),
+        std::make_shared<SetCommand>(),
+        std::make_shared<CreateCommand>(),
+        std::make_shared<GetCommand>(),
+        std::make_shared<RMCommand>(),
+        std::make_shared<RMRCommand>(),
+        std::make_shared<HelpCommand>(),
+        std::make_shared<FourLetterWordCommand>(),
+    });
+
+    /// Default history location is $HOME/.keeper-client-history ($HOME may be unset).
+    const char * home_env = getenv("HOME"); // NOLINT(concurrency-mt-unsafe)
+    const String home_path = home_env ? home_env : "";
+
+    history_file = config().getString("history-file", home_path + "/.keeper-client-history");
+
+    const bool history_file_missing = !history_file.empty() && !fs::exists(history_file);
+    if (history_file_missing)
+    {
+        try
+        {
+            FS::createFile(history_file);
+        }
+        catch (const ErrnoException & e)
+        {
+            /// Only an "already exists" error is tolerated here.
+            if (e.getErrno() != EEXIST)
+                throw;
+        }
+    }
+
+    Poco::Logger::root().setLevel(config().getString("log-level", "error"));
+
+    EventNotifier::init();
+}
+
+/// Splits `query` on `;` and processes every non-empty piece in order.
+void KeeperClient::executeQuery(const String & query)
+{
+    std::vector<String> statements;
+    boost::algorithm::split(statements, query, boost::is_any_of(";"));
+
+    for (const auto & statement : statements)
+    {
+        if (statement.empty())
+            continue;
+
+        processQueryText(statement);
+    }
+}
+
+/// Handles one piece of user input.
+/// Returns false only when the input is one of the exit strings; true otherwise.
+/// While a confirmation is pending the input is treated as the answer ("y"/"Y" runs the stored
+/// callback, anything else cancels). Otherwise the input is parsed and the matching command
+/// executed; parse errors and Coordination exceptions are printed to stderr.
+bool KeeperClient::processQueryText(const String & text)
+{
+    const bool wants_exit = exit_strings.find(text) != exit_strings.end();
+    if (wants_exit)
+        return false;
+
+    try
+    {
+        if (need_confirmation)
+        {
+            need_confirmation = false;
+            if (text == "y" || text == "Y")
+                confirmation_callback();
+            return true;
+        }
+
+        KeeperParser parser;
+        String error_message;
+        const char * query_pos = text.data();
+        const char * query_end = query_pos + text.size();
+        ASTPtr ast = tryParseQuery(parser, query_pos, query_end, error_message, true, "", false, 0, 0, false);
+
+        if (ast)
+        {
+            auto * query = ast->as<ASTKeeperQuery>();
+            auto command = KeeperClient::commands.find(query->command);
+            command->second->execute(query, this);
+        }
+        else
+        {
+            std::cerr << error_message << "\n";
+        }
+    }
+    catch (Coordination::Exception & err)
+    {
+        std::cerr << err.message() << "\n";
+    }
+    return true;
+}
+
+/// Interactive read-eval loop. Stops on empty input or when processQueryText() asks to exit.
+void KeeperClient::runInteractive()
+{
+    LineReader::Patterns query_extenders = {"\\"};
+    LineReader::Patterns query_delimiters = {};
+
+    ReplxxLineReader line_reader(suggest, history_file, false, query_extenders, query_delimiters, {});
+    line_reader.enableBracketedPaste();
+
+    for (;;)
+    {
+        /// A pending confirmation replaces the usual "<cwd> :) " prompt.
+        const String prompt = need_confirmation ? String("[y/n] ") : cwd.string() + " :) ";
+
+        const auto line = line_reader.readLine(prompt, ":-] ");
+        if (line.empty() || !processQueryText(line))
+            break;
+    }
+}
+
+/// Entry point of the application: prints help if requested, otherwise connects to Keeper
+/// using the configured host/port/timeouts and runs either the given query or the interactive loop.
+int KeeperClient::main(const std::vector<String> & /* args */)
+{
+    if (config().hasOption("help"))
+    {
+        Poco::Util::HelpFormatter help_formatter(KeeperClient::options());
+        help_formatter.setHeader(fmt::format("{} [OPTION]\n", commandName()));
+        help_formatter.format(std::cout);
+        return 0;
+    }
+
+    /// Timeout options are given in seconds (default 10) and stored in milliseconds.
+    const auto timeout_ms = [this](const char * option) { return config().getInt(option, 10) * 1000; };
+
+    const auto host = config().getString("host", "localhost");
+    const auto port = config().getString("port", "2181");
+
+    zk_args.hosts = {host + ":" + port};
+    zk_args.connection_timeout_ms = timeout_ms("connection-timeout");
+    zk_args.session_timeout_ms = timeout_ms("session-timeout");
+    zk_args.operation_timeout_ms = timeout_ms("operation-timeout");
+    zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_args);
+
+    if (!config().has("query"))
+        runInteractive();
+    else
+        executeQuery(config().getString("query"));
+
+    return 0;
+}
+
+}
+
+
+/// Program entry point for `clickhouse keeper-client`.
+/// Runs the application and converts any escaping exception into a message on stderr
+/// plus a non-zero exit code.
+int mainEntryClickHouseKeeperClient(int argc, char ** argv)
+{
+    try
+    {
+        DB::KeeperClient client;
+        client.init(argc, argv);
+        const int exit_code = client.run();
+        return exit_code;
+    }
+    catch (const DB::Exception & e)
+    {
+        std::cerr << DB::getExceptionMessage(e, false) << std::endl;
+        return 1;
+    }
+    catch (const boost::program_options::error & e)
+    {
+        std::cerr << "Bad arguments: " << e.what() << std::endl;
+        return DB::ErrorCodes::BAD_ARGUMENTS;
+    }
+    catch (...)
+    {
+        /// Anything else: print whatever description is available for the in-flight exception.
+        std::cerr << DB::getCurrentExceptionMessage(true) << std::endl;
+        return 1;
+    }
+}
--- a/programs/keeper-client/KeeperClient.h
+++ b/programs/keeper-client/KeeperClient.h
@ -0,0 +1,69 @@
+#pragma once
+
+#include "Parser.h"
+#include "Commands.h"
+#include <Common/ZooKeeper/ZooKeeper.h>
+#include <Client/LineReader.h>
+#include <IO/ReadBufferFromPocoSocket.h>
+#include <IO/WriteBufferFromPocoSocket.h>
+#include <Parsers/ASTLiteral.h>
+#include <Poco/Net/StreamSocket.h>
+#include <Poco/Util/Application.h>
+#include <filesystem>
+
+
+namespace fs = std::filesystem;
+
+namespace DB
+{
+
+/// Four-letter-word commands known to this client build; KeeperParser adds them to the list of expected tokens.
+/// Declared `inline` rather than `static`: a `static` object in a header is constructed separately in every
+/// translation unit that includes it, whereas an inline variable is a single shared object.
+inline const NameSet four_letter_word_commands
+    {
+        "ruok", "mntr", "srvr", "stat", "srst", "conf",
+        "cons", "crst", "envi", "dirs", "isro", "wchs",
+        "wchc", "wchp", "dump", "csnp", "lgif", "rqld",
+    };
+
+/// Command-line client for ClickHouse Keeper, implemented as a Poco::Util::Application.
+/// Runs either a single query passed on the command line or an interactive prompt.
+class KeeperClient: public Poco::Util::Application
+{
+public:
+    KeeperClient() = default;
+
+    void initialize(Poco::Util::Application & self) override;
+
+    /// Handles `--help`, opens the Keeper session from the command-line options, then executes
+    /// the `--query` if one was given or starts the interactive loop otherwise.
+    int main(const std::vector<String> & args) override;
+
+    void defineOptions(Poco::Util::OptionSet & options) override;
+
+    /// NOTE(review): presumably resolves `relative` against `cwd` - confirm in KeeperClient.cpp.
+    fs::path getAbsolutePath(const String & relative) const;
+
+    /// NOTE(review): presumably stores `callback` in `confirmation_callback` and sets `need_confirmation`
+    /// so that the next prompt asks "[y/n]" - confirm in KeeperClient.cpp.
+    void askConfirmation(const String & prompt, std::function<void()> && callback);
+
+    String executeFourLetterCommand(const String & command);
+
+    /// Keeper session; created in main() from `zk_args`.
+    zkutil::ZooKeeperPtr zookeeper;
+    /// Current path inside Keeper; shown in the interactive prompt. Starts at the root.
+    std::filesystem::path cwd = "/";
+    std::function<void()> confirmation_callback;
+
+    /// Registered commands keyed by name; KeeperParser looks the command word up here.
+    inline static std::map<String, Command> commands;
+
+protected:
+    /// Prompt loop: stops on empty input or when processQueryText() returns false.
+    void runInteractive();
+    /// Returns false when the interactive loop should stop.
+    bool processQueryText(const String & text);
+    void executeQuery(const String & query);
+
+    void loadCommands(std::vector<Command> && new_commands);
+
+    std::vector<String> getCompletions(const String & prefix) const;
+
+    String history_file;
+    LineReader::Suggest suggest;
+
+    /// Connection parameters (hosts and timeouts) filled in main() from the command-line options.
+    zkutil::ZooKeeperArgs zk_args;
+
+    /// While true the interactive prompt is "[y/n] " instead of the current path.
+    bool need_confirmation = false;
+
+    std::vector<String> registered_commands_and_four_letter_words;
+};
+
+}
--- a/programs/keeper-client/Parser.cpp
+++ b/programs/keeper-client/Parser.cpp
@ -0,0 +1,94 @@
+#include "Parser.h"
+#include "KeeperClient.h"
+
+
+namespace DB
+{
+
+/// Parses one command argument into `result`.
+/// A bare word is taken verbatim; anything else is handed to parseIdentifierOrStringLiteral().
+/// Trailing whitespace is skipped in both cases. Returns whether an argument was parsed.
+bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result)
+{
+    expected.add(pos, getTokenName(TokenType::BareWord));
+
+    bool parsed;
+    if (pos->type == TokenType::BareWord)
+    {
+        result.assign(pos->begin, pos->end);
+        ++pos;
+        parsed = true;
+    }
+    else
+    {
+        parsed = parseIdentifierOrStringLiteral(pos, expected, result);
+    }
+
+    ParserToken{TokenType::Whitespace}.ignore(pos);
+    return parsed;
+}
+
+/// Parses a Keeper path into `path`.
+/// A quoted identifier or string literal is delegated to parseIdentifierOrStringLiteral().
+/// Otherwise the path is the concatenation of consecutive bare-word, slash and dot tokens;
+/// if there is no such token the function returns false and leaves `path` untouched.
+bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
+{
+    expected.add(pos, "path");
+
+    const bool is_quoted = pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral;
+    if (is_quoted)
+        return parseIdentifierOrStringLiteral(pos, expected, path);
+
+    const auto is_path_token = [](TokenType type)
+    {
+        return type == TokenType::BareWord || type == TokenType::Slash || type == TokenType::Dot;
+    };
+
+    String unquoted;
+    for (; is_path_token(pos->type); ++pos)
+        unquoted.append(pos->begin, pos->end);
+
+    ParserToken{TokenType::Whitespace}.ignore(pos);
+
+    if (unquoted.empty())
+        return false;
+
+    path = unquoted;
+    return true;
+}
+
+/// Parses one keeper-client query: a command word followed by that command's arguments.
+/// On success `node` receives an ASTKeeperQuery whose `command` is the resolved command name.
+/// Returns false if the first token is not a bare word, if the word is neither a registered
+/// command nor four characters long, or if the command rejects its arguments.
+bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    /// Record every known command name as an expected token at this position.
+    for (const auto & entry : KeeperClient::commands)
+        expected.add(pos, entry.first.data());
+
+    for (const auto & word : four_letter_word_commands)
+        expected.add(pos, word.data());
+
+    if (pos->type != TokenType::BareWord)
+        return false;
+
+    String command_name(pos->begin, pos->end);
+    Command command;
+
+    if (auto found = KeeperClient::commands.find(command_name); found != KeeperClient::commands.end())
+    {
+        /// Registered command: consume the command word, its arguments follow.
+        command = found->second;
+        ++pos;
+        ParserToken{TokenType::Whitespace}.ignore(pos);
+    }
+    else if (command_name.size() == 4)
+    {
+        /// Treat it like four-letter command
+        /// Since keeper server can potentially have different version we don't want to match this command with embedded list
+        command = std::make_shared<FourLetterWordCommand>();
+        command_name = command->getName();
+        /// We also don't move the position, so the command will be parsed as an argument
+    }
+    else
+    {
+        return false;
+    }
+
+    auto query = std::make_shared<ASTKeeperQuery>();
+    query->command = command_name;
+    if (!command->parse(pos, query, expected))
+        return false;
+
+    ParserToken{TokenType::Whitespace}.ignore(pos);
+
+    node = query;
+    return true;
+}
+
+}
--- a/programs/keeper-client/Parser.h
+++ b/programs/keeper-client/Parser.h
@ -0,0 +1,36 @@
+#pragma once
+
+#include <Parsers/CommonParsers.h>
+#include <Parsers/ExpressionElementParsers.h>
+#include <Parsers/IAST.h>
+#include <Parsers/IParserBase.h>
+#include <Parsers/parseIdentifierOrStringLiteral.h>
+
+
+namespace DB
+{
+
+bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result);
+
+bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path);
+
+
+/// AST node for a single keeper-client query, produced by KeeperParser.
+class ASTKeeperQuery : public IAST
+{
+public:
+    String getID(char) const override { return "KeeperQuery"; }
+    ASTPtr clone() const override { return std::make_shared<ASTKeeperQuery>(*this); }
+
+    /// Name of the command to run; set by KeeperParser::parseImpl().
+    String command;
+    /// Command arguments. NOTE(review): presumably filled by the command's own parse() - confirm in Commands.cpp.
+    std::vector<Field> args;
+};
+
+/// Parser for keeper-client queries; on success it yields an ASTKeeperQuery.
+class KeeperParser : public IParserBase
+{
+protected:
+    const char * getName() const override { return "Keeper client query"; }
+
+    /// Resolves the leading command word and delegates argument parsing to that command.
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
--- a/programs/main.cpp
+++ b/programs/main.cpp
@ -62,6 +62,9 @@ int mainEntryClickHouseKeeper(int argc, char ** argv);
 #if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
 int mainEntryClickHouseKeeperConverter(int argc, char ** argv);
 #endif
+#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
+int mainEntryClickHouseKeeperClient(int argc, char ** argv);
+#endif
 #if ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER
 int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv);
 #endif
@ -133,6 +136,9 @@ std::pair<const char *, MainFunc> clickhouse_applications[] =
 #if ENABLE_CLICKHOUSE_KEEPER_CONVERTER
    {"keeper-converter", mainEntryClickHouseKeeperConverter},
 #endif
+#if ENABLE_CLICKHOUSE_KEEPER_CLIENT
+    {"keeper-client", mainEntryClickHouseKeeperClient},
+#endif
 #if ENABLE_CLICKHOUSE_INSTALL
    {"install", mainEntryClickHouseInstall},
    {"start", mainEntryClickHouseStart},
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@ -90,14 +90,6 @@ namespace CurrentMetrics
 namespace DB
 {

-static const NameSet exit_strings
-{
-    "exit", "quit", "logout", "учше", "йгше", "дщпщге",
-    "exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж",
-    "q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"
-};
-
-
 namespace ErrorCodes
 {
    extern const int BAD_ARGUMENTS;
--- a/src/Client/ClientBase.h
+++ b/src/Client/ClientBase.h
@ -24,6 +24,14 @@ namespace po = boost::program_options;
 namespace DB
 {

+
+/// Inputs recognised as a request to leave the client. The Cyrillic entries appear to be the same
+/// words typed with a Russian keyboard layout active.
+/// Declared `inline` rather than `static`: this header is included from many translation units and a
+/// `static` object would be constructed once per unit instead of once per program.
+inline const NameSet exit_strings
+{
+    "exit", "quit", "logout", "учше", "йгше", "дщпщге",
+    "exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж",
+    "q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй"
+};
+
 namespace ErrorCodes
 {
    extern const int NOT_IMPLEMENTED;
--- a/src/Client/LineReader.cpp
+++ b/src/Client/LineReader.cpp
@ -81,15 +81,33 @@ replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String &

    std::lock_guard lock(mutex);

+    Words to_search;
+    bool no_case = false;
    /// Only perform case sensitive completion when the prefix string contains any uppercase characters
    if (std::none_of(prefix.begin(), prefix.end(), [](char32_t x) { return iswupper(static_cast<wint_t>(x)); }))
+    {
+        to_search = words_no_case;
+        no_case = true;
+    }
+    else
+        to_search = words;
+
+    if (custom_completions_callback)
+    {
+        auto new_words = custom_completions_callback(prefix, prefix_length);
+        assert(std::is_sorted(new_words.begin(), new_words.end()));
+        addNewWords(to_search, new_words, std::less<std::string>{});
+    }
+
+    if (no_case)
        range = std::equal_range(
-            words_no_case.begin(), words_no_case.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
+            to_search.begin(), to_search.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
            {
                return strncasecmp(s.data(), prefix_searched.data(), prefix_length) < 0;
            });
    else
-        range = std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
+        range = std::equal_range(
+            to_search.begin(), to_search.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched)
            {
                return strncmp(s.data(), prefix_searched.data(), prefix_length) < 0;
            });
--- a/src/Client/LineReader.h
+++ b/src/Client/LineReader.h
@ -18,15 +18,20 @@ public:
    struct Suggest
    {
        using Words = std::vector<std::string>;
+        /// Produces extra completion candidates for the given prefix.
+        /// getCompletions() asserts that the returned words are sorted.
+        using Callback = std::function<Words(const String & prefix, size_t prefix_length)>;

        /// Get vector for the matched range of words if any.
        replxx::Replxx::completions_t getCompletions(const String & prefix, size_t prefix_length);
        void addWords(Words && new_words);

+        /// Installs the callback consulted by getCompletions(). The argument is an rvalue, so it is
+        /// moved into place rather than copied.
+        /// NOTE(review): the member is not guarded by `mutex` - presumably set once before completion starts; confirm.
+        void setCompletionsCallback(Callback && callback) { custom_completions_callback = std::move(callback); }
+
    private:
        Words words TSA_GUARDED_BY(mutex);
        Words words_no_case TSA_GUARDED_BY(mutex);

+        Callback custom_completions_callback = nullptr;
+
        std::mutex mutex;
    };

--- a/src/Common/AsynchronousMetrics.cpp
+++ b/src/Common/AsynchronousMetrics.cpp
@ -69,13 +69,23 @@ AsynchronousMetrics::AsynchronousMetrics(

    /// CGroups v2
    openFileIfExists("/sys/fs/cgroup/memory.max", cgroupmem_limit_in_bytes);
+    if (cgroupmem_limit_in_bytes)
+    {
        openFileIfExists("/sys/fs/cgroup/memory.current", cgroupmem_usage_in_bytes);
+    }
+    openFileIfExists("/sys/fs/cgroup/cpu.max", cgroupcpu_max);

    /// CGroups v1
    if (!cgroupmem_limit_in_bytes)
+    {
        openFileIfExists("/sys/fs/cgroup/memory/memory.limit_in_bytes", cgroupmem_limit_in_bytes);
-    if (!cgroupmem_usage_in_bytes)
        openFileIfExists("/sys/fs/cgroup/memory/memory.usage_in_bytes", cgroupmem_usage_in_bytes);
+    }
+    if (!cgroupcpu_max)
+    {
+        openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_period_us", cgroupcpu_cfs_period);
+        openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", cgroupcpu_cfs_quota);
+    }

    openSensors();
    openBlockDevices();
@ -926,6 +936,61 @@ void AsynchronousMetrics::update(TimePoint update_time)
            tryLogCurrentException(__PRETTY_FUNCTION__);
        }
    }
+
+    if (cgroupcpu_max)
+    {
+        try {
+            cgroupcpu_max->rewind();
+
+            uint64_t quota = 0;
+            uint64_t period = 0;
+
+            std::string line;
+            readText(line, *cgroupcpu_max);
+
+            auto space = line.find(' ');
+
+            if (line.rfind("max", space) == std::string::npos)
+            {
+                auto field1 = line.substr(0, space);
+                quota = std::stoull(field1);
+            }
+
+            if (space != std::string::npos)
+            {
+                auto field2 = line.substr(space + 1);
+                period = std::stoull(field2);
+            }
+
+            new_values["CGroupCpuCfsPeriod"] = { period, "The CFS period of CPU cgroup."};
+            new_values["CGroupCpuCfsQuota"] = { quota, "The CFS quota of CPU cgroup. If stated zero, the quota is max."};
+        }
+        catch (...)
+        {
+            tryLogCurrentException(__PRETTY_FUNCTION__);
+        }
+    }
+    else if (cgroupcpu_cfs_quota && cgroupcpu_cfs_period)
+    {
+        try {
+            cgroupcpu_cfs_quota->rewind();
+            cgroupcpu_cfs_period->rewind();
+
+            uint64_t quota = 0;
+            uint64_t period = 0;
+
+            tryReadText(quota, *cgroupcpu_cfs_quota);
+            tryReadText(period, *cgroupcpu_cfs_period);
+
+            new_values["CGroupCpuCfsPeriod"] = { period, "The CFS period of CPU cgroup."};
+            new_values["CGroupCpuCfsQuota"] = { quota, "The CFS quota of CPU cgroup. If stated zero, the quota is max."};
+        }
+        catch (...)
+        {
+            tryLogCurrentException(__PRETTY_FUNCTION__);
+        }
+    }
+
    if (meminfo)
    {
        try
--- a/src/Common/AsynchronousMetrics.h
+++ b/src/Common/AsynchronousMetrics.h
@ -110,6 +110,9 @@ private:

    std::optional<ReadBufferFromFilePRead> cgroupmem_limit_in_bytes;
    std::optional<ReadBufferFromFilePRead> cgroupmem_usage_in_bytes;
+    std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_period;
+    std::optional<ReadBufferFromFilePRead> cgroupcpu_cfs_quota;
+    std::optional<ReadBufferFromFilePRead> cgroupcpu_max;

    std::vector<std::unique_ptr<ReadBufferFromFilePRead>> thermal;

--- a/src/Common/OpenTelemetryTraceContext.cpp
+++ b/src/Common/OpenTelemetryTraceContext.cpp
@ -124,7 +124,7 @@ SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind)
        this->start_time_us
            = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();

-        /// Add new initialization here
+        this->addAttribute("clickhouse.thread_id", getThreadId());
    }
    catch (...)
    {
--- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp
+++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp
@ -202,6 +202,8 @@ void preparePostgreSQLArrayInfo(
        parser = [](std::string & field) -> Field { return pqxx::from_string<float>(field); };
    else if (which.isFloat64())
        parser = [](std::string & field) -> Field { return pqxx::from_string<double>(field); };
+    else if (which.isUUID())
+        parser = [](std::string & field) -> Field { return parse<UUID>(field); };
    else if (which.isString() || which.isFixedString())
        parser = [](std::string & field) -> Field { return field; };
    else if (which.isDate())
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -651,6 +651,8 @@ class IColumn;
    M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \
    M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \
    M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \
+    M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \
+    M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \
    \
    M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
    M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
--- a/src/DataTypes/NestedUtils.cpp
+++ b/src/DataTypes/NestedUtils.cpp
@ -71,7 +71,7 @@ std::string extractTableName(const std::string & nested_name)
 }


-Block flatten(const Block & block)
+static Block flattenImpl(const Block & block, bool flatten_named_tuple)
 {
    Block res;

@ -114,7 +114,7 @@ Block flatten(const Block & block)
            else
                res.insert(elem);
        }
-        else if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(elem.type.get()))
+        else if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(elem.type.get()); type_tuple && flatten_named_tuple)
        {
            if (type_tuple->haveExplicitNames())
            {
@ -143,6 +143,17 @@ Block flatten(const Block & block)
    return res;
 }

+/// Flattens `block` via flattenImpl() with `flatten_named_tuple` enabled, so plain named Tuple
+/// columns are flattened as well.
+Block flatten(const Block & block)
+{
+    return flattenImpl(block, true);
+}
+
+
+/// Flattens `block` via flattenImpl() with `flatten_named_tuple` disabled: the branch that handles
+/// plain Tuple columns is skipped, the rest of flattenImpl() runs as in flatten().
+Block flattenArrayOfTuples(const Block & block)
+{
+    return flattenImpl(block, false);
+}
+
 namespace
 {

--- a/src/DataTypes/NestedUtils.h
+++ b/src/DataTypes/NestedUtils.h
@ -23,6 +23,9 @@ namespace Nested
    /// 2) For an Array with named Tuple element column, a Array(Tuple(x ..., y ..., ...)), replace it with multiple Array Columns, a.x ..., a.y ..., ...
    Block flatten(const Block & block);

+    /// Same as flatten but only for Array with named Tuple element column.
+    Block flattenArrayOfTuples(const Block & block);
+
    /// Collect Array columns in a form of `column_name.element_name` to single Array(Tuple(...)) column.
    NamesAndTypesList collect(const NamesAndTypesList & names_and_types);

--- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp
+++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp
@ -42,6 +42,11 @@ namespace ErrorCodes
    extern const int ARGUMENT_OUT_OF_BOUND;
 }

+/// Picks the size of the read and prefetch buffers: at least DBMS_DEFAULT_BUFFER_SIZE, grown to
+/// `settings.prefetch_buffer_size` if that is larger, and finally capped by `file_size`.
+/// NOTE(review): a `file_size` of 0 therefore yields a 0-byte buffer - confirm callers never pass an empty file.
+static size_t chooseBufferSize(const ReadSettings & settings, size_t file_size)
+{
+    /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file.
+    const size_t preferred_size = std::max<size_t>(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE);
+    return std::min<size_t>(preferred_size, file_size);
+}

 AsynchronousBoundedReadBuffer::AsynchronousBoundedReadBuffer(
    ImplPtr impl_,
@ -49,11 +54,11 @@ AsynchronousBoundedReadBuffer::AsynchronousBoundedReadBuffer(
    const ReadSettings & settings_,
    AsyncReadCountersPtr async_read_counters_,
    FilesystemReadPrefetchesLogPtr prefetches_log_)
-    : ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0)
+    : ReadBufferFromFileBase(chooseBufferSize(settings_, impl_->getFileSize()), nullptr, 0)
    , impl(std::move(impl_))
    , read_settings(settings_)
    , reader(reader_)
-    , prefetch_buffer(settings_.prefetch_buffer_size)
+    , prefetch_buffer(chooseBufferSize(settings_, impl->getFileSize()))
    , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? CurrentThread::getQueryId() : "")
    , current_reader_id(getRandomASCIIString(8))
    , log(&Poco::Logger::get("AsynchronousBoundedReadBuffer"))
@ -103,12 +108,10 @@ void AsynchronousBoundedReadBuffer::prefetch(Priority priority)
    if (!hasPendingDataToRead())
        return;

-    last_prefetch_info.submit_time = std::chrono::duration_cast<std::chrono::milliseconds>(
-        std::chrono::system_clock::now().time_since_epoch()).count();
+    last_prefetch_info.submit_time = std::chrono::system_clock::now();
    last_prefetch_info.priority = priority;

-    chassert(prefetch_buffer.size() == read_settings.prefetch_buffer_size
-             || prefetch_buffer.size() == read_settings.remote_fs_buffer_size);
+    chassert(prefetch_buffer.size() == chooseBufferSize(read_settings, impl->getFileSize()));
    prefetch_future = asyncReadInto(prefetch_buffer.data(), prefetch_buffer.size(), priority);
    ProfileEvents::increment(ProfileEvents::RemoteFSPrefetches);
 }
@ -187,7 +190,7 @@ bool AsynchronousBoundedReadBuffer::nextImpl()
    {
        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::SynchronousRemoteReadWaitMicroseconds);

-        chassert(memory.size() == read_settings.prefetch_buffer_size || memory.size() == read_settings.remote_fs_buffer_size);
+        chassert(memory.size() == chooseBufferSize(read_settings, impl->getFileSize()));
        std::tie(size, offset) = impl->readInto(memory.data(), memory.size(), file_offset_of_buffer_end, bytes_to_ignore);

        ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedReads);
--- a/src/Disks/IO/AsynchronousBoundedReadBuffer.h
+++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.h
@ -1,11 +1,12 @@
 #pragma once

-#include "config.h"
-#include <IO/ReadBufferFromFile.h>
+#include <chrono>
+#include <utility>
 #include <IO/AsynchronousReader.h>
+#include <IO/ReadBufferFromFile.h>
 #include <IO/ReadSettings.h>
 #include <Interpreters/FilesystemReadPrefetchesLog.h>
-#include <utility>
+#include "config.h"

 namespace Poco { class Logger; }

@ -71,7 +72,7 @@ private:

    struct LastPrefetchInfo
    {
-        UInt64 submit_time = 0;
+        /// Wall-clock time at which the most recent prefetch was submitted; assigned in prefetch().
+        std::chrono::system_clock::time_point submit_time;
        Priority priority;
    };
    LastPrefetchInfo last_prefetch_info;
--- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp
+++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp
@ -124,7 +124,7 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
    else
    {
        CreateFileSegmentSettings create_settings(is_persistent ? FileSegmentKind::Persistent : FileSegmentKind::Regular);
-        file_segments = cache->getOrSet(cache_key, offset, size, create_settings);
+        file_segments = cache->getOrSet(cache_key, offset, size, file_size.value(), create_settings);
    }

    /**
@ -529,6 +529,9 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
            ProfileEvents::FileSegmentPredownloadMicroseconds, predownload_watch.elapsedMicroseconds());
    });

+    OpenTelemetry::SpanHolder span{
+        fmt::format("CachedOnDiskReadBufferFromFile::predownload(key={}, size={})", file_segment.key().toString(), bytes_to_predownload)};
+
    if (bytes_to_predownload)
    {
        /// Consider this case. Some user needed segment [a, b] and downloaded it partially.
@ -795,6 +798,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
    if (file_segments->empty())
        return false;

+    const size_t original_buffer_size = internal_buffer.size();
+
    bool implementation_buffer_can_be_reused = false;
    SCOPE_EXIT({
        try
@ -820,6 +825,9 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
                }
            }

+            if (use_external_buffer && !internal_buffer.empty())
+                internal_buffer.resize(original_buffer_size);
+
            chassert(!file_segment.isDownloader());
        }
        catch (...)
@ -846,6 +854,11 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()

    chassert(!internal_buffer.empty());

+    /// We allocate buffers not less than 1M so that s3 requests will not be too small. But the same buffers (members of AsynchronousReadIndirectBufferFromRemoteFS)
+    /// are used for reading from files. Some of these readings are fairly small and their performance degrade when we use big buffers (up to ~20% for queries like Q23 from ClickBench).
+    if (use_external_buffer && read_type == ReadType::CACHED && settings.local_fs_buffer_size < internal_buffer.size())
+        internal_buffer.resize(settings.local_fs_buffer_size);
+
    // Pass a valid external buffer for implementation_buffer to read into.
    // We then take it back with another swap() after reading is done.
    // (If we get an exception in between, we'll be left with an invalid internal_buffer. That's ok, as long as
--- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp
+++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp
@ -79,7 +79,7 @@ std::future<IAsynchronousReader::Result> ThreadPoolRemoteFSReader::submit(Reques
        auto async_read_counters = remote_fs_fd->getReadCounters();
        std::optional<AsyncReadIncrement> increment = async_read_counters ? std::optional<AsyncReadIncrement>(async_read_counters) : std::nullopt;

-        auto watch = std::make_unique<Stopwatch>(CLOCK_MONOTONIC);
+        auto watch = std::make_unique<Stopwatch>(CLOCK_REALTIME);
        Result result = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore);
        watch->stop();

--- a/src/Functions/EntropyLearnedHash.cpp
+++ b/src/Functions/EntropyLearnedHash.cpp
@ -1,395 +0,0 @@
-#include <base/defines.h>
-#include <base/types.h>
-#include <Columns/ColumnsNumber.h>
-#include <Columns/ColumnString.h>
-#include <Common/Exception.h>
-#include <DataTypes/DataTypesNumber.h>
-#include <Functions/FunctionFactory.h>
-#include <Functions/FunctionHelpers.h>
-#include <Functions/IFunction.h>
-#include <Interpreters/Context.h>
-
-/// Implementation of entropy-learned hashing: https://doi.org/10.1145/3514221.3517894
-/// If you change something in this file, please don't deviate too much from the pseudocode in the paper!
-
-/// TODOs for future work:
-/// - allow to specify an arbitrary hash function (currently always CityHash is used)
-/// - allow function chaining a la entropyLearnedHash(trainEntropyLearnedHash())
-/// - support more datatypes for data (besides String)
-
-
-namespace DB
-{
-
-namespace ErrorCodes
-{
-    extern const int BAD_ARGUMENTS;
-    extern const int ILLEGAL_COLUMN;
-    extern const int SUPPORT_IS_DISABLED;
-}
-
-namespace
-{
-
-using PartialKeyPositions = std::vector<size_t>;
-using Entropies = std::vector<size_t>;
-
-void getPartialKey(std::string_view key, const PartialKeyPositions & partial_key_positions, String & result)
-{
-    result.clear();
-    result.reserve(partial_key_positions.size());
-
-    for (auto partial_key_position : partial_key_positions)
-        if (partial_key_position < key.size())
-            result.push_back(key[partial_key_position]);
-}
-
-bool allPartialKeysAreUnique(const std::vector<std::string_view> & keys, const PartialKeyPositions & partial_key_positions)
-{
-    std::unordered_set<String> unique_partial_keys;
-    unique_partial_keys.reserve(keys.size());
-    String partial_key;
-
-    for (const auto & key : keys)
-    {
-        getPartialKey(key, partial_key_positions, partial_key);
-        if (!unique_partial_keys.insert(partial_key).second)
-            return false;
-    }
-
-    return true;
-}
-
-// NextByte returns position of byte which adds the most entropy and the new entropy
-std::pair<size_t, size_t> nextByte(const std::vector<std::string_view> & keys, size_t max_len, PartialKeyPositions & partial_key_positions)
-{
-    size_t min_collisions = std::numeric_limits<size_t>::max();
-    size_t best_position = 0;
-
-    std::unordered_map<String, size_t> count_table;
-    count_table.reserve(keys.size());
-
-    String partial_key;
-
-    for (size_t i = 0; i < max_len; ++i)
-    {
-        count_table.clear();
-
-        partial_key_positions.push_back(i);
-        size_t collisions = 0;
-        for (const auto & key : keys)
-        {
-            getPartialKey(key, partial_key_positions, partial_key);
-            collisions += count_table[partial_key]++;
-        }
-
-        if (collisions < min_collisions)
-        {
-            min_collisions = collisions;
-            best_position = i;
-        }
-        partial_key_positions.pop_back();
-    }
-
-    return {best_position, min_collisions};
-}
-
-std::pair<PartialKeyPositions, Entropies> chooseBytes(const std::vector<std::string_view> & train_data)
-{
-    if (train_data.size() <= 1)
-        return {};
-
-    PartialKeyPositions partial_key_positions;
-    Entropies entropies;
-
-    size_t max_len = 0; /// length of the longest key in training data
-    for (const auto & key : train_data)
-        max_len = std::max(max_len, key.size());
-
-    while (!allPartialKeysAreUnique(train_data, partial_key_positions))
-    {
-        auto [new_position, new_entropy] = nextByte(train_data, max_len, partial_key_positions);
-        if (!entropies.empty() && new_entropy == entropies.back())
-            break;
-        partial_key_positions.push_back(new_position);
-        entropies.push_back(new_entropy);
-    }
-    return {partial_key_positions, entropies};
-}
-
-/// Contains global state to convey information between SQL functions
-/// - prepareTrainEntropyLearnedHash(),
-/// - trainEntropyLearnedHash() and
-/// - entropyLearnedHash().
-///
-/// The reason this machinery is necessary is that ClickHouse processes data in chunks of unpredictable size, yet the training step of
-/// entropy-learned hashing needs to process *all* training data in one go. The downside is that the training step becomes quite expensive :-(
-class EntropyLearnedHashGlobalState
-{
-public:
-    static EntropyLearnedHashGlobalState & instance()
-    {
-        static EntropyLearnedHashGlobalState instance;
-        return instance;
-    }
-
-    /// Called by prepareTrainEntropyLearnedHash()
-    void cacheTrainingSample(const String & user_name, const String & id, IColumn::MutablePtr column)
-    {
-        std::lock_guard lock(mutex);
-        auto & ids_for_user = global_state[user_name];
-        auto & training_samples_for_id = ids_for_user[id].training_samples;
-        training_samples_for_id.push_back(std::move(column));
-    }
-
-    void train(const String & user_name, const String & id)
-    {
-        std::lock_guard lock(mutex);
-        auto & ids_for_user = global_state[user_name];
-        auto & training_samples = ids_for_user[id].training_samples;
-
-        if (training_samples.empty())
-            return;
-
-        auto & concatenated_training_sample = training_samples[0];
-        for (size_t i = 1; i < training_samples.size(); ++i)
-        {
-            auto & other_training_sample = training_samples[i];
-            concatenated_training_sample->insertRangeFrom(*other_training_sample, 0, other_training_sample->size());
-        }
-
-        const ColumnString * concatenated_training_sample_string = checkAndGetColumn<ColumnString>(*concatenated_training_sample);
-        if (!concatenated_training_sample_string)
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column");
-
-        const size_t num_rows = concatenated_training_sample_string->size();
-        std::vector<std::string_view> training_data;
-        for (size_t i = 0; i < num_rows; ++i)
-        {
-            std::string_view string_view = concatenated_training_sample_string->getDataAt(i).toView();
-            training_data.emplace_back(string_view);
-        }
-
-        PartialKeyPositions partial_key_positions = chooseBytes(training_data).first;
-
-        ids_for_user[id].partial_key_positions = partial_key_positions;
-        training_samples.clear();
-    }
-
-    const PartialKeyPositions & getPartialKeyPositions(const String & user_name, const String & id) const
-    {
-        std::lock_guard lock(mutex);
-        auto it_user = global_state.find(user_name);
-        if (it_user == global_state.end())
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Id {} not registered for user in entropy learned hashing", id);
-        auto it_id = it_user->second.find(id);
-        if (it_id == it_user->second.end())
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Id {} not registered for user in entropy learned hashing", id);
-        return it_id->second.partial_key_positions;
-    }
-
-private:
-    mutable std::mutex mutex;
-
-    /// The state.
-    struct ColumnsAndPartialKeyPositions
-    {
-        /// Caches training data chunks. Filled by prepareTrainEntropyLearnedHash(), cleared by trainEntropyLearnedHash().
-        MutableColumns training_samples;
-        /// The result of the training phase. Filled by trainEntropyLearnedHash().
-        PartialKeyPositions partial_key_positions;
-    };
-
-    /// Maps a state id to the state.
-    using IdToColumnsAndPartialKeyPositions = std::map<String, ColumnsAndPartialKeyPositions>;
-
-    /// Maps the user name to a state id. As a result, the state id is unique at user scope.
-    using UserNameToId = std::map<String, IdToColumnsAndPartialKeyPositions>;
-
-    UserNameToId global_state TSA_GUARDED_BY(mutex);
-};
-
-}
-
-
-/// Copies all chunks of the training sample column into the global state under a given id.
-class FunctionPrepareTrainEntropyLearnedHash : public IFunction
-{
-public:
-    static constexpr auto name = "prepareTrainEntropyLearnedHash";
-    static FunctionPtr create(ContextPtr context)
-    {
-        if (!context->getSettings().allow_experimental_hash_functions)
-            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
-                "Entropy-learned hashing is experimental. Set `allow_experimental_hash_functions` setting to enable it");
-
-        return std::make_shared<FunctionPrepareTrainEntropyLearnedHash>(context->getUserName());
-    }
-    explicit FunctionPrepareTrainEntropyLearnedHash(const String & user_name_) : IFunction(), user_name(user_name_) {}
-
-    String getName() const override { return name; }
-    size_t getNumberOfArguments() const override { return 2; }
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-    bool useDefaultImplementationForConstants() const override { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        FunctionArgumentDescriptors args{
-            {"data", &isString<IDataType>, nullptr, "String"},
-            {"id", &isString<IDataType>, nullptr, "String"}
-        };
-
-        validateFunctionArgumentTypes(*this, arguments, args);
-
-        return std::make_shared<DataTypeUInt8>();
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
-    {
-        const IColumn * id_col = arguments[1].column.get();
-        const ColumnConst * id_col_const = checkAndGetColumn<ColumnConst>(id_col);
-        const String id = id_col_const->getValue<String>();
-
-        IColumn::Ptr data_col = arguments[0].column;
-        IColumn::MutablePtr data_col_mutable = IColumn::mutate(data_col);
-
-        auto & global_state = EntropyLearnedHashGlobalState::instance();
-        global_state.cacheTrainingSample(user_name, id, std::move(data_col_mutable));
-
-        const size_t num_rows = data_col->size();
-        return result_type->createColumnConst(num_rows, 0u); /// dummy output
-    }
-private:
-    const String user_name;
-};
-
-
-/// 1. Concatenates the training samples of a given id in the global state.
-/// 2. Computes the partial key positions from the concatenated training samples and stores that in the global state.
-/// 3. clear()-s the training samples in the global state.
-class FunctionTrainEntropyLearnedHash : public IFunction
-{
-public:
-    static constexpr auto name = "trainEntropyLearnedHash";
-    static FunctionPtr create(ContextPtr context)
-    {
-        if (!context->getSettings().allow_experimental_hash_functions)
-            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
-                "Entropy-learned hashing is experimental. Set `allow_experimental_hash_functions` setting to enable it");
-        return std::make_shared<FunctionTrainEntropyLearnedHash>(context->getUserName());
-    }
-    explicit FunctionTrainEntropyLearnedHash(const String & user_name_) : IFunction(), user_name(user_name_) {}
-
-    String getName() const override { return name; }
-    size_t getNumberOfArguments() const override { return 1; }
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-    bool useDefaultImplementationForConstants() const override { return false; }
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        FunctionArgumentDescriptors args{
-            {"id", &isString<IDataType>, nullptr, "String"}
-        };
-
-        validateFunctionArgumentTypes(*this, arguments, args);
-
-        return std::make_shared<DataTypeUInt8>();
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override
-    {
-        const IColumn * id_col = arguments[0].column.get();
-        const ColumnConst * id_col_const = checkAndGetColumn<ColumnConst>(id_col);
-        if (!id_col_const)
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
-                arguments.begin()->column->getName(), getName());
-
-        auto & global_state = EntropyLearnedHashGlobalState::instance();
-
-        const String id = id_col_const->getValue<String>();
-        global_state.train(user_name, id);
-
-        const size_t num_rows = id_col->size();
-        return result_type->createColumnConst(num_rows, 0u); /// dummy output
-    }
-private:
-    const String user_name;
-};
-
-
-/// Hashes input strings using partial key positions stored in the global state.
-class FunctionEntropyLearnedHash : public IFunction
-{
-public:
-    static constexpr auto name = "entropyLearnedHash";
-    static FunctionPtr create(ContextPtr context)
-    {
-        if (!context->getSettings().allow_experimental_hash_functions)
-            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
-                "Entropy-learned hashing experimental. Set `allow_experimental_hash_functions` setting to enable it");
-        return std::make_shared<FunctionEntropyLearnedHash>(context->getUserName());
-    }
-    explicit FunctionEntropyLearnedHash(const String & user_name_) : IFunction(), user_name(user_name_) {}
-
-    String getName() const override { return name; }
-    size_t getNumberOfArguments() const override { return 2; }
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-    bool useDefaultImplementationForConstants() const override { return true; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
-
-
-    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
-    {
-        FunctionArgumentDescriptors args{
-            {"data", &isString<IDataType>, nullptr, "String"},
-            {"id", &isString<IDataType>, nullptr, "String"}
-        };
-
-        validateFunctionArgumentTypes(*this, arguments, args);
-
-        return std::make_shared<DataTypeUInt64>();
-    }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
-    {
-        const IColumn * id_col = arguments.back().column.get();
-        const ColumnConst * id_col_const = checkAndGetColumn<ColumnConst>(id_col);
-        const String id = id_col_const->getValue<String>();
-
-        const auto & global_state = EntropyLearnedHashGlobalState::instance();
-        const auto & partial_key_positions = global_state.getPartialKeyPositions(user_name, id);
-
-        const auto * data_col = arguments[0].column.get();
-        if (const auto * col_data_string = checkAndGetColumn<ColumnString>(data_col))
-        {
-            const size_t num_rows = col_data_string->size();
-            auto col_res = ColumnUInt64::create(num_rows);
-
-            auto & col_res_vec = col_res->getData();
-            String partial_key;
-            for (size_t i = 0; i < num_rows; ++i)
-            {
-                std::string_view string_ref = col_data_string->getDataAt(i).toView();
-                getPartialKey(string_ref, partial_key_positions, partial_key);
-                col_res_vec[i] = CityHash_v1_0_2::CityHash64(partial_key.data(), partial_key.size());
-            }
-
-            return col_res;
-        }
-        else
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
-                arguments.begin()->column->getName(), getName());
-    }
-private:
-    const String user_name;
-};
-
-REGISTER_FUNCTION(EntropyLearnedHash)
-{
-    factory.registerFunction<FunctionPrepareTrainEntropyLearnedHash>();
-    factory.registerFunction<FunctionTrainEntropyLearnedHash>();
-    factory.registerFunction<FunctionEntropyLearnedHash>();
-}
-
-}
--- a/src/Functions/FunctionGenerateRandomStructure.cpp
+++ b/src/Functions/FunctionGenerateRandomStructure.cpp
@ -0,0 +1,446 @@
+#include <Functions/FunctionGenerateRandomStructure.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <Functions/IFunction.h>
+#include <Columns/ColumnString.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypeFixedString.h>
+#include <Interpreters/Context.h>
+#include <Common/randomSeed.h>
+#include <Common/FunctionDocumentation.h>
+#include <IO/WriteHelpers.h>
+#include <IO/WriteBufferFromVector.h>
+
+#include <pcg_random.hpp>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int BAD_ARGUMENTS;
+}
+
+namespace
+{
+    const size_t MAX_NUMBER_OF_COLUMNS = 128; /// Upper bound for both the randomly drawn and the user-supplied number of columns.
+    const size_t MAX_TUPLE_ELEMENTS = 16; /// A generated Tuple/Nested gets between 1 and this many elements.
+    const size_t MAX_DATETIME64_PRECISION = 9; /// The argument written for DateTime64(...) is drawn from [1, 9].
+    const size_t MAX_DECIMAL32_PRECISION = 9; /// Each MAX_DECIMAL* value bounds the argument written for the matching DecimalN(...): it is drawn from [1, MAX].
+    const size_t MAX_DECIMAL64_PRECISION = 18;
+    const size_t MAX_DECIMAL128_PRECISION = 38;
+    const size_t MAX_DECIMAL256_PRECISION = 76;
+    const size_t MAX_DEPTH = 16; /// Nesting depth beyond which writeRandomType() is meant to fall back to simple types only.
+
+    constexpr std::array<TypeIndex, 29> simple_types /// Non-composite types; getAllTypes() always puts every entry into its result.
+    {
+        TypeIndex::Int8,
+        TypeIndex::UInt8,
+        TypeIndex::Int16,
+        TypeIndex::UInt16,
+        TypeIndex::Int32,
+        TypeIndex::UInt32,
+        TypeIndex::Int64,
+        TypeIndex::UInt64,
+        TypeIndex::Int128,
+        TypeIndex::UInt128,
+        TypeIndex::Int256,
+        TypeIndex::UInt256,
+        TypeIndex::Float32,
+        TypeIndex::Float64,
+        TypeIndex::Decimal32,
+        TypeIndex::Decimal64,
+        TypeIndex::Decimal128,
+        TypeIndex::Decimal256,
+        TypeIndex::Date,
+        TypeIndex::Date32,
+        TypeIndex::DateTime,
+        TypeIndex::DateTime64,
+        TypeIndex::String,
+        TypeIndex::FixedString,
+        TypeIndex::Enum8,
+        TypeIndex::Enum16,
+        TypeIndex::IPv4,
+        TypeIndex::IPv6,
+        TypeIndex::UUID,
+    };
+
+    constexpr std::array<TypeIndex, 5> complex_types /// Wrapping/composite types; getAllTypes<true>() appends them after simple_types.
+    {
+        TypeIndex::Nullable,
+        TypeIndex::LowCardinality,
+        TypeIndex::Array,
+        TypeIndex::Tuple,
+        TypeIndex::Map,
+    };
+
+    constexpr std::array<TypeIndex, 22> map_key_types /// Candidate key types that writeMapKeyType() picks from.
+    {
+        TypeIndex::Int8,
+        TypeIndex::UInt8,
+        TypeIndex::Int16,
+        TypeIndex::UInt16,
+        TypeIndex::Int32,
+        TypeIndex::UInt32,
+        TypeIndex::Int64,
+        TypeIndex::UInt64,
+        TypeIndex::Int128,
+        TypeIndex::UInt128,
+        TypeIndex::Int256,
+        TypeIndex::UInt256,
+        TypeIndex::Date,
+        TypeIndex::Date32,
+        TypeIndex::DateTime,
+        TypeIndex::String,
+        TypeIndex::FixedString,
+        TypeIndex::IPv4,
+        TypeIndex::Enum8,
+        TypeIndex::Enum16,
+        TypeIndex::UUID,
+        TypeIndex::LowCardinality,
+    };
+
+    constexpr std::array<TypeIndex, 22> suspicious_lc_types /// Nested types writeLowCardinalityNestedType() picks from when allow_suspicious_lc_types is true.
+    {
+        TypeIndex::Int8,
+        TypeIndex::UInt8,
+        TypeIndex::Int16,
+        TypeIndex::UInt16,
+        TypeIndex::Int32,
+        TypeIndex::UInt32,
+        TypeIndex::Int64,
+        TypeIndex::UInt64,
+        TypeIndex::Int128,
+        TypeIndex::UInt128,
+        TypeIndex::Int256,
+        TypeIndex::UInt256,
+        TypeIndex::Float32,
+        TypeIndex::Float64,
+        TypeIndex::Date,
+        TypeIndex::Date32,
+        TypeIndex::DateTime,
+        TypeIndex::String,
+        TypeIndex::FixedString,
+        TypeIndex::IPv4,
+        TypeIndex::IPv6,
+        TypeIndex::UUID,
+    };
+
+    /// Builds, at compile time, the list of types writeRandomType() may choose from:
+    /// all of `simple_types`, followed by all of `complex_types` when `allow_complex_types` is set.
+    template <bool allow_complex_types>
+    constexpr auto getAllTypes()
+    {
+        constexpr size_t extra_types = allow_complex_types ? complex_types.size() : 0;
+        std::array<TypeIndex, simple_types.size() + extra_types> all_types;
+        size_t position = 0;
+
+        for (const TypeIndex simple_type : simple_types)
+            all_types[position++] = simple_type;
+
+        for (size_t j = 0; j < extra_types; ++j)
+            all_types[position++] = complex_types[j];
+
+        return all_types;
+    }
+
+    /// Draws the number of columns of a structure: a value in [1, MAX_NUMBER_OF_COLUMNS].
+    size_t generateNumberOfColumns(pcg64 & rng)
+    {
+        const auto zero_based = rng() % MAX_NUMBER_OF_COLUMNS;
+        return zero_based + 1;
+    }
+
+    /// Writes the type nested inside LowCardinality(...), optionally wrapped in Nullable(...).
+    /// With `allow_suspicious_lc_types` the nested type is picked from `suspicious_lc_types`,
+    /// otherwise it is either String or FixedString(N).
+    void writeLowCardinalityNestedType(pcg64 & rng, WriteBuffer & buf, bool allow_suspicious_lc_types)
+    {
+        const auto write_fixed_string = [&]
+        {
+            writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf);
+        };
+
+        const bool wrap_in_nullable = rng() % 2;
+        if (wrap_in_nullable)
+            writeCString("Nullable(", buf);
+
+        if (!allow_suspicious_lc_types)
+        {
+            /// Support only String and FixedString.
+            if (rng() % 2 == 0)
+                write_fixed_string();
+            else
+                writeCString("String", buf);
+        }
+        else
+        {
+            const TypeIndex nested_type = suspicious_lc_types[rng() % suspicious_lc_types.size()];
+
+            if (nested_type != TypeIndex::FixedString)
+                writeString(magic_enum::enum_name<TypeIndex>(nested_type), buf);
+            else
+                write_fixed_string();
+        }
+
+        if (wrap_in_nullable)
+            writeChar(')', buf);
+    }
+
+    void writeEnumValues(const String & column_name, pcg64 & rng, WriteBuffer & buf, ssize_t max_value) /// Writes 1..16 comma-separated entries of the form '<column_name>V<i>' = <value> with pairwise distinct values.
+    {
+        /// Don't generate big enums, because it will lead to really big result
+        /// and slowness of this function, and it can lead to `Max query size exceeded`
+        /// while using this function with generateRandom.
+        size_t num_values = rng() % 16 + 1;
+        std::vector<Int16> values(num_values);
+
+        /// Generate random numbers from range [-(max_value + 1), max_value - num_values + 1].
+        for (Int16 & x : values)
+            x = rng() % (2 * max_value + 3 - num_values) - max_value - 1;
+        /// Make all numbers unique.
+        std::sort(values.begin(), values.end());
+        for (size_t i = 0; i < num_values; ++i)
+            values[i] += i; /// Adding the rank to the sorted values makes them strictly increasing (hence distinct) and keeps them <= max_value.
+        std::shuffle(values.begin(), values.end(), rng); /// Randomizes which value each 'V<i>' name receives.
+        for (size_t i = 0; i != num_values; ++i)
+        {
+            if (i != 0)
+                writeCString(", ", buf);
+            writeString("'" + column_name + "V" + std::to_string(i) + "' = " + std::to_string(values[i]), buf);
+        }
+    }
+
+    /// Writes a random type usable as a Map key, picked from `map_key_types`.
+    /// `column_name` only serves as a prefix for the value names of generated enums.
+    void writeMapKeyType(const String & column_name, pcg64 & rng, WriteBuffer & buf)
+    {
+        const auto write_fixed_string = [&]
+        {
+            writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf);
+        };
+
+        const TypeIndex key_type = map_key_types[rng() % map_key_types.size()];
+
+        if (key_type == TypeIndex::FixedString)
+        {
+            write_fixed_string();
+        }
+        else if (key_type == TypeIndex::LowCardinality)
+        {
+            writeCString("LowCardinality(", buf);
+            /// Map key supports only String and FixedString inside LowCardinality.
+            if (rng() % 2 == 0)
+                write_fixed_string();
+            else
+                writeCString("String", buf);
+            writeChar(')', buf);
+        }
+        else if (key_type == TypeIndex::Enum8)
+        {
+            writeCString("Enum8(", buf);
+            writeEnumValues(column_name, rng, buf, INT8_MAX);
+            writeChar(')', buf);
+        }
+        else if (key_type == TypeIndex::Enum16)
+        {
+            writeCString("Enum16(", buf);
+            writeEnumValues(column_name, rng, buf, INT16_MAX);
+            writeChar(')', buf);
+        }
+        else
+        {
+            /// Every other key type is spelled exactly like its TypeIndex name.
+            writeString(magic_enum::enum_name<TypeIndex>(key_type), buf);
+        }
+    }
+
+    /// Writes one random data type to `buf`.
+    ///
+    /// `column_name` is only used to build the value names of generated enums.
+    /// `allow_suspicious_lc_types` is forwarded to writeLowCardinalityNestedType().
+    /// `depth` is the current nesting level; every nested type is generated with `depth + 1`.
+    /// With `allow_complex_types == false` only entries of `simple_types` can be chosen.
+    template <bool allow_complex_types = true>
+    void writeRandomType(const String & column_name, pcg64 & rng, WriteBuffer & buf, bool allow_suspicious_lc_types, size_t depth = 0)
+    {
+        /// Too deep: emit exactly one simple type and stop.
+        /// Returning here is essential: otherwise a second type would be written right after
+        /// the simple one (producing a malformed type string) and nesting would not be bounded.
+        if (allow_complex_types && depth > MAX_DEPTH)
+        {
+            writeRandomType<false>(column_name, rng, buf, allow_suspicious_lc_types, depth);
+            return;
+        }
+
+        constexpr auto all_types = getAllTypes<allow_complex_types>();
+        auto type = all_types[rng() % all_types.size()];
+
+        switch (type)
+        {
+            case TypeIndex::UInt8:
+                /// Bool has no TypeIndex of its own here, so it shares the UInt8 slot.
+                if (rng() % 2)
+                    writeCString("UInt8", buf);
+                else
+                    writeCString("Bool", buf);
+                return;
+            case TypeIndex::FixedString:
+                writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf);
+                return;
+            case TypeIndex::DateTime64:
+                writeString("DateTime64(" + std::to_string(rng() % MAX_DATETIME64_PRECISION + 1) + ")", buf);
+                return;
+            case TypeIndex::Decimal32:
+                writeString("Decimal32(" + std::to_string(rng() % MAX_DECIMAL32_PRECISION + 1) + ")", buf);
+                return;
+            case TypeIndex::Decimal64:
+                writeString("Decimal64(" + std::to_string(rng() % MAX_DECIMAL64_PRECISION + 1) + ")", buf);
+                return;
+            case TypeIndex::Decimal128:
+                writeString("Decimal128(" + std::to_string(rng() % MAX_DECIMAL128_PRECISION + 1) + ")", buf);
+                return;
+            case TypeIndex::Decimal256:
+                writeString("Decimal256(" + std::to_string(rng() % MAX_DECIMAL256_PRECISION + 1) + ")", buf);
+                return;
+            case TypeIndex::Enum8:
+                writeCString("Enum8(", buf);
+                writeEnumValues(column_name, rng, buf, INT8_MAX);
+                writeChar(')', buf);
+                return;
+            case TypeIndex::Enum16:
+                writeCString("Enum16(", buf);
+                writeEnumValues(column_name, rng, buf, INT16_MAX);
+                writeChar(')', buf);
+                return;
+            case TypeIndex::LowCardinality:
+                writeCString("LowCardinality(", buf);
+                writeLowCardinalityNestedType(rng, buf, allow_suspicious_lc_types);
+                writeChar(')', buf);
+                return;
+            case TypeIndex::Nullable:
+            {
+                /// Only simple types are generated inside Nullable.
+                writeCString("Nullable(", buf);
+                writeRandomType<false>(column_name, rng, buf, allow_suspicious_lc_types, depth + 1);
+                writeChar(')', buf);
+                return;
+            }
+            case TypeIndex::Array:
+            {
+                writeCString("Array(", buf);
+                writeRandomType(column_name, rng, buf, allow_suspicious_lc_types, depth + 1);
+                writeChar(')', buf);
+                return;
+            }
+            case TypeIndex::Map:
+            {
+                writeCString("Map(", buf);
+                writeMapKeyType(column_name, rng, buf);
+                writeCString(", ", buf);
+                writeRandomType(column_name, rng, buf, allow_suspicious_lc_types, depth + 1);
+                writeChar(')', buf);
+                return;
+            }
+            case TypeIndex::Tuple:
+            {
+                /// The Tuple slot yields either Tuple(...) or Nested(...); Nested elements are always named.
+                size_t elements = rng() % MAX_TUPLE_ELEMENTS + 1;
+                bool generate_nested = rng() % 2;
+                bool generate_named_tuple = rng() % 2;
+                if (generate_nested)
+                    writeCString("Nested(", buf);
+                else
+                    writeCString("Tuple(", buf);
+
+                for (size_t i = 0; i != elements; ++i)
+                {
+                    if (i != 0)
+                        writeCString(", ", buf);
+
+                    String element_name = "e" + std::to_string(i + 1);
+                    if (generate_named_tuple || generate_nested)
+                    {
+                        writeString(element_name, buf);
+                        writeChar(' ', buf);
+                    }
+                    writeRandomType(element_name, rng, buf, allow_suspicious_lc_types, depth + 1);
+                }
+                writeChar(')', buf);
+                return;
+            }
+            default:
+                /// Remaining types take no parameters and are spelled like their TypeIndex name.
+                writeString(magic_enum::enum_name<TypeIndex>(type), buf);
+                return;
+        }
+    }
+
+    /// Writes `number_of_columns` comma-separated "c<N> <random type>" definitions to `buf`, N starting at 1.
+    void writeRandomStructure(pcg64 & rng, size_t number_of_columns, WriteBuffer & buf, bool allow_suspicious_lc_types)
+    {
+        size_t written = 0;
+        while (written != number_of_columns)
+        {
+            if (written > 0)
+                writeCString(", ", buf);
+
+            ++written;
+            const String column_name = "c" + std::to_string(written);
+            writeString(column_name, buf);
+            writeChar(' ', buf);
+            writeRandomType(column_name, rng, buf, allow_suspicious_lc_types);
+        }
+    }
+}
+
+/// Validates the arguments (at most two, each an unsigned integer or Null) and returns String as the result type.
+DataTypePtr FunctionGenerateRandomStructure::getReturnTypeImpl(const DataTypes & arguments) const
+{
+    const size_t num_arguments = arguments.size();
+    if (num_arguments > 2)
+        throw Exception(
+            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+            "Number of arguments for function {} doesn't match: passed {}, expected from 0 to 2",
+            getName(), num_arguments);
+
+    size_t position = 0;
+    for (const auto & argument : arguments)
+    {
+        ++position;
+
+        const bool acceptable = isUnsignedInteger(argument) || argument->onlyNull();
+        if (acceptable)
+            continue;
+
+        throw Exception(
+            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+            "Illegal type {} of the {} argument of function {}, expected unsigned integer or Null",
+            argument->getName(),
+            position,
+            getName());
+    }
+
+    return std::make_shared<DataTypeString>();
+}
+
+ColumnPtr FunctionGenerateRandomStructure::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const /// Generates one structure string and returns it as a constant column of input_rows_count rows.
+{
+    size_t seed = randomSeed();
+    size_t number_of_columns = 0; /// 0 means "not specified"; an explicit argument of 0 is treated the same way.
+
+    if (!arguments.empty() && !arguments[0].column->onlyNull())
+    {
+        number_of_columns = arguments[0].column->getUInt(0);
+        if (number_of_columns > MAX_NUMBER_OF_COLUMNS)
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                "Maximum allowed number of columns is {}, got {}",
+                MAX_NUMBER_OF_COLUMNS,
+                number_of_columns);
+    }
+
+    if (arguments.size() > 1 && !arguments[1].column->onlyNull())
+        seed = arguments[1].column->getUInt(0); /// An explicit seed replaces the random one.
+
+    pcg64 rng(seed);
+    if (number_of_columns == 0)
+        number_of_columns = generateNumberOfColumns(rng); /// Drawn from the seeded generator, before any type is generated.
+
+    auto col_res = ColumnString::create();
+    auto & string_column = assert_cast<ColumnString &>(*col_res);
+    auto & chars = string_column.getChars();
+    WriteBufferFromVector buf(chars); /// The structure text is written straight into the column's character storage.
+    writeRandomStructure(rng, number_of_columns, buf, allow_suspicious_lc_types);
+    buf.finalize();
+    chars.push_back(0); /// Zero byte appended after the generated text, before the row's end offset is recorded.
+    string_column.getOffsets().push_back(chars.size());
+    return ColumnConst::create(std::move(col_res), input_rows_count);
+}
+
+/// Returns a random structure for the given seed; the number of columns is drawn from the seeded generator,
+/// and `allow_suspicious_low_cardinality_types` is read from the settings of `context`.
+String FunctionGenerateRandomStructure::generateRandomStructure(size_t seed, const ContextPtr & context)
+{
+    WriteBufferFromOwnString structure;
+    pcg64 generator(seed);
+    const size_t column_count = generateNumberOfColumns(generator);
+    const bool allow_suspicious_lc = context->getSettingsRef().allow_suspicious_low_cardinality_types;
+    writeRandomStructure(generator, column_count, structure, allow_suspicious_lc);
+    return structure.str();
+}
+
+REGISTER_FUNCTION(GenerateRandomStructure) /// Registers generateRandomStructure in the function factory together with its user-facing documentation.
+{
+    factory.registerFunction<FunctionGenerateRandomStructure>(FunctionDocumentation
+        {
+            .description=R"(
+Generates a random table structure.
+This function takes 2 optional constant arguments:
+the number of columns in the result structure (random by default) and random seed (random by default)
+The maximum number of columns is 128.
+The function returns a value of type String.
+)",
+            .examples{
+                {"random", "SELECT generateRandomStructure()", "c1 UInt32, c2 FixedString(25)"},
+                {"with specified number of columns", "SELECT generateRandomStructure(3)", "c1 String, c2 Array(Int32), c3 LowCardinality(String)"},
+                {"with specified seed", "SELECT generateRandomStructure(1, 42)", "c1 UInt128"}, /// NOTE(review): example outputs look illustrative - confirm against an actual run.
+            },
+            .categories{"Random"}
+        },
+        FunctionFactory::CaseSensitive); /// The function name is matched case-sensitively.
+}
+
+}
--- a/src/Functions/FunctionGenerateRandomStructure.h
+++ b/src/Functions/FunctionGenerateRandomStructure.h
@ -0,0 +1,47 @@
+#pragma once
+
+#include <Functions/IFunction.h>
+#include <Interpreters/Context.h>
+
+#include <pcg_random.hpp>
+
+namespace DB
+{
+
+class FunctionGenerateRandomStructure : public IFunction /// SQL function generateRandomStructure([number_of_columns[, seed]]): returns a random table structure as a String.
+{
+public:
+    static constexpr auto name = "generateRandomStructure";
+
+    explicit FunctionGenerateRandomStructure(bool allow_suspicious_lc_types_) : allow_suspicious_lc_types(allow_suspicious_lc_types_)
+    {
+    }
+
+    static FunctionPtr create(ContextPtr context) /// Captures allow_suspicious_low_cardinality_types from the context settings at creation time.
+    {
+        return std::make_shared<FunctionGenerateRandomStructure>(context->getSettingsRef().allow_suspicious_low_cardinality_types.value);
+    }
+
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 0; } /// The actual check (0 to 2 arguments) is done in getReturnTypeImpl().
+
+    bool isVariadic() const override { return true; }
+    bool isDeterministic() const override { return false; }
+    bool isDeterministicInScopeOfQuery() const override { return false; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const  override { return {0, 1}; } /// Both optional arguments (number of columns, seed) must be constants.
+    bool useDefaultImplementationForConstants() const override { return false; }
+    bool useDefaultImplementationForNulls() const override { return false; } /// Null arguments are handled by the function itself.
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override;
+
+    static String generateRandomStructure(size_t seed, const ContextPtr & context); /// Generates a structure directly from a seed, without executing the SQL function.
+
+private:
+    bool allow_suspicious_lc_types; /// Passed on to the structure generator by executeImpl().
+};
+
+}
--- a/src/IO/WriteBufferFromS3.cpp
+++ b/src/IO/WriteBufferFromS3.cpp
@ -195,18 +195,14 @@ void WriteBufferFromS3::finalizeImpl()

    if (request_settings.check_objects_after_upload)
    {
-        LOG_TRACE(log, "Checking object {} exists after upload", key);
        S3::checkObjectExists(*client_ptr, bucket, key, {}, request_settings, /* for_disk_s3= */ write_settings.for_object_storage, "Immediately after upload");

-        LOG_TRACE(log, "Checking object {} has size as expected {}", key, total_size);
        size_t actual_size = S3::getObjectSize(*client_ptr, bucket, key, {}, request_settings, /* for_disk_s3= */ write_settings.for_object_storage);
        if (actual_size != total_size)
            throw Exception(
                    ErrorCodes::S3_ERROR,
                    "Object {} from bucket {} has unexpected size {} after upload, expected size {}, it's a bug in S3 or S3 API.",
                    key, bucket, actual_size, total_size);
-
-        LOG_TRACE(log, "Object {} exists after upload", key);
    }
 }

@ -245,10 +241,8 @@ WriteBufferFromS3::~WriteBufferFromS3()
        LOG_INFO(log,
                 "WriteBufferFromS3 is not finalized in destructor. "
                 "It could be if an exception occurs. File is not written to S3. "
-                 "{}. "
-                 "Stack trace: {}",
-                 getLogDetails(),
-                 StackTrace().toString());
+                 "{}.",
+                 getLogDetails());
    }

    task_tracker->safeWaitAll();
@ -292,8 +286,6 @@ void WriteBufferFromS3::reallocateFirstBuffer()
    WriteBuffer::set(memory.data() + hidden_size, memory.size() - hidden_size);

    chassert(offset() == 0);
-
-    LOG_TRACE(log, "Reallocated first buffer with size {}. {}", memory.size(), getLogDetails());
 }

 void WriteBufferFromS3::detachBuffer()
@ -316,8 +308,6 @@ void WriteBufferFromS3::allocateFirstBuffer()
    const auto size = std::min(size_t(DBMS_DEFAULT_BUFFER_SIZE), max_first_buffer);
    memory = Memory(size);
    WriteBuffer::set(memory.data(), memory.size());
-
-    LOG_TRACE(log, "Allocated first buffer with size {}. {}", memory.size(), getLogDetails());
 }

 void WriteBufferFromS3::allocateBuffer()
--- a/src/IO/WriteBufferFromS3TaskTracker.cpp
+++ b/src/IO/WriteBufferFromS3TaskTracker.cpp
@ -36,8 +36,6 @@ ThreadPoolCallbackRunner<void> WriteBufferFromS3::TaskTracker::syncRunner()

 void WriteBufferFromS3::TaskTracker::waitAll()
 {
-    LOG_TEST(log, "waitAll, in queue {}", futures.size());
-
    /// Exceptions are propagated
    for (auto & future : futures)
    {
@ -51,8 +49,6 @@ void WriteBufferFromS3::TaskTracker::waitAll()

 void WriteBufferFromS3::TaskTracker::safeWaitAll()
 {
-    LOG_TEST(log, "safeWaitAll, wait in queue {}", futures.size());
-
    for (auto & future : futures)
    {
        if (future.valid())
@ -76,7 +72,6 @@ void WriteBufferFromS3::TaskTracker::safeWaitAll()

 void WriteBufferFromS3::TaskTracker::waitIfAny()
 {
-    LOG_TEST(log, "waitIfAny, in queue {}", futures.size());
    if (futures.empty())
        return;

@ -101,8 +96,6 @@ void WriteBufferFromS3::TaskTracker::waitIfAny()

    watch.stop();
    ProfileEvents::increment(ProfileEvents::WriteBufferFromS3WaitInflightLimitMicroseconds, watch.elapsedMicroseconds());
-
-    LOG_TEST(log, "waitIfAny ended, in queue {}", futures.size());
 }

 void WriteBufferFromS3::TaskTracker::add(Callback && func)
@ -147,8 +140,6 @@ void WriteBufferFromS3::TaskTracker::waitTilInflightShrink()
    if (!max_tasks_inflight)
        return;

-    LOG_TEST(log, "waitTilInflightShrink, in queue {}", futures.size());
-
    Stopwatch watch;

    /// Alternative approach is to wait until at least futures.size() - max_tasks_inflight element are finished
@ -171,8 +162,6 @@ void WriteBufferFromS3::TaskTracker::waitTilInflightShrink()

    watch.stop();
    ProfileEvents::increment(ProfileEvents::WriteBufferFromS3WaitInflightLimitMicroseconds, watch.elapsedMicroseconds());
-
-    LOG_TEST(log, "waitTilInflightShrink ended, in queue {}", futures.size());
 }

 bool WriteBufferFromS3::TaskTracker::isAsync() const
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@ -1,16 +1,35 @@
 #include "FileCache.h"

-#include <Common/randomSeed.h>
+#include <IO/Operators.h>
+#include <IO/ReadHelpers.h>
+#include <IO/ReadSettings.h>
+#include <IO/WriteBufferFromFile.h>
+#include <IO/WriteBufferFromString.h>
 #include <Interpreters/Cache/FileCacheSettings.h>
 #include <Interpreters/Cache/LRUFileCachePriority.h>
 #include <Interpreters/Context.h>
-#include <IO/ReadHelpers.h>
-#include <IO/WriteBufferFromFile.h>
-#include <IO/ReadSettings.h>
-#include <IO/WriteBufferFromString.h>
-#include <IO/Operators.h>
-#include <pcg-random/pcg_random.hpp>
 #include <base/hex.h>
+#include <pcg-random/pcg_random.hpp>
+#include <Common/randomSeed.h>
+
+#include <filesystem>
+
+
+namespace fs = std::filesystem;
+
+namespace
+{
+
+/// Largest multiple of `multiple` that does not exceed `num`. `multiple` must be non-zero.
+size_t roundDownToMultiple(size_t num, size_t multiple)
+{
+    return num - num % multiple;
+}
+
+/// Smallest multiple of `multiple` that is not below `num`. `multiple` must be non-zero.
+size_t roundUpToMultiple(size_t num, size_t multiple)
+{
+    const size_t bumped = num + multiple - 1;
+    return bumped - bumped % multiple;
+}
+}

 namespace DB
 {
@ -26,6 +45,7 @@ FileCache::FileCache(const FileCacheSettings & settings)
    , delayed_cleanup_interval_ms(settings.delayed_cleanup_interval_ms)
    , log(&Poco::Logger::get("FileCache"))
    , metadata(settings.base_path)
+    , boundary_alignment(settings.boundary_alignment)
 {
    main_priority = std::make_unique<LRUFileCachePriority>(settings.max_size, settings.max_elements);

@ -385,15 +405,16 @@ FileSegmentsHolderPtr FileCache::set(
    return std::make_unique<FileSegmentsHolder>(std::move(file_segments));
 }

-FileSegmentsHolderPtr FileCache::getOrSet(
-    const Key & key,
-    size_t offset,
-    size_t size,
-    const CreateFileSegmentSettings & settings)
+FileSegmentsHolderPtr
+FileCache::getOrSet(const Key & key, size_t offset, size_t size, size_t file_size, const CreateFileSegmentSettings & settings)
 {
    assertInitialized();

-    FileSegment::Range range(offset, offset + size - 1);
+    const auto aligned_offset = roundDownToMultiple(offset, boundary_alignment);
+    const auto aligned_end = std::min(roundUpToMultiple(offset + size, boundary_alignment), file_size);
+    const auto aligned_size = aligned_end - aligned_offset;
+
+    FileSegment::Range range(aligned_offset, aligned_offset + aligned_size - 1);

    auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY);

@ -401,8 +422,7 @@ FileSegmentsHolderPtr FileCache::getOrSet(
    auto file_segments = getImpl(*locked_key, range);
    if (file_segments.empty())
    {
-        file_segments = splitRangeIntoFileSegments(
-            *locked_key, offset, size, FileSegment::State::EMPTY, settings);
+        file_segments = splitRangeIntoFileSegments(*locked_key, range.left, range.size(), FileSegment::State::EMPTY, settings);
    }
    else
    {
@ -410,6 +430,12 @@ FileSegmentsHolderPtr FileCache::getOrSet(
            *locked_key, file_segments, range, /* fill_with_detached */false, settings);
    }

+    while (!file_segments.empty() && file_segments.front()->range().right < offset)
+        file_segments.pop_front();
+
+    while (!file_segments.empty() && file_segments.back()->range().left >= offset + size)
+        file_segments.pop_back();
+
    chassert(!file_segments.empty());
    return std::make_unique<FileSegmentsHolder>(std::move(file_segments));
 }
--- a/src/Interpreters/Cache/FileCache.h
+++ b/src/Interpreters/Cache/FileCache.h
@ -67,7 +67,8 @@ public:
     * As long as pointers to returned file segments are held
     * it is guaranteed that these file segments are not removed from cache.
     */
-    FileSegmentsHolderPtr getOrSet(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings);
+    FileSegmentsHolderPtr
+    getOrSet(const Key & key, size_t offset, size_t size, size_t file_size, const CreateFileSegmentSettings & settings);

    /**
     * Segments in returned list are ordered in ascending order and represent a full contiguous
@ -179,6 +180,8 @@ private:

    void assertInitialized() const;

+    size_t boundary_alignment;
+
    void assertCacheCorrectness();

    void loadMetadata();
--- a/src/Interpreters/Cache/FileCacheSettings.cpp
+++ b/src/Interpreters/Cache/FileCacheSettings.cpp
@ -49,6 +49,8 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration &

    do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", true);

+    boundary_alignment = config.getUInt64(config_prefix + ".boundary_alignment", DBMS_DEFAULT_BUFFER_SIZE);
+
    delayed_cleanup_interval_ms = config.getUInt64(config_prefix + ".delayed_cleanup_interval_ms", FILECACHE_DELAYED_CLEANUP_INTERVAL_MS);
 }

--- a/src/Interpreters/Cache/FileCacheSettings.h
+++ b/src/Interpreters/Cache/FileCacheSettings.h
@ -1,5 +1,6 @@
 #pragma once

+#include <Core/Defines.h>
 #include <Interpreters/Cache/FileCache_fwd.h>
 #include <string>

@ -27,6 +28,8 @@ struct FileCacheSettings
    size_t bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD;
    size_t delayed_cleanup_interval_ms = FILECACHE_DELAYED_CLEANUP_INTERVAL_MS;

+    size_t boundary_alignment = DBMS_DEFAULT_BUFFER_SIZE;
+
    void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
 };

--- a/src/Interpreters/Cache/FileCache_fwd.h
+++ b/src/Interpreters/Cache/FileCache_fwd.h
@ -4,7 +4,7 @@
 namespace DB
 {

-static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024;
+static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 8 * 1024 * 1024;
 static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024;
 static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0;
 static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024;
--- a/src/Interpreters/Cache/FileSegment.cpp
+++ b/src/Interpreters/Cache/FileSegment.cpp
@ -1,13 +1,14 @@
 #include "FileSegment.h"

-#include <base/getThreadId.h>
-#include <Common/scope_guard_safe.h>
-#include <base/hex.h>
-#include <Common/logger_useful.h>
-#include <Interpreters/Cache/FileCache.h>
-#include <IO/WriteBufferFromString.h>
-#include <IO/Operators.h>
 #include <filesystem>
+#include <IO/Operators.h>
+#include <IO/WriteBufferFromString.h>
+#include <Interpreters/Cache/FileCache.h>
+#include <base/getThreadId.h>
+#include <base/hex.h>
+#include <Common/OpenTelemetryTraceContext.h>
+#include <Common/logger_useful.h>
+#include <Common/scope_guard_safe.h>

 #include <magic_enum.hpp>

@ -399,6 +400,8 @@ void FileSegment::write(const char * from, size_t size, size_t offset)

 FileSegment::State FileSegment::wait(size_t offset)
 {
+    OpenTelemetry::SpanHolder span{fmt::format("FileSegment::wait({})", key().toString())};
+
    auto lock = segment_guard.lock();

    if (downloader_id.empty() || offset < getCurrentWriteOffset(true))
--- a/src/Interpreters/FilesystemReadPrefetchesLog.cpp
+++ b/src/Interpreters/FilesystemReadPrefetchesLog.cpp
@ -1,9 +1,9 @@
-#include <Interpreters/FilesystemReadPrefetchesLog.h>
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeDateTime.h>
 #include <DataTypes/DataTypeDateTime64.h>
 #include <DataTypes/DataTypeString.h>
 #include <DataTypes/DataTypesNumber.h>
+#include <Interpreters/FilesystemReadPrefetchesLog.h>


 namespace DB
@ -39,12 +39,12 @@ void FilesystemReadPrefetchesLogElement::appendToBlock(MutableColumns & columns)
    columns[i++]->insert(path);
    columns[i++]->insert(offset);
    columns[i++]->insert(size);
-    columns[i++]->insert(prefetch_submit_time);
+    columns[i++]->insert(std::chrono::duration_cast<std::chrono::microseconds>(prefetch_submit_time.time_since_epoch()).count());
    columns[i++]->insert(priority.value);
    if (execution_watch)
    {
-        columns[i++]->insert(execution_watch->getStart());
-        columns[i++]->insert(execution_watch->getEnd());
+        columns[i++]->insert(execution_watch->getStart() / 1000);
+        columns[i++]->insert(execution_watch->getEnd() / 1000);
        columns[i++]->insert(execution_watch->elapsedMicroseconds());
    }
    else
--- a/src/Interpreters/FilesystemReadPrefetchesLog.h
+++ b/src/Interpreters/FilesystemReadPrefetchesLog.h
@ -24,7 +24,7 @@ struct FilesystemReadPrefetchesLogElement
    String path;
    UInt64 offset;
    Int64 size; /// -1 means unknown
-    Decimal64 prefetch_submit_time{};
+    std::chrono::system_clock::time_point prefetch_submit_time;
    std::optional<Stopwatch> execution_watch;
    Priority priority;
    FilesystemPrefetchState state;
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@ -322,6 +322,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
    /// This does not have impact on the final span logs, because these internal queries are issued by external queries,
    /// we still have enough span logs for the execution of external queries.
    std::shared_ptr<OpenTelemetry::SpanHolder> query_span = internal ? nullptr : std::make_shared<OpenTelemetry::SpanHolder>("query");
+    if (query_span)
+        LOG_DEBUG(&Poco::Logger::get("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id);

    auto query_start_time = std::chrono::system_clock::now();

--- a/src/Interpreters/tests/gtest_lru_file_cache.cpp
+++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp
@ -192,6 +192,9 @@ TEST_F(FileCacheTest, get)
    settings.base_path = cache_base_path;
    settings.max_size = 30;
    settings.max_elements = 5;
+    settings.boundary_alignment = 1;
+
+    const size_t file_size = -1; // the value doesn't really matter because boundary_alignment == 1.

    {
        std::cerr << "Step 1\n";
@ -200,7 +203,7 @@ TEST_F(FileCacheTest, get)
        auto key = cache.createKeyForPath("key1");

        {
-            auto holder = cache.getOrSet(key, 0, 10, {});  /// Add range [0, 9]
+            auto holder = cache.getOrSet(key, 0, 10, file_size, {}); /// Add range [0, 9]
            assertEqual(holder, { Range(0, 9) }, { State::EMPTY });
            download(holder->front());
            assertEqual(holder, { Range(0, 9) }, { State::DOWNLOADED });
@ -219,7 +222,7 @@ TEST_F(FileCacheTest, get)

        {
            /// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache.
-            auto holder = cache.getOrSet(key, 5, 10, {});
+            auto holder = cache.getOrSet(key, 5, 10, file_size, {});
            assertEqual(holder, { Range(0, 9), Range(10, 14) }, { State::DOWNLOADED, State::EMPTY });
            download(get(holder, 1));
            assertEqual(holder, { Range(0, 9), Range(10, 14) }, { State::DOWNLOADED, State::DOWNLOADED });
@ -238,20 +241,18 @@ TEST_F(FileCacheTest, get)

        /// Get [9, 9]
        {
-            auto holder = cache.getOrSet(key, 9, 1, {});
+            auto holder = cache.getOrSet(key, 9, 1, file_size, {});
            assertEqual(holder, { Range(0, 9) }, { State::DOWNLOADED });
            increasePriority(holder);
        }

        assertEqual(cache.dumpQueue(), { Range(10, 14), Range(0, 9) });
        /// Get [9, 10]
-        assertEqual(cache.getOrSet(key, 9, 2, {}),
-                    { Range(0, 9),       Range(10, 14) },
-                    { State::DOWNLOADED, State::DOWNLOADED });
+        assertEqual(cache.getOrSet(key, 9, 2, file_size, {}), {Range(0, 9), Range(10, 14)}, {State::DOWNLOADED, State::DOWNLOADED});

        /// Get [10, 10]
        {
-            auto holder = cache.getOrSet(key, 10, 1, {});
+            auto holder = cache.getOrSet(key, 10, 1, file_size, {});
            assertEqual(holder, { Range(10, 14) }, { State::DOWNLOADED });
            increasePriority(holder);
        }
@ -264,19 +265,19 @@ TEST_F(FileCacheTest, get)
        std::cerr << "Step 4\n";

        {
-            auto holder = cache.getOrSet(key, 17, 4, {});
+            auto holder = cache.getOrSet(key, 17, 4, file_size, {});
            download(holder); /// Get [17, 20]
            increasePriority(holder);
        }

        {
-            auto holder = cache.getOrSet(key, 24, 3, {});
+            auto holder = cache.getOrSet(key, 24, 3, file_size, {});
            download(holder); /// Get [24, 26]
            increasePriority(holder);
        }

        {
-            auto holder = cache.getOrSet(key, 27, 1, {});
+            auto holder = cache.getOrSet(key, 27, 1, file_size, {});
            download(holder); /// Get [27, 27]
            increasePriority(holder);
        }
@ -292,7 +293,7 @@ TEST_F(FileCacheTest, get)

        std::cerr << "Step 5\n";
        {
-            auto holder = cache.getOrSet(key, 0, 26, {}); /// Get [0, 25]
+            auto holder = cache.getOrSet(key, 0, 26, file_size, {}); /// Get [0, 25]
            assertEqual(holder,
                        { Range(0, 9),       Range(10, 14),     Range(15, 16),  Range(17, 20),     Range(21, 23), Range(24, 26) },
                        { State::DOWNLOADED, State::DOWNLOADED, State::EMPTY,   State::DOWNLOADED, State::EMPTY,  State::DOWNLOADED });
@ -309,12 +310,12 @@ TEST_F(FileCacheTest, get)
            /// Let's not invalidate pointers to returned segments from range [0, 25] and
            /// as max elements size is reached, next attempt to put something in cache should fail.
            /// This will also check that [27, 27] was indeed evicted.
-            auto holder2 = cache.getOrSet(key, 27, 1, {});
+            auto holder2 = cache.getOrSet(key, 27, 1, file_size, {});
            assertEqual(holder2, { Range(27, 27) }, { State::EMPTY });
            assertDownloadFails(holder2->front());
            assertEqual(holder2, { Range(27, 27) }, { State::DETACHED });

-            auto holder3 = cache.getOrSet(key, 28, 3, {});
+            auto holder3 = cache.getOrSet(key, 28, 3, file_size, {});
            assertEqual(holder3, { Range(28, 30) }, { State::EMPTY });
            assertDownloadFails(holder3->front());
            assertEqual(holder3, { Range(28, 30) }, { State::DETACHED });
@ -336,7 +337,7 @@ TEST_F(FileCacheTest, get)
        std::cerr << "Step 6\n";

        {
-            auto holder = cache.getOrSet(key, 12, 10, {}); /// Get [12, 21]
+            auto holder = cache.getOrSet(key, 12, 10, file_size, {}); /// Get [12, 21]
            assertEqual(holder,
                        { Range(10, 14),     Range(15, 16),     Range(17, 20),     Range(21, 21) },
                        { State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY });
@ -357,7 +358,7 @@ TEST_F(FileCacheTest, get)

        std::cerr << "Step 7\n";
        {
-            auto holder = cache.getOrSet(key, 23, 5, {}); /// Get [23, 27]
+            auto holder = cache.getOrSet(key, 23, 5, file_size, {}); /// Get [23, 27]
            assertEqual(holder,
                        { Range(23, 23), Range(24, 26),     Range(27, 27) },
                        { State::EMPTY,  State::DOWNLOADED, State::EMPTY });
@ -376,25 +377,25 @@ TEST_F(FileCacheTest, get)

        std::cerr << "Step 8\n";
        {
-            auto holder = cache.getOrSet(key, 2, 3, {}); /// Get [2, 4]
+            auto holder = cache.getOrSet(key, 2, 3, file_size, {}); /// Get [2, 4]
            assertEqual(holder, { Range(2, 4) }, { State::EMPTY });

-            auto holder2 = cache.getOrSet(key, 30, 2, {}); /// Get [30, 31]
+            auto holder2 = cache.getOrSet(key, 30, 2, file_size, {}); /// Get [30, 31]
            assertEqual(holder2, { Range(30, 31) }, { State::EMPTY });

            download(get(holder, 0));
            download(get(holder2, 0));

-            auto holder3 = cache.getOrSet(key, 23, 1, {}); /// Get [23, 23]
+            auto holder3 = cache.getOrSet(key, 23, 1, file_size, {}); /// Get [23, 23]
            assertEqual(holder3, { Range(23, 23) }, { State::DOWNLOADED });

-            auto holder4 = cache.getOrSet(key, 24, 3, {}); /// Get [24, 26]
+            auto holder4 = cache.getOrSet(key, 24, 3, file_size, {}); /// Get [24, 26]
            assertEqual(holder4, { Range(24, 26) }, { State::DOWNLOADED });

-            auto holder5 = cache.getOrSet(key, 27, 1, {}); /// Get [27, 27]
+            auto holder5 = cache.getOrSet(key, 27, 1, file_size, {}); /// Get [27, 27]
            assertEqual(holder5, { Range(27, 27) }, { State::DOWNLOADED });

-            auto holder6 = cache.getOrSet(key, 0, 40, {});
+            auto holder6 = cache.getOrSet(key, 0, 40, file_size, {});
            assertEqual(holder6,
                        { Range(0, 1), Range(2, 4),        Range(5, 22), Range(23, 23),     Range(24, 26),     Range(27, 27),    Range(28, 29), Range(30, 31),     Range(32, 39) },
                        { State::EMPTY, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::EMPTY });
@ -422,14 +423,14 @@ TEST_F(FileCacheTest, get)

        /// Get [2, 4]
        {
-            auto holder = cache.getOrSet(key, 2, 3, {});
+            auto holder = cache.getOrSet(key, 2, 3, file_size, {});
            assertEqual(holder, { Range(2, 4) }, { State::DOWNLOADED });
            increasePriority(holder);
        }


        {
-            auto holder = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29]
+            auto holder = cache.getOrSet(key, 25, 5, file_size, {}); /// Get [25, 29]
            assertEqual(holder,
                        { Range(24, 26),     Range(27, 27),     Range(28, 29) },
                        { State::DOWNLOADED, State::DOWNLOADED, State::EMPTY });
@ -451,7 +452,7 @@ TEST_F(FileCacheTest, get)
                chassert(&DB::CurrentThread::get() == &thread_status_1);
                DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1);

-                auto holder2 = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29] once again.
+                auto holder2 = cache.getOrSet(key, 25, 5, file_size, {}); /// Get [25, 29] once again.
                assertEqual(holder2,
                            { Range(24, 26),     Range(27, 27),     Range(28, 29) },
                            { State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADING });
@ -494,7 +495,7 @@ TEST_F(FileCacheTest, get)
            /// state is changed not manually via segment->completeWithState(state) but from destructor of holder
            /// and notify_all() is also called from destructor of holder.

-            auto holder = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25]
+            auto holder = cache.getOrSet(key, 3, 23, file_size, {}); /// Get [3, 25]
            assertEqual(holder,
                        { Range(2, 4),       Range(5, 23), Range(24, 26) },
                        { State::DOWNLOADED, State::EMPTY, State::DOWNLOADED });
@ -516,7 +517,7 @@ TEST_F(FileCacheTest, get)
                chassert(&DB::CurrentThread::get() == &thread_status_1);
                DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1);

-                auto holder2 = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25] once again
+                auto holder2 = cache.getOrSet(key, 3, 23, file_size, {}); /// Get [3, 25] once again
                assertEqual(holder,
                            { Range(2, 4),       Range(5, 23),       Range(24, 26) },
                            { State::DOWNLOADED, State::DOWNLOADING, State::DOWNLOADED });
@ -560,9 +561,10 @@ TEST_F(FileCacheTest, get)
        auto key = cache2.createKeyForPath("key1");

        /// Get [2, 29]
-        assertEqual(cache2.getOrSet(key, 2, 28, {}),
-                    { Range(2, 4),       Range(5, 23),      Range(24, 26),     Range(27, 27),     Range(28, 29) },
-                    { State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED });
+        assertEqual(
+            cache2.getOrSet(key, 2, 28, file_size, {}),
+            {Range(2, 4), Range(5, 23), Range(24, 26), Range(27, 27), Range(28, 29)},
+            {State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED});
    }

    std::cerr << "Step 12\n";
@ -578,9 +580,10 @@ TEST_F(FileCacheTest, get)
        auto key = cache2.createKeyForPath("key1");

        /// Get [0, 24]
-        assertEqual(cache2.getOrSet(key, 0, 25, {}),
-                    { Range(0, 9),  Range(10, 19), Range(20, 24) },
-                    { State::EMPTY, State::EMPTY,  State::EMPTY });
+        assertEqual(
+            cache2.getOrSet(key, 0, 25, file_size, {}),
+            {Range(0, 9), Range(10, 19), Range(20, 24)},
+            {State::EMPTY, State::EMPTY, State::EMPTY});
    }

    std::cerr << "Step 13\n";
@ -598,7 +601,7 @@ TEST_F(FileCacheTest, get)
        ASSERT_TRUE(!fs::exists(key_path));
        ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path()));

-        download(cache.getOrSet(key, 0, 10, {}));
+        download(cache.getOrSet(key, 0, 10, file_size, {}));
        ASSERT_EQ(cache.getUsedCacheSize(), 10);
        ASSERT_TRUE(fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular)));

@ -628,7 +631,7 @@ TEST_F(FileCacheTest, get)
        ASSERT_TRUE(!fs::exists(key_path));
        ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path()));

-        download(cache.getOrSet(key, 0, 10, {}));
+        download(cache.getOrSet(key, 0, 10, file_size, {}));
        ASSERT_EQ(cache.getUsedCacheSize(), 10);
        ASSERT_TRUE(fs::exists(key_path));

@ -756,7 +759,7 @@ TEST_F(FileCacheTest, temporaryData)

    auto tmp_data_scope = std::make_shared<TemporaryDataOnDiskScope>(nullptr, &file_cache, 0);

-    auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, CreateFileSegmentSettings{});
+    auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{});

    {
        ASSERT_EQ(some_data_holder->size(), 5);
--- a/src/Parsers/TokenIterator.cpp
+++ b/src/Parsers/TokenIterator.cpp
@ -4,7 +4,7 @@
 namespace DB
 {

-Tokens::Tokens(const char * begin, const char * end, size_t max_query_size)
+Tokens::Tokens(const char * begin, const char * end, size_t max_query_size, bool skip_insignificant)
 {
    Lexer lexer(begin, end, max_query_size);

@ -13,7 +13,7 @@ Tokens::Tokens(const char * begin, const char * end, size_t max_query_size)
    {
        Token token = lexer.nextToken();
        stop = token.isEnd() || token.type == TokenType::ErrorMaxQuerySizeExceeded;
-        if (token.isSignificant())
+        if (token.isSignificant() || (!skip_insignificant && !data.empty() && data.back().isSignificant()))
            data.emplace_back(std::move(token));
    } while (!stop);
 }
--- a/src/Parsers/TokenIterator.h
+++ b/src/Parsers/TokenIterator.h
@ -24,7 +24,7 @@ private:
    std::size_t last_accessed_index = 0;

 public:
-    Tokens(const char * begin, const char * end, size_t max_query_size = 0);
+    Tokens(const char * begin, const char * end, size_t max_query_size = 0, bool skip_insignificant = true);

    ALWAYS_INLINE inline const Token & operator[](size_t index)
    {
--- a/src/Parsers/parseQuery.cpp
+++ b/src/Parsers/parseQuery.cpp
@ -233,10 +233,11 @@ ASTPtr tryParseQuery(
    const std::string & query_description,
    bool allow_multi_statements,
    size_t max_query_size,
-    size_t max_parser_depth)
+    size_t max_parser_depth,
+    bool skip_insignificant)
 {
    const char * query_begin = _out_query_end;
-    Tokens tokens(query_begin, all_queries_end, max_query_size);
+    Tokens tokens(query_begin, all_queries_end, max_query_size, skip_insignificant);
    /// NOTE: consider use UInt32 for max_parser_depth setting.
    IParser::Pos token_iterator(tokens, static_cast<uint32_t>(max_parser_depth));

--- a/src/Parsers/parseQuery.h
+++ b/src/Parsers/parseQuery.h
@ -18,7 +18,8 @@ ASTPtr tryParseQuery(
    bool allow_multi_statements,    /// If false, check for non-space characters after semicolon and set error message if any.
    size_t max_query_size,          /// If (end - pos) > max_query_size and query is longer than max_query_size then throws "Max query size exceeded".
                                    /// Disabled if zero. Is used in order to check query size if buffer can contains data for INSERT query.
-    size_t max_parser_depth);
+    size_t max_parser_depth,
+    bool skip_insignificant = true);  /// If true, lexer will skip all insignificant tokens (e.g. whitespaces)


 /// Parse query or throw an exception with error message.
--- a/src/Storages/MergeTree/MergeTreeReadPool.cpp
+++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp
@ -1,11 +1,11 @@
-#include <Storages/MergeTree/MergeTreeReadPool.h>
-#include <Storages/MergeTree/MergeTreeBaseSelectProcessor.h>
 #include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
+#include <Storages/MergeTree/MergeTreeBaseSelectProcessor.h>
+#include <Storages/MergeTree/MergeTreeReadPool.h>
+#include <base/range.h>
 #include <Interpreters/Context_fwd.h>
 #include <Common/Stopwatch.h>
 #include <Common/formatReadable.h>
 #include <Common/logger_useful.h>
-#include <base/range.h>


 namespace ProfileEvents
@ -22,6 +22,14 @@ namespace ErrorCodes
 namespace DB
 {

+/// Accumulates `part.getColumnSize(name)` for every name in `columns_to_read`
+/// and returns the `data_compressed` component of the accumulated total.
+/// NOTE(review): presumably a size in bytes - confirm against ColumnSize.
+size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & columns_to_read)
+{
+    ColumnSize columns_size{};
+    for (const auto & col_name : columns_to_read)
+        columns_size.add(part.getColumnSize(col_name));
+    return columns_size.data_compressed;
+}
+
 MergeTreeReadPool::MergeTreeReadPool(
    size_t threads_,
    size_t sum_marks_,
@ -45,16 +53,43 @@ MergeTreeReadPool::MergeTreeReadPool(
    , parts_ranges(std::move(parts_))
    , predict_block_size_bytes(context_->getSettingsRef().preferred_block_size_bytes > 0)
    , do_not_steal_tasks(do_not_steal_tasks_)
+    , merge_tree_use_const_size_tasks_for_remote_reading(context_->getSettingsRef().merge_tree_use_const_size_tasks_for_remote_reading)
    , backoff_settings{context_->getSettingsRef()}
    , backoff_state{threads_}
 {
    /// parts don't contain duplicate MergeTreeDataPart's.
    const auto per_part_sum_marks = fillPerPartInfo(
        parts_ranges, storage_snapshot, is_part_on_remote_disk,
-        do_not_steal_tasks, predict_block_size_bytes,
+        predict_block_size_bytes,
        column_names, virtual_column_names, prewhere_info,
        actions_settings, reader_settings, per_part_params);

+    if (std::ranges::count(is_part_on_remote_disk, true))
+    {
+        const auto & settings = context_->getSettingsRef();
+
+        size_t total_compressed_bytes = 0;
+        size_t total_marks = 0;
+        for (const auto & part : parts_ranges)
+        {
+            total_compressed_bytes += getApproxSizeOfPart(
+                *part.data_part, prewhere_info ? prewhere_info->prewhere_actions->getRequiredColumnsNames() : column_names_);
+            total_marks += part.getMarksCount();
+        }
+
+        if (total_marks)
+        {
+            const auto min_bytes_per_task = settings.merge_tree_min_bytes_per_task_for_remote_reading;
+            const auto avg_mark_bytes = std::max<size_t>(total_compressed_bytes / total_marks, 1);
+            /// We're taking min here because number of tasks shouldn't be too low - it will make task stealing impossible.
+            const auto heuristic_min_marks = std::min<size_t>(total_marks / threads_, min_bytes_per_task / avg_mark_bytes);
+            if (heuristic_min_marks > min_marks_for_concurrent_read)
+            {
+                min_marks_for_concurrent_read = heuristic_min_marks;
+            }
+        }
+    }
+
    fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_ranges);
 }

@ -62,7 +97,6 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
    const RangesInDataParts & parts,
    const StorageSnapshotPtr & storage_snapshot,
    std::vector<bool> & is_part_on_remote_disk,
-    bool & do_not_steal_tasks,
    bool & predict_block_size_bytes,
    const Names & column_names,
    const Names & virtual_column_names,
@ -84,7 +118,6 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(

        bool part_on_remote_disk = part.data_part->isStoredOnRemoteDisk();
        is_part_on_remote_disk[i] = part_on_remote_disk;
-        do_not_steal_tasks |= part_on_remote_disk;

        /// Read marks for every data part.
        size_t sum_marks = 0;
@ -160,14 +193,13 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t thread)
    auto & marks_in_part = thread_tasks.sum_marks_in_parts.back();

    size_t need_marks;
-    if (is_part_on_remote_disk[part_idx]) /// For better performance with remote disks
+    if (is_part_on_remote_disk[part_idx] && !merge_tree_use_const_size_tasks_for_remote_reading)
        need_marks = marks_in_part;
    else /// Get whole part to read if it is small enough.
        need_marks = std::min(marks_in_part, min_marks_for_concurrent_read);

    /// Do not leave too little rows in part for next time.
-    if (marks_in_part > need_marks &&
-        marks_in_part - need_marks < min_marks_for_concurrent_read)
+    if (marks_in_part > need_marks && marks_in_part - need_marks < min_marks_for_concurrent_read / 2)
        need_marks = marks_in_part;

    MarkRanges ranges_to_get_from_part;
@ -300,6 +332,8 @@ void MergeTreeReadPool::fillPerThreadInfo(
            parts_queue.push(std::move(info.second));
    }

+    LOG_DEBUG(log, "min_marks_for_concurrent_read={}", min_marks_for_concurrent_read);
+
    const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1;

    for (size_t i = 0; i < threads && !parts_queue.empty(); ++i)
--- a/src/Storages/MergeTree/MergeTreeReadPool.h
+++ b/src/Storages/MergeTree/MergeTreeReadPool.h
@ -94,7 +94,6 @@ public:
        const RangesInDataParts & parts,
        const StorageSnapshotPtr & storage_snapshot,
        std::vector<bool> & is_part_on_remote_disk,
-        bool & do_not_steal_tasks,
        bool & predict_block_size_bytes,
        const Names & column_names,
        const Names & virtual_column_names,
@ -119,6 +118,7 @@ private:
    RangesInDataParts parts_ranges;
    bool predict_block_size_bytes;
    bool do_not_steal_tasks;
+    bool merge_tree_use_const_size_tasks_for_remote_reading = false;

    std::vector<PerPartParams> per_part_params;
    std::vector<bool> is_part_on_remote_disk;
@ -189,7 +189,7 @@ public:
        , parts_ranges(std::move(parts_))
    {
        MergeTreeReadPool::fillPerPartInfo(
-            parts_ranges, storage_snapshot, is_part_on_remote_disk, do_not_steal_tasks,
+            parts_ranges, storage_snapshot, is_part_on_remote_disk,
            predict_block_size_bytes, column_names, virtual_column_names, prewhere_info,
            actions_settings, reader_settings, per_part_params);

@ -226,7 +226,6 @@ private:
    const Names virtual_column_names;
    RangesInDataParts parts_ranges;

-    bool do_not_steal_tasks = false;
    bool predict_block_size_bytes = false;
    std::vector<bool> is_part_on_remote_disk;
    std::vector<MergeTreeReadPool::PerPartParams> per_part_params;
--- a/src/Storages/MergeTree/MergeTreeSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSource.cpp
@ -207,6 +207,7 @@ std::optional<Chunk> MergeTreeSource::tryGenerate()

            try
            {
+                OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"};
                holder->setResult(algorithm->read());
            }
            catch (...)
@ -221,6 +222,7 @@ std::optional<Chunk> MergeTreeSource::tryGenerate()
    }
 #endif

+    OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"};
    return processReadResult(algorithm->read());
 }

--- a/src/Storages/StorageGenerateRandom.cpp
+++ b/src/Storages/StorageGenerateRandom.cpp
@ -470,7 +470,7 @@ class GenerateSource : public ISource
 {
 public:
    GenerateSource(UInt64 block_size_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_, Block block_header_, ContextPtr context_)
-        : ISource(Nested::flatten(prepareBlockToFill(block_header_)))
+        : ISource(Nested::flattenArrayOfTuples(prepareBlockToFill(block_header_)))
        , block_size(block_size_), max_array_length(max_array_length_), max_string_length(max_string_length_)
        , block_to_fill(std::move(block_header_)), rng(random_seed_), context(context_) {}

@ -485,7 +485,7 @@ protected:
        for (const auto & elem : block_to_fill)
            columns.emplace_back(fillColumnWithRandomData(elem.type, block_size, max_array_length, max_string_length, rng, context));

-        columns = Nested::flatten(block_to_fill.cloneWithColumns(columns)).getColumns();
+        columns = Nested::flattenArrayOfTuples(block_to_fill.cloneWithColumns(columns)).getColumns();
        return {std::move(columns), block_size};
    }

--- a/src/Storages/StoragePostgreSQL.cpp
+++ b/src/Storages/StoragePostgreSQL.cpp
@ -225,7 +225,7 @@ public:
    /// Cannot just use serializeAsText for array data type even though it converts perfectly
    /// any dimension number array into text format, because it encloses in '[]' and for postgres it must be '{}'.
    /// Check if array[...] syntax from PostgreSQL will be applicable.
-    void parseArray(const Field & array_field, const DataTypePtr & data_type, WriteBuffer & ostr)
+    static void parseArray(const Field & array_field, const DataTypePtr & data_type, WriteBuffer & ostr)
    {
        const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
        const auto & nested = array_type->getNestedType();
@ -233,7 +233,7 @@ public:

        if (!isArray(nested))
        {
-            writeText(clickhouseToPostgresArray(array, data_type), ostr);
+            parseArrayContent(array, data_type, ostr);
            return;
        }

@ -247,7 +247,7 @@ public:

            if (!isArray(nested_array_type->getNestedType()))
            {
-                writeText(clickhouseToPostgresArray(iter->get<Array>(), nested), ostr);
+                parseArrayContent(iter->get<Array>(), nested, ostr);
            }
            else
            {
@ -260,17 +260,36 @@ public:

    /// Conversion is done via column casting because with writeText(Array..) got incorrect conversion
    /// of Date and DateTime data types and it added extra quotes for values inside array.
-    static std::string clickhouseToPostgresArray(const Array & array_field, const DataTypePtr & data_type)
+    static void parseArrayContent(const Array & array_field, const DataTypePtr & data_type, WriteBuffer & ostr)
    {
-        auto nested = typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType();
-        auto array_column = ColumnArray::create(createNested(nested));
+        auto nested_type = typeid_cast<const DataTypeArray *>(data_type.get())->getNestedType();
+        auto array_column = ColumnArray::create(createNested(nested_type));
        array_column->insert(array_field);
-        WriteBufferFromOwnString ostr;
-        data_type->getDefaultSerialization()->serializeText(*array_column, 0, ostr, FormatSettings{});

-        /// ostr is guaranteed to be at least '[]', i.e. size is at least 2 and 2 only if ostr.str() == '[]'
-        assert(ostr.str().size() >= 2);
-        return '{' + std::string(ostr.str().begin() + 1, ostr.str().end() - 1) + '}';
+        const IColumn & nested_column = array_column->getData();
+        const auto serialization = nested_type->getDefaultSerialization();
+
+        FormatSettings settings;
+        settings.pretty.charset = FormatSettings::Pretty::Charset::ASCII;
+
+        if (nested_type->isNullable())
+            nested_type = static_cast<const DataTypeNullable *>(nested_type.get())->getNestedType();
+
+        /// UUIDs inside arrays are expected to be unquoted in PostgreSQL.
+        const bool quoted = !isUUID(nested_type);
+
+        writeChar('{', ostr);
+        for (size_t i = 0, size = array_field.size(); i < size; ++i)
+        {
+            if (i != 0)
+                writeChar(',', ostr);
+
+            if (quoted)
+                serialization->serializeTextQuoted(nested_column, i, ostr, settings);
+            else
+                serialization->serializeText(nested_column, i, ostr, settings);
+        }
+        writeChar('}', ostr);
    }

    static MutableColumnPtr createNested(DataTypePtr nested)
@ -295,6 +314,7 @@ public:
        else if (which.isFloat64())                      nested_column = ColumnFloat64::create();
        else if (which.isDate())                         nested_column = ColumnUInt16::create();
        else if (which.isDateTime())                     nested_column = ColumnUInt32::create();
+        else if (which.isUUID())                         nested_column = ColumnUUID::create();
        else if (which.isDateTime64())
        {
            nested_column = ColumnDecimal<DateTime64>::create(0, 6);
--- a/src/TableFunctions/TableFunctionGenerateRandom.cpp
+++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp
@ -1,18 +1,19 @@
-#include <Common/typeid_cast.h>
 #include <Common/Exception.h>

-#include <Core/Block.h>
 #include <Storages/StorageGenerateRandom.h>
 #include <Storages/checkAndGetLiteralArgument.h>

 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTFunction.h>

 #include <TableFunctions/ITableFunction.h>
 #include <TableFunctions/TableFunctionFactory.h>
 #include <TableFunctions/TableFunctionGenerateRandom.h>
+#include <Functions/FunctionGenerateRandomStructure.h>
 #include <Interpreters/parseColumnsListForTableFunction.h>
+#include <Interpreters/evaluateConstantExpression.h>
+
+#include <Common/randomSeed.h>

 #include "registerTableFunctions.h"

@ -25,10 +26,9 @@ namespace ErrorCodes
    extern const int BAD_ARGUMENTS;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int LOGICAL_ERROR;
-    extern const int CANNOT_EXTRACT_TABLE_STRUCTURE;
 }

-void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/)
+void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, ContextPtr context)
 {
    ASTs & args_func = ast_function->children;

@ -40,10 +40,21 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co
    if (args.empty())
        return;

-    if (args.size() > 4)
+    /// First, check if first argument is structure or seed.
+    const auto * first_arg_literal = args[0]->as<const ASTLiteral>();
+    bool first_argument_is_structure = !first_arg_literal || first_arg_literal->value.getType() == Field::Types::String;
+    size_t max_args = first_argument_is_structure ? 4 : 3;
+
+    if (args.size() > max_args)
        throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-                        "Table function '{}' requires at most four arguments: "
-                        " structure, [random_seed, max_string_length, max_array_length].", getName());
+                        "Table function '{}' requires at most four (or three if structure is missing) arguments: "
+                        " [structure, random_seed, max_string_length, max_array_length].", getName());
+
+    if (first_argument_is_structure)
+    {
+        /// Allow constant expression for structure argument, it can be generated using generateRandomStructure function.
+        args[0] = evaluateConstantExpressionAsLiteral(args[0], context);
+    }

    // All the arguments must be literals.
    for (const auto & arg : args)
@ -51,26 +62,39 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co
        if (!arg->as<const ASTLiteral>())
        {
            throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "All arguments of table function '{}' must be literals. "
+                "All arguments of table function '{}' except structure argument must be literals. "
                "Got '{}' instead", getName(), arg->formatForErrorMessage());
        }
    }

-    /// Parsing first argument as table structure and creating a sample block
-    structure = checkAndGetLiteralArgument<String>(args[0], "structure");
+    size_t arg_index = 0;

-    if (args.size() >= 2)
+    if (first_argument_is_structure)
    {
-        const auto & literal = args[1]->as<const ASTLiteral &>();
+        /// Parsing first argument as table structure and creating a sample block
+        structure = checkAndGetLiteralArgument<String>(args[arg_index], "structure");
+        ++arg_index;
+    }
+
+    if (args.size() >= arg_index + 1)
+    {
+        const auto & literal = args[arg_index]->as<const ASTLiteral &>();
+        ++arg_index;
        if (!literal.value.isNull())
            random_seed = checkAndGetLiteralArgument<UInt64>(literal, "random_seed");
    }

-    if (args.size() >= 3)
-        max_string_length = checkAndGetLiteralArgument<UInt64>(args[2], "max_string_length");
+    if (args.size() >= arg_index + 1)
+    {
+        max_string_length = checkAndGetLiteralArgument<UInt64>(args[arg_index], "max_string_length");
+        ++arg_index;
+    }

-    if (args.size() == 4)
-        max_array_length = checkAndGetLiteralArgument<UInt64>(args[3], "max_string_length");
+    if (args.size() == arg_index + 1) /// Exactly one argument left: it is max_array_length.
+    {
+        max_array_length = checkAndGetLiteralArgument<UInt64>(args[arg_index], "max_array_length");
+        ++arg_index;
+    }
 }

 ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextPtr context) const
@ -78,11 +102,11 @@ ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextP
    if (structure == "auto")
    {
        if (structure_hint.empty())
-            throw Exception(
-                ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
-                "Table function '{}' was used without structure argument but structure could not be determined automatically. Please, "
-                "provide structure manually",
-                getName());
+        {
+            auto random_structure = FunctionGenerateRandomStructure::generateRandomStructure(random_seed.value_or(randomSeed()), context);
+            return parseColumnsListFromString(random_structure, context);
+        }
+
        return structure_hint;
    }

--- a/tests/ci/cherry_pick.py
+++ b/tests/ci/cherry_pick.py
@ -71,11 +71,11 @@ This pull-request will be merged automatically as it reaches the mergeable state
 ### If the PR was closed and then reopened

 If it stuck, check {pr_url} for `{backport_created_label}` and delete it if \
-necessary. Manually merging will do nothing, since `{label_backports_created}` \
+necessary. Manually merging will do nothing, since `{backport_created_label}` \
 prevents the original PR {pr_url} from being processed.

 If you want to recreate the PR: delete the `{label_cherrypick}` label and delete this branch.
-You may also need to delete the `{label_backports_created}` label from the original PR.
+You may also need to delete the `{backport_created_label}` label from the original PR.
 """
    BACKPORT_DESCRIPTION = """This pull-request is a last step of an automated \
 backporting.
--- a/tests/integration/test_keeper_client/__init__.py
+++ b/tests/integration/test_keeper_client/__init__.py
--- a/tests/integration/test_keeper_client/configs/keeper_config.xml
+++ b/tests/integration/test_keeper_client/configs/keeper_config.xml
@ -0,0 +1,3 @@
+<clickhouse>
+
+</clickhouse>
--- a/tests/integration/test_keeper_client/test.py
+++ b/tests/integration/test_keeper_client/test.py
@ -0,0 +1,63 @@
+import pytest
+from helpers.client import CommandRequest
+from helpers.cluster import ClickHouseCluster
+
+
+cluster = ClickHouseCluster(__file__)
+
+node = cluster.add_instance(
+    "node",
+    main_configs=["configs/keeper_config.xml"],
+    with_zookeeper=True,
+    stay_alive=True,
+)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def test_base_commands(started_cluster):
+    _ = started_cluster
+
+    command = CommandRequest(
+        [
+            started_cluster.server_bin_path,
+            "keeper-client",
+            "--host",
+            str(cluster.get_instance_ip("zoo1")),
+            "--port",
+            str(cluster.zookeeper_port),
+            "-q",
+            "create test_create_zk_node1 testvalue1;create test_create_zk_node_2 testvalue2;get test_create_zk_node1;",
+        ],
+        stdin="",
+    )
+
+    assert command.get_answer() == "testvalue1\n"
+
+
+def test_four_letter_word_commands(started_cluster):
+    _ = started_cluster
+
+    command = CommandRequest(
+        [
+            started_cluster.server_bin_path,
+            "keeper-client",
+            "--host",
+            str(cluster.get_instance_ip("zoo1")),
+            "--port",
+            str(cluster.zookeeper_port),
+            "-q",
+            "ruok",
+        ],
+        stdin="",
+    )
+
+    assert command.get_answer() == "imok\n"
--- a/tests/integration/test_storage_postgresql/test.py
+++ b/tests/integration/test_storage_postgresql/test.py
@ -123,7 +123,9 @@ def test_postgres_conversions(started_cluster):
                g Text[][][][][] NOT NULL,                  -- String
                h Integer[][][],                            -- Nullable(Int32)
                i Char(2)[][][][],                          -- Nullable(String)
-                k Char(2)[]                                 -- Nullable(String)
+                j Char(2)[],                                -- Nullable(String)
+                k UUID[],                                   -- Nullable(UUID)
+                l UUID[][]                                  -- Nullable(UUID)
           )"""
    )

@ -133,15 +135,18 @@ def test_postgres_conversions(started_cluster):
    )
    expected = (
        "a\tArray(Date)\t\t\t\t\t\n"
-        + "b\tArray(DateTime64(6))\t\t\t\t\t\n"
-        + "c\tArray(Array(Float32))\t\t\t\t\t\n"
-        + "d\tArray(Array(Float64))\t\t\t\t\t\n"
-        + "e\tArray(Array(Array(Decimal(5, 5))))\t\t\t\t\t\n"
-        + "f\tArray(Array(Array(Int32)))\t\t\t\t\t\n"
-        + "g\tArray(Array(Array(Array(Array(String)))))\t\t\t\t\t\n"
-        + "h\tArray(Array(Array(Nullable(Int32))))\t\t\t\t\t\n"
-        + "i\tArray(Array(Array(Array(Nullable(String)))))\t\t\t\t\t\n"
-        + "k\tArray(Nullable(String))"
+        "b\tArray(DateTime64(6))\t\t\t\t\t\n"
+        "c\tArray(Array(Float32))\t\t\t\t\t\n"
+        "d\tArray(Array(Float64))\t\t\t\t\t\n"
+        "e\tArray(Array(Array(Decimal(5, 5))))\t\t\t\t\t\n"
+        "f\tArray(Array(Array(Int32)))\t\t\t\t\t\n"
+        "g\tArray(Array(Array(Array(Array(String)))))\t\t\t\t\t\n"
+        "h\tArray(Array(Array(Nullable(Int32))))\t\t\t\t\t\n"
+        "i\tArray(Array(Array(Array(Nullable(String)))))\t\t\t\t\t\n"
+        "j\tArray(Nullable(String))\t\t\t\t\t\n"
+        "k\tArray(Nullable(UUID))\t\t\t\t\t\n"
+        "l\tArray(Array(Nullable(UUID)))"
+        ""
    )
    assert result.rstrip() == expected

@ -157,7 +162,9 @@ def test_postgres_conversions(started_cluster):
        "[[[[['winx', 'winx', 'winx']]]]], "
        "[[[1, NULL], [NULL, 1]], [[NULL, NULL], [NULL, NULL]], [[4, 4], [5, 5]]], "
        "[[[[NULL]]]], "
-        "[]"
+        "[], "
+        "['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a', '42209d53-d641-4d73-a8b6-c038db1e75d6', NULL], "
+        "[[NULL, '42209d53-d641-4d73-a8b6-c038db1e75d6'], ['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a', NULL], [NULL, NULL]]"
        ")"
    )

@ -167,15 +174,17 @@ def test_postgres_conversions(started_cluster):
    )
    expected = (
        "['2000-05-12','2000-05-12']\t"
-        + "['2000-05-12 12:12:12.012345','2000-05-12 12:12:12.012345']\t"
-        + "[[1.12345],[1.12345],[1.12345]]\t"
-        + "[[1.1234567891],[1.1234567891],[1.1234567891]]\t"
-        + "[[[0.11111,0.11111]],[[0.22222,0.22222]],[[0.33333,0.33333]]]\t"
+        "['2000-05-12 12:12:12.012345','2000-05-12 12:12:12.012345']\t"
+        "[[1.12345],[1.12345],[1.12345]]\t"
+        "[[1.1234567891],[1.1234567891],[1.1234567891]]\t"
+        "[[[0.11111,0.11111]],[[0.22222,0.22222]],[[0.33333,0.33333]]]\t"
        "[[[1,1],[1,1]],[[3,3],[3,3]],[[4,4],[5,5]]]\t"
        "[[[[['winx','winx','winx']]]]]\t"
        "[[[1,NULL],[NULL,1]],[[NULL,NULL],[NULL,NULL]],[[4,4],[5,5]]]\t"
        "[[[[NULL]]]]\t"
-        "[]\n"
+        "[]\t"
+        "['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a','42209d53-d641-4d73-a8b6-c038db1e75d6',NULL]\t"
+        "[[NULL,'42209d53-d641-4d73-a8b6-c038db1e75d6'],['2a0c0bfc-4fec-4e32-ae3a-7fc8eea6626a',NULL],[NULL,NULL]]\n"
    )
    assert result == expected

--- a/tests/queries/0_stateless/01256_negative_generate_random.sql
+++ b/tests/queries/0_stateless/01256_negative_generate_random.sql
@ -1,5 +1,4 @@
 SELECT * FROM generateRandom('i8', 1, 10, 10); -- { serverError 62 }
 SELECT * FROM generateRandom; -- { serverError 60 }
-SELECT * FROM generateRandom(); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE }
 SELECT * FROM generateRandom('i8 UInt8', 1, 10, 10, 10, 10); -- { serverError 42 }
 SELECT * FROM generateRandom('', 1, 10, 10); -- { serverError 62 }
--- a/tests/queries/0_stateless/02344_describe_cache.reference
+++ b/tests/queries/0_stateless/02344_describe_cache.reference
@ -1,2 +1,2 @@
-134217728	1048576	104857600	1	0	0	0	/var/lib/clickhouse/caches/s3_cache/	0
+134217728	1048576	8388608	1	0	0	0	/var/lib/clickhouse/caches/s3_cache/	0
 134217728	1048576	104857600	0	0	0	0	/var/lib/clickhouse/caches/s3_cache_2/	0
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@ -266,7 +266,6 @@ encodeURLComponent
 encodeURLFormComponent
 encodeXMLComponent
 endsWith
-entropyLearnedHash
 equals
 erf
 erfc
@ -559,7 +558,6 @@ positionCaseInsensitive
 positionCaseInsensitiveUTF8
 positionUTF8
 pow
-prepareTrainEntropyLearnedHash
 proportionsZTest
 protocol
 queryID
@ -865,7 +863,6 @@ toYear
 toYearWeek
 today
 tokens
-trainEntropyLearnedHash
 transactionID
 transactionLatestSnapshot
 transactionOldestSnapshot
--- a/tests/queries/0_stateless/02586_generate_random_structure.reference
+++ b/tests/queries/0_stateless/02586_generate_random_structure.reference
@ -0,0 +1,19 @@
+c1 String, c2 UInt256, c3 String, c4 Decimal128(8), c5 UInt128
+String
+Const(String)
+`	90465455320735604871982424534384518837533904778028808627865442405232847164685	5&	-303477100882544888461471906106.82821046	75820566154622566322847299106656624693
+c1	FixedString(125)					
+c2	IPv4					
+c3.e1	Array(Enum16(\'e1V3\' = -24827, \'e1V14\' = -24479, \'e1V8\' = -22478, \'e1V10\' = -13735, \'e1V15\' = -12641, \'e1V11\' = -10191, \'e1V0\' = -8579, \'e1V7\' = -8104, \'e1V6\' = 712, \'e1V12\' = 5683, \'e1V13\' = 13678, \'e1V9\' = 19740, \'e1V5\' = 23066, \'e1V2\' = 23292, \'e1V4\' = 23736, \'e1V1\' = 31672))					
+c3.e2	Array(Map(Int8, Int32))					
+c3.e3	Array(Decimal(76, 64))					
+c3.e4	Array(Int32)					
+c3.e5	Array(Nullable(Int64))					
+c3.e6	Array(Int256)					
+c4	FixedString(183)					
+c5	IPv4					
+c6	UInt256					
+TbÔ#yV·Ü[>ôMá<4D>Á*Ü¨Ęáľ( O‡R8V1n—ŽĎ)Ň˙ň›HË}C¨¦Ęúţ™\'<27>“ŕóŞI7<49>t¦íĚqnëV)äł†ĐšqL˘ďPÍoŮRŘg<Č{™Ľ®3Śiű¨đůH_m˙ú!qŁ‚Ô\'G¨Ľ	127.48.9.45	['e1V10','e1V0','e1V10','e1V14','e1V10','e1V14']	[{-13:777622572,102:-1122882357,62:1647813163,-94:2094022166},{-32:1448633509},{},{},{34:1536340393,19:-2049677851,74:65643868,-46:-1990799930,97:-531041081,46:-2634833,14:1581632600,89:-771229823,-105:1238603584},{47:1458809010,109:1640682510,86:1945730198,85:1505847247,35:-35189402}]	[153363749503.3642648494826450951141750747382772821825909005880434540971999557,79828591186.7378041015337066268618633118713347614941338787453473118807106292,81672688565.9633830721322966111551266731935181670389237071708068971548883315,573768486971.1812413548839655834002608768736215115033958693122764224003897029,-393925092368.4893467278351090742501814120269109477445490969167853713051140487,46027399426.0865278566391382610843315130162915324295037009704113636499519839]	[755855942,1804001770,-78103159,-866181765,731736602,-79599206]	[5253556148991564114,4681434929596395351,-7302160004580855709,-3686747220178471318,6288582051009949273,646864891160092871]	[17035203905051045016266537043565487029724162173062647021612805252288722534904,-42105881403933504641593145676742477006499618886131028341247993701618141933523,45346626822580305846120377917274679004279343244238782744860626882886217433843,-3660165069803677989574889324494857545543653453780976182221584349306428201647,-23316760935816288837287058499520670431785615691220162210524162590241529297823,6184785563808848524970564618169964412151721224362412457508264894603779018817]	ýÚˇ|<7C>A"žx<C5BE>ŔÂ>ń˘®ŤóęŻr—wzZáť:Ń¸jú8tZľĚD"TußŇ2hÚ!ďüŹWěIšśyżütP÷ía|Ž\'†yîĄ<C3AE>‹›o±<6F>ň’ĎfČFÉO\0ŃýŐ–6\fIrĺE Sac¶W<~´e ×l<C397>ŐT>P3})řá¬w%ú4@_2ýN"Đ†Xp$^Ň<>ůŤ<C5AF>Í°\04±@áŕşn\b›ę®Ń\rü4ĹH‹	16.177.117.209	7882774382721411359365561736453116698030365959050344381263687375357052837130
+TbÔ#yV·Ü[>ôMá<4D>Á*Ü¨Ęáľ( O‡R8V1n—ŽĎ)Ň˙ň›HË}C¨¦Ęúţ™\'<27>“ŕóŞI7<49>t¦íĚqnëV)äł†ĐšqL˘ďPÍoŮRŘg<Č{™Ľ®3Śiű¨đůH_m˙ú!qŁ‚Ô\'G¨Ľ	127.48.9.45	['e1V10','e1V0','e1V10','e1V14','e1V10','e1V14']	[{-13:777622572,102:-1122882357,62:1647813163,-94:2094022166},{-32:1448633509},{},{},{34:1536340393,19:-2049677851,74:65643868,-46:-1990799930,97:-531041081,46:-2634833,14:1581632600,89:-771229823,-105:1238603584},{47:1458809010,109:1640682510,86:1945730198,85:1505847247,35:-35189402}]	[153363749503.3642648494826450951141750747382772821825909005880434540971999557,79828591186.7378041015337066268618633118713347614941338787453473118807106292,81672688565.9633830721322966111551266731935181670389237071708068971548883315,573768486971.1812413548839655834002608768736215115033958693122764224003897029,-393925092368.4893467278351090742501814120269109477445490969167853713051140487,46027399426.0865278566391382610843315130162915324295037009704113636499519839]	[755855942,1804001770,-78103159,-866181765,731736602,-79599206]	[5253556148991564114,4681434929596395351,-7302160004580855709,-3686747220178471318,6288582051009949273,646864891160092871]	[17035203905051045016266537043565487029724162173062647021612805252288722534904,-42105881403933504641593145676742477006499618886131028341247993701618141933523,45346626822580305846120377917274679004279343244238782744860626882886217433843,-3660165069803677989574889324494857545543653453780976182221584349306428201647,-23316760935816288837287058499520670431785615691220162210524162590241529297823,6184785563808848524970564618169964412151721224362412457508264894603779018817]	ýÚˇ|<7C>A"žx<C5BE>ŔÂ>ń˘®ŤóęŻr—wzZáť:Ń¸jú8tZľĚD"TußŇ2hÚ!ďüŹWěIšśyżütP÷ía|Ž\'†yîĄ<C3AE>‹›o±<6F>ň’ĎfČFÉO\0ŃýŐ–6\fIrĺE Sac¶W<~´e ×l<C397>ŐT>P3})řá¬w%ú4@_2ýN"Đ†Xp$^Ň<>ůŤ<C5AF>Í°\04±@áŕşn\b›ę®Ń\rü4ĹH‹	16.177.117.209	7882774382721411359365561736453116698030365959050344381263687375357052837130
+TbÔ#yV·Ü[>ôMá<4D>Á*Ü¨Ęáľ( O‡R8V1n—ŽĎ)Ň˙ň›HË}C¨¦Ęúţ™\'<27>“ŕóŞI7<49>t¦íĚqnëV)äł†ĐšqL˘ďPÍoŮRŘg<Č{™Ľ®3Śiű¨đůH_m˙ú!qŁ‚Ô\'G¨Ľ	127.48.9.45	['e1V10']	[{}]	[825002272867.1157788721157301271303736024856710948164507982705676578804195475]	[1865150610]	[7514464811443271056]	[33504961604882608369857530219353040639899064613284394558131808339620328539033]	ýÚˇ|<7C>A"žx<C5BE>ŔÂ>ń˘®ŤóęŻr—wzZáť:Ń¸jú8tZľĚD"TußŇ2hÚ!ďüŹWěIšśyżütP÷ía|Ž\'†yîĄ<C3AE>‹›o±<6F>ň’ĎfČFÉO\0ŃýŐ–6\fIrĺE Sac¶W<~´e ×l<C397>ŐT>P3})řá¬w%ú4@_2ýN"Đ†Xp$^Ň<>ůŤ<C5AF>Í°\04±@áŕşn\b›ę®Ń\rü4ĹH‹	16.177.117.209	7882774382721411359365561736453116698030365959050344381263687375357052837130
+c1 LowCardinality(Nullable(UInt64)), c2 Date32, c3 LowCardinality(Nullable(Float64)), c4 Int256, c5 Date32
--- a/tests/queries/0_stateless/02586_generate_random_structure.sql
+++ b/tests/queries/0_stateless/02586_generate_random_structure.sql
@ -0,0 +1,20 @@
+select generateRandomStructure(5, 42);
+select toTypeName(generateRandomStructure(5, 42));
+select toColumnTypeName(generateRandomStructure(5, 42));
+SELECT * FROM generateRandom(generateRandomStructure(5, 42), 42) LIMIT 1;
+
+select generateRandomStructure(5, 42, 42); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
+select generateRandomStructure('5'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT}
+select generateRandomStructure(5, '42'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT}
+select generateRandomStructure(materialize(5), 42); -- {serverError ILLEGAL_COLUMN}
+select generateRandomStructure(5, materialize(42)); -- {serverError ILLEGAL_COLUMN}
+
+desc generateRandom(10000000);
+select * from generateRandom(10000000) limit 1;
+select * from generateRandom(10000000, 2) limit 1;
+select * from generateRandom(10000000, 2, 2) limit 1;
+select * from generateRandom(10000000, 2, 2, 2) limit 1; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
+
+set allow_suspicious_low_cardinality_types=1;
+select generateRandomStructure(5, 4);
+
--- a/tests/queries/0_stateless/02734_entropy_learned_hashing.reference
+++ b/tests/queries/0_stateless/02734_entropy_learned_hashing.reference
@ -1,18 +0,0 @@
-0
-0
-0
-0
-0
-0
-2603192927274642682
-4947675599669400333
-10783339242466472992
-0
-0
-0
-0
-0
-0
-2603192927274642682
-4947675599669400333
-10783339242466472992
--- a/tests/queries/0_stateless/02734_entropy_learned_hashing.sql
+++ b/tests/queries/0_stateless/02734_entropy_learned_hashing.sql
@ -1,30 +0,0 @@
--- Tags: no-parallel
--- no-parallel because entropy-learned hash uses global state
-
-SET allow_experimental_hash_functions = 1;
-
--- no commonalities between keys
-DROP TABLE IF EXISTS tbl1;
-CREATE TABLE tbl1 (x String) ENGINE=Memory;
-INSERT INTO tbl1 VALUES ('a'), ('b'), ('c');
-SELECT prepareTrainEntropyLearnedHash(x, 'id1') FROM tbl1;
-SELECT trainEntropyLearnedHash('id1') FROM tbl1;
-SELECT entropyLearnedHash(x, 'id1') FROM tbl1;
-
--- with commonalities between keys
-DROP TABLE IF EXISTS tbl2;
-CREATE TABLE tbl2 (x String) ENGINE=Memory;
-INSERT INTO tbl2 VALUES ('aa'), ('ba'), ('ca');
-SELECT prepareTrainEntropyLearnedHash(x, 'id2') FROM tbl2;
-SELECT trainEntropyLearnedHash('id2') FROM tbl2;
-SELECT entropyLearnedHash(x, 'id2') FROM tbl2;
-
--- negative tests
-SELECT prepareTrainEntropyLearnedHash(x, 1) FROM tbl1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT prepareTrainEntropyLearnedHash(1, 'id1') FROM tbl1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT trainEntropyLearnedHash(1) FROM tbl1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT entropyLearnedHash(1, 'id1') FROM tbl1; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-SELECT entropyLearnedHash(x, 'non-existing id') FROM tbl1; -- { serverError BAD_ARGUMENTS }
-
-DROP TABLE tbl1;
-DROP TABLE tbl2;
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@ -1453,6 +1453,7 @@ gRPC
 gccMurmurHash
 gcem
 generateRandom
+generateRandomStructure
 generateULID
 generateUUIDv
 geoDistance