Merge remote-tracking branch 'origin/master' into cluster_discovery

2024-11-10 09:32:06 +00:00 · 2023-05-24 17:37:01 +08:00 · 2023-05-24 17:37:01 +08:00 · 0df4164180
commit 0df4164180
parent 6e80537ab6 32ffa2ae0b
221 changed files with 3458 additions and 1280 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -267,7 +267,7 @@
 	url = https://github.com/ClickHouse/nats.c
 [submodule "contrib/vectorscan"]
 	path = contrib/vectorscan
-	url = https://github.com/ClickHouse/vectorscan.git
+	url = https://github.com/VectorCamp/vectorscan.git
 [submodule "contrib/c-ares"]
 	path = contrib/c-ares
 	url = https://github.com/ClickHouse/c-ares
--- a/base/base/StringRef.h
+++ b/base/base/StringRef.h
@ -3,6 +3,7 @@
 #include <cassert>
 #include <stdexcept> // for std::logic_error
 #include <string>
+#include <type_traits>
 #include <vector>
 #include <functional>
 #include <iosfwd>
@ -326,5 +327,16 @@ namespace ZeroTraits
    inline void set(StringRef & x) { x.size = 0; }
 }

+namespace PackedZeroTraits
+{
+    template <typename Second, template <typename, typename> class PackedPairNoInit>
+    inline bool check(const PackedPairNoInit<StringRef, Second> p)
+    { return 0 == p.key.size; }
+
+    template <typename Second, template <typename, typename> class PackedPairNoInit>
+    inline void set(PackedPairNoInit<StringRef, Second> & p)
+    { p.key.size = 0; }
+}
+

 std::ostream & operator<<(std::ostream & os, const StringRef & str);
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@ -8,6 +8,9 @@ option (SANITIZE "Enable one of the code sanitizers" "")

 set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")

+# It's possible to pass an ignore list to sanitizers (-fsanitize-ignorelist). Intentionally not doing this because
+# 1. out-of-source suppressions are awkward 2. it seems ignore lists don't work after the Clang v16 upgrade (#49829)
+
 if (SANITIZE)
    if (SANITIZE STREQUAL "address")
        set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
@ -29,7 +32,7 @@ if (SANITIZE)

        # Linking can fail due to relocation overflows (see #49145), caused by too big object files / libraries.
        # Work around this with position-independent builds (-fPIC and -fpie), this is slightly slower than non-PIC/PIE but that's okay.
-        set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie -fsanitize-blacklist=${PROJECT_SOURCE_DIR}/tests/msan_suppressions.txt")
+        set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie")
        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
        set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")

--- a/contrib/vectorscan
+++ b/contrib/vectorscan
@ -1 +1 @@
-Subproject commit 1f4d448314e581473103187765e4c949d01b4259
+Subproject commit 38431d111781843741a781a57a6381a527d900a4
--- a/docs/en/development/build.md
+++ b/docs/en/development/build.md
@ -22,7 +22,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS.
 ### Install Prerequisites {#install-prerequisites}

 ``` bash
-sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk
+sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-release wget software-properties-common gnupg
 ```

 ### Install and Use the Clang compiler
@ -46,6 +46,11 @@ As of April 2023, any version of Clang >= 15 will work.
 GCC as a compiler is not supported
 To build with a specific Clang version:

+:::tip
+This is optional. If you are following along and have just installed Clang, check
+which version you have installed before setting these environment variables.
+:::
+
 ``` bash
 export CC=clang-16
 export CXX=clang++-16
--- a/docs/en/interfaces/cli.md
+++ b/docs/en/interfaces/cli.md
@ -181,7 +181,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
 - `--queries-file` – file path with queries to execute. You must specify either `query` or `queries-file` option.
 - `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default).
 - `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter).
- `--multiquery, -n` – If specified, allow processing multiple queries separated by semicolons.
+- `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
 - `--format, -f` – Use the specified default format to output the result.
 - `--vertical, -E` – If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `–format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
 - `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode.
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -452,6 +452,8 @@ Possible values:

 The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned.

+ Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.
+
 - hash

 [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section.
@ -1714,7 +1716,7 @@ Default value: `100000`.

 ### async_insert_max_query_number {#async-insert-max-query-number}

-The maximum number of insert queries per block before being inserted. This setting takes effect only if [async_insert_deduplicate](#settings-async-insert-deduplicate) is enabled.
+The maximum number of insert queries per block before being inserted. This setting takes effect only if [async_insert_deduplicate](#async-insert-deduplicate) is enabled.

 Possible values:

@ -1745,7 +1747,7 @@ Possible values:

 Default value: `0`.

-### async_insert_deduplicate {#settings-async-insert-deduplicate}
+### async_insert_deduplicate {#async-insert-deduplicate}

 Enables or disables insert deduplication of `ASYNC INSERT` (for Replicated\* tables).

--- a/docs/en/operations/utilities/clickhouse-local.md
+++ b/docs/en/operations/utilities/clickhouse-local.md
@ -188,6 +188,7 @@ Arguments:
 - `-N`, `--table` — table name where to put output data, `table` by default.
 - `--format`, `--output-format` — output format, `TSV` by default.
 - `-d`, `--database` — default database, `_local` by default.
+- `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
 - `--stacktrace` — whether to dump debug output in case of exception.
 - `--echo` — print query before execution.
 - `--verbose` — more details on query execution.
--- a/docs/en/sql-reference/dictionaries/index.md
+++ b/docs/en/sql-reference/dictionaries/index.md
@ -267,14 +267,16 @@ or
 LAYOUT(HASHED())
 ```

-If `shards` greater then 1 (default is `1`) the dictionary will load data in parallel, useful if you have huge amount of elements in one dictionary.
-
 Configuration example:

 ``` xml
 <layout>
  <hashed>
+    <!-- If shards is greater than 1 (default is `1`), the dictionary will load
+         data in parallel; this is useful if you have a huge number of elements
+         in one dictionary. -->
    <shards>10</shards>
+
    <!-- Size of the backlog for blocks in parallel queue.

         Since the bottleneck in parallel loading is rehash, and so to avoid
@ -284,6 +286,14 @@ Configuration example:
         10000 is good balance between memory and speed.
         Even for 10e10 elements and can handle all the load without starvation. -->
    <shard_load_queue_backlog>10000</shard_load_queue_backlog>
+
+    <!-- Maximum load factor of the hash table. With greater values, memory
+         is utilized more efficiently (less memory is wasted), but read
+         performance may deteriorate.
+
+         Valid values: [0.5, 0.99]
+         Default: 0.5 -->
+    <max_load_factor>0.5</max_load_factor>
  </hashed>
 </layout>
 ```
@ -291,7 +301,7 @@ Configuration example:
 or

 ``` sql
-LAYOUT(HASHED(SHARDS 10 [SHARD_LOAD_QUEUE_BACKLOG 10000]))
+LAYOUT(HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000] [MAX_LOAD_FACTOR 0.5]))
 ```

 ### sparse_hashed
@ -304,14 +314,18 @@ Configuration example:

 ``` xml
 <layout>
-  <sparse_hashed />
+  <sparse_hashed>
+    <!-- <shards>1</shards> -->
+    <!-- <shard_load_queue_backlog>10000</shard_load_queue_backlog> -->
+    <!-- <max_load_factor>0.5</max_load_factor> -->
+  </sparse_hashed>
 </layout>
 ```

 or

 ``` sql
-LAYOUT(SPARSE_HASHED())
+LAYOUT(SPARSE_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000] [MAX_LOAD_FACTOR 0.5]))
 ```

 It is also possible to use `shards` for this type of dictionary, and again it is more important for `sparse_hashed` then for `hashed`, since `sparse_hashed` is slower.
@ -325,8 +339,9 @@ Configuration example:
 ``` xml
 <layout>
  <complex_key_hashed>
-    <shards>1</shards>
+    <!-- <shards>1</shards> -->
    <!-- <shard_load_queue_backlog>10000</shard_load_queue_backlog> -->
+    <!-- <max_load_factor>0.5</max_load_factor> -->
  </complex_key_hashed>
 </layout>
 ```
@ -334,7 +349,7 @@ Configuration example:
 or

 ``` sql
-LAYOUT(COMPLEX_KEY_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000]))
+LAYOUT(COMPLEX_KEY_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000] [MAX_LOAD_FACTOR 0.5]))
 ```

 ### complex_key_sparse_hashed
@ -346,7 +361,9 @@ Configuration example:
 ``` xml
 <layout>
  <complex_key_sparse_hashed>
-    <shards>1</shards>
+    <!-- <shards>1</shards> -->
+    <!-- <shard_load_queue_backlog>10000</shard_load_queue_backlog> -->
+    <!-- <max_load_factor>0.5</max_load_factor> -->
  </complex_key_sparse_hashed>
 </layout>
 ```
@ -354,7 +371,7 @@ Configuration example:
 or

 ``` sql
-LAYOUT(COMPLEX_KEY_SPARSE_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000]))
+LAYOUT(COMPLEX_KEY_SPARSE_HASHED([SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000] [MAX_LOAD_FACTOR 0.5]))
 ```

 ### hashed_array
@ -2197,16 +2214,16 @@ Result:
 └─────────────────────────────────┴───────┘
 ```

-## RegExp Tree Dictionary {#regexp-tree-dictionary}
+## Regular Expression Tree Dictionary {#regexp-tree-dictionary}

-Regexp Tree dictionary stores multiple trees of regular expressions with attributions. Users can retrieve strings in the dictionary. If a string matches the root of the regexp tree, we will collect the corresponding attributes of the matched root and continue to walk the children. If any of the children matches the string, we will collect attributes and rewrite the old ones if conflicts occur, then continue the traverse until we reach leaf nodes.
+Regular expression tree dictionaries are a special type of dictionary that represents the mapping from key to attributes using a tree of regular expressions. There are some use cases, e.g. parsing of [user agent](https://en.wikipedia.org/wiki/User_agent) strings, which can be expressed elegantly with regexp tree dictionaries.

-Example of the ddl query for creating Regexp Tree dictionary:
+### Use Regular Expression Tree Dictionary in ClickHouse Open-Source

-<CloudDetails />
+Regular expression tree dictionaries are defined in ClickHouse open-source using the `YAMLRegExpTree` source, which is given the path to a YAML file containing the regular expression tree.

 ```sql
-create dictionary regexp_dict
+CREATE DICTIONARY regexp_dict
 (
    regexp String,
    name String,
@ -2218,17 +2235,15 @@ LAYOUT(regexp_tree)
 ...
 ```

-**Source**
+The dictionary source `YAMLRegExpTree` represents the structure of a regexp tree. For example:

-We introduce a type of source called `YAMLRegExpTree` representing the structure of Regexp Tree dictionary. An Example of a valid yaml config is like:
-
-```xml
+```yaml
 - regexp: 'Linux/(\d+[\.\d]*).+tlinux'
  name: 'TencentOS'
  version: '\1'

 - regexp: '\d+/tclwebkit(?:\d+[\.\d]*)'
-  name: 'Andriod'
+  name: 'Android'
  versions:
    - regexp: '33/tclwebkit'
      version: '13'
@ -2240,17 +2255,14 @@ We introduce a type of source called `YAMLRegExpTree` representing the structure
      version: '10'
 ```

-The key `regexp` represents the regular expression of a tree node. The name of key is same as the dictionary key. The `name` and `version` is user-defined attributions in the dicitionary. The `versions` (which can be any name that not appear in attributions or the key) indicates the children nodes of this tree.
+This config consists of a list of regular expression tree nodes. Each node has the following structure:

-**Back Reference**
+- **regexp**: the regular expression of the node.
+- **attributes**: a list of user-defined dictionary attributes. In this example, there are two attributes: `name` and `version`. The first node defines both attributes. The second node only defines attribute `name`. Attribute `version` is provided by the child nodes of the second node.
+  - The value of an attribute may contain **back references**, referring to capture groups of the matched regular expression. In the example, the value of attribute `version` in the first node consists of a back-reference `\1` to capture group `(\d+[\.\d]*)` in the regular expression. Back-reference numbers range from 1 to 9 and are written as `$1` or `\1` (for number 1). The back reference is replaced by the matched capture group during query execution.
+- **child nodes**: a list of children of a regexp tree node, each of which has its own attributes and (potentially) child nodes. String matching proceeds in a depth-first fashion. If a string matches a regexp node, the dictionary checks if it also matches the node's child nodes. If that is the case, the attributes of the deepest matching node are assigned. Attributes of a child node overwrite equally named attributes of parent nodes. The name of child nodes in YAML files can be arbitrary, e.g. `versions` in the above example.

-The value of an attribution could contain a back reference which refers to a capture group of the matched regular expression. Reference number ranges from 1 to 9 and writes as `$1` or `\1`.
-
-During the query execution, the back reference in the value will be replaced by the matched capture group.
-
-**Query**
-
-Due to the specialty of Regexp Tree dictionary, we only allow functions `dictGet`, `dictGetOrDefault` and `dictGetOrNull` work with it.
+Regexp tree dictionaries only allow access using functions `dictGet`, `dictGetOrDefault` and `dictGetOrNull`.

 Example:

@ -2260,12 +2272,83 @@ SELECT dictGet('regexp_dict', ('name', 'version'), '31/tclwebkit1024');

 Result:

-```
+```text
 ┌─dictGet('regexp_dict', ('name', 'version'), '31/tclwebkit1024')─┐
-│ ('Andriod','12')                                                │
+│ ('Android','12')                                                │
 └─────────────────────────────────────────────────────────────────┘
 ```

+In this case, we first match the regular expression `\d+/tclwebkit(?:\d+[\.\d]*)` in the top layer's second node. The dictionary then continues to look into the child nodes and finds that the string also matches `3[12]/tclwebkit`. As a result, the value of attribute `name` is `Android` (defined in the first layer) and the value of attribute `version` is `12` (defined in the child node).
+
+With a powerful YAML configuration file, we can use regexp tree dictionaries as a user agent string parser. We support [uap-core](https://github.com/ua-parser/uap-core) and demonstrate how to use it in the functional test [02504_regexp_dictionary_ua_parser](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/02504_regexp_dictionary_ua_parser.sh).
+
+### Use Regular Expression Tree Dictionary in ClickHouse Cloud
+
+The `YAMLRegExpTree` source used above works in ClickHouse Open Source but not in ClickHouse Cloud. To use regexp tree dictionaries in ClickHouse Cloud, first create a regexp tree dictionary from a YAML file locally in ClickHouse Open Source, then dump this dictionary into a CSV file using the `dictionary` table function and the [INTO OUTFILE](../statements/select/into-outfile.md) clause.
+
+```sql
+SELECT * FROM dictionary(regexp_dict) INTO OUTFILE('regexp_dict.csv')
+```
+
+The content of the CSV file is:
+
+```text
+1,0,"Linux/(\d+[\.\d]*).+tlinux","['version','name']","['\\1','TencentOS']"
+2,0,"(\d+)/tclwebkit(\d+[\.\d]*)","['comment','version','name']","['test $1 and $2','$1','Android']"
+3,2,"33/tclwebkit","['version']","['13']"
+4,2,"3[12]/tclwebkit","['version']","['12']"
+5,2,"3[12]/tclwebkit","['version']","['11']"
+6,2,"3[12]/tclwebkit","['version']","['10']"
+```
+
+The schema of the dumped file is:
+
+- `id UInt64`: the id of the RegexpTree node.
+- `parent_id UInt64`: the id of the parent of a node.
+- `regexp String`: the regular expression string.
+- `keys Array(String)`: the names of user-defined attributes.
+- `values Array(String)`: the values of user-defined attributes.
+
+To create the dictionary in ClickHouse Cloud, first create a table `regexp_dictionary_source_table` with the following table structure:
+
+```sql
+CREATE TABLE regexp_dictionary_source_table
+(
+    id UInt64,
+    parent_id UInt64,
+    regexp String,
+    keys   Array(String),
+    values Array(String)
+) ENGINE=Memory;
+```
+
+Then upload the local CSV file by running:
+
+```bash
+clickhouse client \
+    --host MY_HOST \
+    --secure \
+    --password MY_PASSWORD \
+    --query "
+    INSERT INTO regexp_dictionary_source_table 
+    SELECT * FROM input ('id UInt64, parent_id UInt64, regexp String, keys Array(String), values Array(String)') 
+    FORMAT CSV" < regexp_dict.csv
+```
+
+See [Insert Local Files](https://clickhouse.com/docs/en/integrations/data-ingestion/insert-local-files) for more details. After initializing the source table, we can create a regexp tree dictionary from the table source:
+
+``` sql
+CREATE DICTIONARY regexp_dict
+(
+    regexp String,
+    name String,
+    version String
+)
+PRIMARY KEY(regexp)
+SOURCE(CLICKHOUSE(TABLE 'regexp_dictionary_source_table'))
+LIFETIME(0)
+LAYOUT(regexp_tree);
+```
+
 ## Embedded Dictionaries {#embedded-dictionaries}

 <SelfManaged />
--- a/docs/en/sql-reference/functions/comparison-functions.md
+++ b/docs/en/sql-reference/functions/comparison-functions.md
@ -20,7 +20,7 @@ Strings are compared byte-by-byte. Note that this may lead to unexpected results

 A string S1 which has another string S2 as prefix is considered longer than S2.

-## equals
+## equals, `=`, `==` operators

 **Syntax**

@ -32,7 +32,7 @@ Alias:
 - `a = b` (operator)
 - `a == b` (operator)

-## notEquals
+## notEquals, `!=`, `<>` operators

 **Syntax**

@ -44,7 +44,7 @@ Alias:
 - `a != b` (operator)
 - `a <> b` (operator)

-## less
+## less, `<` operator

 **Syntax**

@ -55,7 +55,7 @@ less(a, b)
 Alias:
 - `a < b` (operator)

-## greater
+## greater, `>` operator

 **Syntax**

@ -66,7 +66,7 @@ greater(a, b)
 Alias:
 - `a > b` (operator)

-## lessOrEquals
+## lessOrEquals, `<=` operator

 **Syntax**

--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@ -357,14 +357,14 @@ Alias: `SECOND`.

 ## toUnixTimestamp

-For DateTime arguments: converts the value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
+Converts a string, a date or a date with time to the [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time) in `UInt32` representation.

-For String argument: converts the input string to the datetime according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp.
+If the function is called with a string, it accepts an optional timezone argument.

 **Syntax**

 ``` sql
-toUnixTimestamp(datetime)
+toUnixTimestamp(date)
 toUnixTimestamp(str, [timezone])
 ```

@ -377,15 +377,29 @@ Type: `UInt32`.
 **Example**

 ``` sql
-SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp
+SELECT
+    '2017-11-05 08:07:47' AS dt_str,
+    toUnixTimestamp(dt_str) AS from_str,
+    toUnixTimestamp(dt_str, 'Asia/Tokyo') AS from_str_tokyo,
+    toUnixTimestamp(toDateTime(dt_str)) AS from_datetime,
+    toUnixTimestamp(toDateTime64(dt_str, 0)) AS from_datetime64,
+    toUnixTimestamp(toDate(dt_str)) AS from_date,
+    toUnixTimestamp(toDate32(dt_str)) AS from_date32
+FORMAT Vertical;
 ```

 Result:

 ``` text
-┌─unix_timestamp─┐
-│     1509836867 │
-└────────────────┘
+Row 1:
+──────
+dt_str:          2017-11-05 08:07:47
+from_str:        1509869267
+from_str_tokyo:  1509836867
+from_datetime:   1509869267
+from_datetime64: 1509869267
+from_date:       1509840000
+from_date32:     1509840000
 ```

 :::note
--- a/docs/en/sql-reference/functions/nlp-functions.md
+++ b/docs/en/sql-reference/functions/nlp-functions.md
@ -12,18 +12,18 @@ This is an experimental feature that is currently in development and is not read

 Performs stemming on a given word.

-**Syntax**
+### Syntax

 ``` sql
 stem('language', word)
 ```

-**Arguments**
+### Arguments

- `language` — Language which rules will be applied. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
+- `language` — Language whose rules will be applied. Use the two-letter [ISO 639-1 code](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes).
 - `word` — word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).

-**Examples**
+### Examples

 Query:

@ -38,23 +38,58 @@ Result:
 │ ['I','think','it','is','a','bless','in','disguis'] │
 └────────────────────────────────────────────────────┘
 ```
+### Supported languages for stem()
+
+:::note
+The stem() function uses the [Snowball stemming](https://snowballstem.org/) library. See the Snowball website for the up-to-date list of supported languages and other details.
+:::
+
+- Arabic
+- Armenian
+- Basque
+- Catalan
+- Danish
+- Dutch
+- English
+- Finnish
+- French
+- German
+- Greek
+- Hindi
+- Hungarian
+- Indonesian
+- Irish
+- Italian
+- Lithuanian
+- Nepali
+- Norwegian
+- Porter
+- Portuguese
+- Romanian
+- Russian
+- Serbian
+- Spanish
+- Swedish
+- Tamil
+- Turkish
+- Yiddish

 ## lemmatize

 Performs lemmatization on a given word. Needs dictionaries to operate, which can be obtained [here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).

-**Syntax**
+### Syntax

 ``` sql
 lemmatize('language', word)
 ```

-**Arguments**
+### Arguments

 - `language` — Language which rules will be applied. [String](../../sql-reference/data-types/string.md#string).
 - `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string).

-**Examples**
+### Examples

 Query:

@ -70,12 +105,18 @@ Result:
 └─────────────────────┘
 ```

-Configuration:
+### Configuration
+
+This configuration specifies that the dictionary `en.bin` should be used for lemmatization of English (`en`) words.  The `.bin` files can be downloaded from 
+[here](https://github.com/vpodpecan/lemmagen3/tree/master/src/lemmagen3/models).
+
 ``` xml
 <lemmatizers>
    <lemmatizer>
+        <!-- highlight-start -->
        <lang>en</lang>
        <path>en.bin</path>
+        <!-- highlight-end -->
    </lemmatizer>
 </lemmatizers>
 ```
@ -88,18 +129,18 @@ With the `plain` extension type we need to provide a path to a simple text file,

 With the `wordnet` extension type we need to provide a path to a directory with WordNet thesaurus in it. Thesaurus must contain a WordNet sense index.

-**Syntax**
+### Syntax

 ``` sql
 synonyms('extension_name', word)
 ```

-**Arguments**
+### Arguments

 - `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string).
 - `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string).

-**Examples**
+### Examples

 Query:

@ -115,7 +156,7 @@ Result:
 └──────────────────────────────────────────┘
 ```

-Configuration:
+### Configuration
 ``` xml
 <synonyms_extensions>
    <extension>
@ -137,17 +178,17 @@ Detects the language of the UTF8-encoded input string. The function uses the [CL

 The `detectLanguage` function works best when providing over 200 characters in the input string.

-**Syntax**
+### Syntax

 ``` sql
 detectLanguage('text_to_be_analyzed')
 ```

-**Arguments**
+### Arguments

 - `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).

-**Returned value**
+### Returned value

 - The 2-letter ISO code of the detected language

@ -156,7 +197,7 @@ Other possible results:
 - `un` = unknown, can not detect any language.
 - `other` = the detected language does not have 2 letter code.

-**Examples**
+### Examples

 Query:

@ -175,22 +216,22 @@ fr
 Similar to the `detectLanguage` function, but `detectLanguageMixed` returns a `Map` of 2-letter language codes that are mapped to the percentage of the certain language in the text.


-**Syntax**
+### Syntax

 ``` sql
 detectLanguageMixed('text_to_be_analyzed')
 ```

-**Arguments**
+### Arguments

 - `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).

-**Returned value**
+### Returned value

 - `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language


-**Examples**
+### Examples

 Query:

@ -211,17 +252,17 @@ Result:
 Similar to the `detectLanguage` function, except the `detectLanguageUnknown` function works with non-UTF8-encoded strings. Prefer this version when your character set is UTF-16 or UTF-32.


-**Syntax**
+### Syntax

 ``` sql
 detectLanguageUnknown('text_to_be_analyzed')
 ```

-**Arguments**
+### Arguments

 - `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).

-**Returned value**
+### Returned value

 - The 2-letter ISO code of the detected language

@ -230,7 +271,7 @@ Other possible results:
 - `un` = unknown, can not detect any language.
 - `other` = the detected language does not have 2 letter code.

-**Examples**
+### Examples

 Query:

@ -251,21 +292,21 @@ Result:
 The `detectCharset` function detects the character set of the non-UTF8-encoded input string.


-**Syntax**
+### Syntax

 ``` sql
 detectCharset('text_to_be_analyzed')
 ```

-**Arguments**
+### Arguments

 - `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).

-**Returned value**
+### Returned value

 - A `String` containing the code of the detected character set

-**Examples**
+### Examples

 Query:

--- a/docs/en/sql-reference/table-functions/url.md
+++ b/docs/en/sql-reference/table-functions/url.md
@ -46,3 +46,12 @@ SELECT * FROM test_table;

 Patterns in curly brackets `{ }` are used to generate a set of shards or to specify failover addresses. Supported pattern types and examples see in the description of the [remote](remote.md#globs-in-addresses) function.
 Character `|` inside patterns is used to specify failover addresses. They are iterated in the same order as listed in the pattern. The number of generated addresses is limited by [glob_expansion_max_elements](../../operations/settings/settings.md#glob_expansion_max_elements) setting.
+
+## Virtual Columns
+
+- `_path` — Path to the `URL`.
+- `_file` — Resource name of the `URL`.
+
+**See Also**
+
+- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns)
--- a/docs/ru/interfaces/cli.md
+++ b/docs/ru/interfaces/cli.md
@ -132,7 +132,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
 -   `--queries-file` - путь к файлу с запросами для выполнения. Необходимо указать только одну из опций: `query` или `queries-file`.
 -   `--database, -d` — выбрать текущую БД. Без указания значение берется из настроек сервера (по умолчанию — БД ‘default’).
 -   `--multiline, -m` — если указано — разрешить многострочные запросы, не отправлять запрос по нажатию Enter.
-   `--multiquery, -n` — если указано — разрешить выполнять несколько запросов, разделённых точкой с запятой.
+-   `--multiquery, -n` — Если указано, то после опции `--query` могут быть перечислены несколько запросов, разделенных точкой с запятой. Для удобства можно также опустить `--query` и передавать запросы непосредственно после `--multiquery`.
 -   `--format, -f` — использовать указанный формат по умолчанию для вывода результата.
 -   `--vertical, -E` — если указано, использовать по умолчанию формат [Vertical](../interfaces/formats.md#vertical) для вывода результата. То же самое, что `–format=Vertical`. В этом формате каждое значение выводится на отдельной строке, что удобно для отображения широких таблиц.
 -   `--time, -t` — если указано, в неинтерактивном режиме вывести время выполнения запроса в поток ‘stderr’.
--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@ -235,13 +235,13 @@ SELECT toDateTime('2021-04-21 10:20:30', 'Europe/Moscow') AS Time, toTypeName(Ti

 ## toUnixTimestamp {#to-unix-timestamp}

-Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
-Для аргумента String, строка конвертируется в дату и время в соответствии с часовым поясом (необязательный второй аргумент, часовой пояс сервера используется по умолчанию).
+Переводит строку, дату или дату-с-временем в [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time), имеющий тип `UInt32`.
+Строка может сопровождаться вторым (необязательным) аргументом, указывающим часовой пояс.

 **Синтаксис**

 ``` sql
-toUnixTimestamp(datetime)
+toUnixTimestamp(date)
 toUnixTimestamp(str, [timezone])
 ```

@ -256,15 +256,29 @@ toUnixTimestamp(str, [timezone])
 Запрос:

 ``` sql
-SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
+SELECT
+    '2017-11-05 08:07:47' AS dt_str,
+    toUnixTimestamp(dt_str) AS from_str,
+    toUnixTimestamp(dt_str, 'Asia/Tokyo') AS from_str_tokyo,
+    toUnixTimestamp(toDateTime(dt_str)) AS from_datetime,
+    toUnixTimestamp(toDateTime64(dt_str, 0)) AS from_datetime64,
+    toUnixTimestamp(toDate(dt_str)) AS from_date,
+    toUnixTimestamp(toDate32(dt_str)) AS from_date32
+FORMAT Vertical;
 ```

 Результат:

 ``` text
-┌─unix_timestamp─┐
-│     1509836867 │
-└────────────────┘
+Row 1:
+──────
+dt_str:          2017-11-05 08:07:47
+from_str:        1509869267
+from_str_tokyo:  1509836867
+from_datetime:   1509869267
+from_datetime64: 1509869267
+from_date:       1509840000
+from_date32:     1509840000
 ```

 :::note
--- a/docs/ru/sql-reference/table-functions/url.md
+++ b/docs/ru/sql-reference/table-functions/url.md
@ -46,3 +46,12 @@ SELECT * FROM test_table;

 Шаблоны в фигурных скобках `{ }` используются, чтобы сгенерировать список шардов или указать альтернативные адреса на случай отказа. Поддерживаемые типы шаблонов и примеры смотрите в описании функции [remote](remote.md#globs-in-addresses).
 Символ `|` внутри шаблонов используется, чтобы задать адреса, если предыдущие оказались недоступны. Эти адреса перебираются в том же порядке, в котором они указаны в шаблоне. Количество адресов, которые могут быть сгенерированы, ограничено настройкой [glob_expansion_max_elements](../../operations/settings/settings.md#glob_expansion_max_elements).
+
+## Виртуальные столбцы
+
+-   `_path` — Путь до `URL`.
+-   `_file` — Имя ресурса `URL`.
+
+**Смотрите также**
+
+-   [Виртуальные столбцы](index.md#table_engines-virtual_columns)
--- a/docs/zh/development/build-osx.md
+++ b/docs/zh/development/build-osx.md
@ -46,7 +46,7 @@ $ cd ..

 为此，请创建以下文件：

-/资源库/LaunchDaemons/limit.maxfiles.plist:
+/Library/LaunchDaemons/limit.maxfiles.plist:

 ``` xml
 <?xml version="1.0" encoding="UTF-8"?>
--- a/docs/zh/sql-reference/table-functions/url.md
+++ b/docs/zh/sql-reference/table-functions/url.md
@ -41,3 +41,11 @@ CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory;
 INSERT INTO FUNCTION url('http://127.0.0.1:8123/?query=INSERT+INTO+test_table+FORMAT+CSV', 'CSV', 'column1 String, column2 UInt32') VALUES ('http interface', 42);
 SELECT * FROM test_table;
 ```
+## 虚拟列 {#virtual-columns}
+
+-   `_path` — `URL`路径。
+-   `_file` — 资源名称。
+
+**另请参阅**
+
+-   [虚拟列](https://clickhouse.com/docs/en/operations/table_engines/#table_engines-virtual_columns)
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@ -1181,7 +1181,7 @@ void Client::processOptions(const OptionsDescription & options_description,
 void Client::processConfig()
 {
    /// Batch mode is enabled if one of the following is true:
-    /// - -e (--query) command line option is present.
+    /// - -q (--query) command line option is present.
    ///   The value of the option is used as the text of query (or of multiple queries).
    ///   If stdin is not a terminal, INSERT data for the first query is read from it.
    /// - stdin is not a terminal. In this case queries are read from it.
@ -1381,6 +1381,13 @@ void Client::readArguments(
                allow_repeated_settings = true;
            else if (arg == "--allow_merge_tree_settings")
                allow_merge_tree_settings = true;
+            else if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
+            {
+                /// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
+                ++arg_num;
+                arg = argv[arg_num];
+                addMultiquery(arg, common_arguments);
+            }
            else
                common_arguments.emplace_back(arg);
        }
--- a/programs/diagnostics/go.mod
+++ b/programs/diagnostics/go.mod
@ -33,6 +33,7 @@ require (
 	github.com/cenkalti/backoff/v4 v4.2.0 // indirect
 	github.com/containerd/containerd v1.6.17 // indirect
 	github.com/davecgh/go-spew v1.1.1 // indirect
+	github.com/distribution/distribution v2.8.2+incompatible // indirect
 	github.com/docker/distribution v2.8.1+incompatible // indirect
 	github.com/docker/docker v23.0.0+incompatible // indirect
 	github.com/docker/go-units v0.5.0 // indirect
--- a/programs/diagnostics/go.sum
+++ b/programs/diagnostics/go.sum
@ -126,6 +126,8 @@ github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxG
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/distribution/distribution v2.8.2+incompatible h1:k9+4DKdOG+quPFZXT/mUsiQrGu9vYCp+dXpuPkuqhk8=
+github.com/distribution/distribution v2.8.2+incompatible/go.mod h1:EgLm2NgWtdKgzF9NpMzUKgzmR7AMmb0VQi2B+ZzDRjc=
 github.com/docker/distribution v2.8.1+incompatible h1:Q50tZOPR6T/hjNsyc9g8/syEs6bk8XXApsHjKukMl68=
 github.com/docker/distribution v2.8.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w=
 github.com/docker/docker v23.0.0+incompatible h1:L6c28tNyqZ4/ub9AZC9d5QUuunoHHfEH4/Ue+h/E5nE=
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@ -69,6 +69,7 @@ if (BUILD_STANDALONE_KEEPER)
        ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@ -11,6 +11,9 @@
 #include <Core/ServerUUID.h>
 #include <Common/logger_useful.h>
 #include <Common/ErrorHandlers.h>
+#include <Common/assertProcessUserMatchesDataOwner.h>
+#include <Common/makeSocketAddress.h>
+#include <Server/waitServersToFinish.h>
 #include <base/scope_guard.h>
 #include <base/safeExit.h>
 #include <Poco/Net/NetException.h>
@ -75,92 +78,9 @@ namespace ErrorCodes
    extern const int NO_ELEMENTS_IN_CONFIG;
    extern const int SUPPORT_IS_DISABLED;
    extern const int NETWORK_ERROR;
-    extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
-    extern const int FAILED_TO_GETPWUID;
    extern const int LOGICAL_ERROR;
 }

-namespace
-{
-
-size_t waitServersToFinish(std::vector<DB::ProtocolServerAdapter> & servers, size_t seconds_to_wait)
-{
-    const size_t sleep_max_ms = 1000 * seconds_to_wait;
-    const size_t sleep_one_ms = 100;
-    size_t sleep_current_ms = 0;
-    size_t current_connections = 0;
-    for (;;)
-    {
-        current_connections = 0;
-
-        for (auto & server : servers)
-        {
-            server.stop();
-            current_connections += server.currentConnections();
-        }
-
-        if (!current_connections)
-            break;
-
-        sleep_current_ms += sleep_one_ms;
-        if (sleep_current_ms < sleep_max_ms)
-            std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
-        else
-            break;
-    }
-    return current_connections;
-}
-
-Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port, Poco::Logger * log)
-{
-    Poco::Net::SocketAddress socket_address;
-    try
-    {
-        socket_address = Poco::Net::SocketAddress(host, port);
-    }
-    catch (const Poco::Net::DNSException & e)
-    {
-        const auto code = e.code();
-        if (code == EAI_FAMILY
-#if defined(EAI_ADDRFAMILY)
-                    || code == EAI_ADDRFAMILY
-#endif
-           )
-        {
-            LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. "
-                "If it is an IPv6 address and your host has disabled IPv6, then consider to "
-                "specify IPv4 address to listen in <listen_host> element of configuration "
-                "file. Example: <listen_host>0.0.0.0</listen_host>",
-                host, e.code(), e.message());
-        }
-
-        throw;
-    }
-    return socket_address;
-}
-
-std::string getUserName(uid_t user_id)
-{
-    /// Try to convert user id into user name.
-    auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX);
-    if (buffer_size <= 0)
-        buffer_size = 1024;
-    std::string buffer;
-    buffer.reserve(buffer_size);
-
-    struct passwd passwd_entry;
-    struct passwd * result = nullptr;
-    const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result);
-
-    if (error)
-        throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error);
-    else if (result)
-        return result->pw_name;
-    return toString(user_id);
-}
-
-}
-
 Poco::Net::SocketAddress Keeper::socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure) const
 {
    auto address = makeSocketAddress(host, port, &logger());
@ -364,24 +284,7 @@ try
    std::filesystem::create_directories(path);

    /// Check that the process user id matches the owner of the data.
-    const auto effective_user_id = geteuid();
-    struct stat statbuf;
-    if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid)
-    {
-        const auto effective_user = getUserName(effective_user_id);
-        const auto data_owner = getUserName(statbuf.st_uid);
-        std::string message = "Effective user of the process (" + effective_user +
-            ") does not match the owner of the data (" + data_owner + ").";
-        if (effective_user_id == 0)
-        {
-            message += " Run under 'sudo -u " + data_owner + "'.";
-            throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
-        }
-        else
-        {
-            LOG_WARNING(log, fmt::runtime(message));
-        }
-    }
+    assertProcessUserMatchesDataOwner(path, [&](const std::string & message){ LOG_WARNING(log, fmt::runtime(message)); });

    DB::ServerUUID::load(path + "/uuid", log);

--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@ -818,8 +818,16 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
 {
    for (int arg_num = 1; arg_num < argc; ++arg_num)
    {
-        const char * arg = argv[arg_num];
-        common_arguments.emplace_back(arg);
+        std::string_view arg = argv[arg_num];
+        if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
+        {
+            /// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
+            ++arg_num;
+            arg = argv[arg_num];
+            addMultiquery(arg, common_arguments);
+        }
+        else
+            common_arguments.emplace_back(arg);
    }
 }

--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -39,6 +39,9 @@
 #include <Common/remapExecutable.h>
 #include <Common/TLDListsHolder.h>
 #include <Common/Config/AbstractConfigurationComparison.h>
+#include <Common/assertProcessUserMatchesDataOwner.h>
+#include <Common/makeSocketAddress.h>
+#include <Server/waitServersToFinish.h>
 #include <Core/ServerUUID.h>
 #include <IO/ReadHelpers.h>
 #include <IO/ReadBufferFromFile.h>
@ -200,40 +203,6 @@ int mainEntryClickHouseServer(int argc, char ** argv)
    }
 }

-
-namespace
-{
-
-size_t waitServersToFinish(std::vector<DB::ProtocolServerAdapter> & servers, size_t seconds_to_wait)
-{
-    const size_t sleep_max_ms = 1000 * seconds_to_wait;
-    const size_t sleep_one_ms = 100;
-    size_t sleep_current_ms = 0;
-    size_t current_connections = 0;
-    for (;;)
-    {
-        current_connections = 0;
-
-        for (auto & server : servers)
-        {
-            server.stop();
-            current_connections += server.currentConnections();
-        }
-
-        if (!current_connections)
-            break;
-
-        sleep_current_ms += sleep_one_ms;
-        if (sleep_current_ms < sleep_max_ms)
-            std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
-        else
-            break;
-    }
-    return current_connections;
-}
-
-}
-
 namespace DB
 {

@ -244,8 +213,6 @@ namespace ErrorCodes
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int EXCESSIVE_ELEMENT_IN_CONFIG;
    extern const int INVALID_CONFIG_PARAMETER;
-    extern const int FAILED_TO_GETPWUID;
-    extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
    extern const int NETWORK_ERROR;
    extern const int CORRUPTED_DATA;
 }
@ -261,54 +228,6 @@ static std::string getCanonicalPath(std::string && path)
    return std::move(path);
 }

-static std::string getUserName(uid_t user_id)
-{
-    /// Try to convert user id into user name.
-    auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX);
-    if (buffer_size <= 0)
-        buffer_size = 1024;
-    std::string buffer;
-    buffer.reserve(buffer_size);
-
-    struct passwd passwd_entry;
-    struct passwd * result = nullptr;
-    const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result);
-
-    if (error)
-        throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error);
-    else if (result)
-        return result->pw_name;
-    return toString(user_id);
-}
-
-Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port, Poco::Logger * log)
-{
-    Poco::Net::SocketAddress socket_address;
-    try
-    {
-        socket_address = Poco::Net::SocketAddress(host, port);
-    }
-    catch (const Poco::Net::DNSException & e)
-    {
-        const auto code = e.code();
-        if (code == EAI_FAMILY
-#if defined(EAI_ADDRFAMILY)
-                    || code == EAI_ADDRFAMILY
-#endif
-           )
-        {
-            LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. "
-                "If it is an IPv6 address and your host has disabled IPv6, then consider to "
-                "specify IPv4 address to listen in <listen_host> element of configuration "
-                "file. Example: <listen_host>0.0.0.0</listen_host>",
-                host, e.code(), e.message());
-        }
-
-        throw;
-    }
-    return socket_address;
-}
-
 Poco::Net::SocketAddress Server::socketBindListen(
    const Poco::Util::AbstractConfiguration & config,
    Poco::Net::ServerSocket & socket,
@ -959,24 +878,7 @@ try
    std::string default_database = server_settings.default_database.toString();

    /// Check that the process user id matches the owner of the data.
-    const auto effective_user_id = geteuid();
-    struct stat statbuf;
-    if (stat(path_str.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid)
-    {
-        const auto effective_user = getUserName(effective_user_id);
-        const auto data_owner = getUserName(statbuf.st_uid);
-        std::string message = "Effective user of the process (" + effective_user +
-            ") does not match the owner of the data (" + data_owner + ").";
-        if (effective_user_id == 0)
-        {
-            message += " Run under 'sudo -u " + data_owner + "'.";
-            throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
-        }
-        else
-        {
-            global_context->addWarningMessage(message);
-        }
-    }
+    assertProcessUserMatchesDataOwner(path_str, [&](const std::string & message){ global_context->addWarningMessage(message); });

    global_context->setPath(path_str);

--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@ -6355,7 +6355,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
    auto table_function_ast = table_function_node_typed.toAST();
    table_function_ptr->parseArguments(table_function_ast, scope_context);

-    auto table_function_storage = table_function_ptr->execute(table_function_ast, scope_context, table_function_ptr->getName());
+    auto table_function_storage = scope_context->getQueryContext()->executeTableFunction(table_function_ast, table_function_ptr);
    table_function_node_typed.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context);
 }

--- a/src/Backups/BackupIO_S3.cpp
+++ b/src/Backups/BackupIO_S3.cpp
@ -255,6 +255,7 @@ std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
        client,
        s3_uri.bucket,
        fs::path(s3_uri.key) / file_name,
+        DBMS_DEFAULT_BUFFER_SIZE,
        request_settings,
        std::nullopt,
        threadPoolCallbackRunner<void>(BackupsIOThreadPool::get(), "BackupWriterS3"),
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@ -1246,6 +1246,14 @@ void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query)
 }


+void ClientBase::addMultiquery(std::string_view query, Arguments & common_arguments) const
+{
+    common_arguments.emplace_back("--multiquery");
+    common_arguments.emplace_back("-q");
+    common_arguments.emplace_back(query);
+}
+
+
 void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr parsed_query)
 {
    auto query = query_to_execute;
@ -2592,15 +2600,19 @@ void ClientBase::init(int argc, char ** argv)
        ("version-clean", "print version in machine-readable format and exit")

        ("config-file,C", po::value<std::string>(), "config-file path")
-        ("queries-file", po::value<std::vector<std::string>>()->multitoken(),
-            "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)")
-        ("database,d", po::value<std::string>(), "database")
-        ("history_file", po::value<std::string>(), "path to history file")

        ("query,q", po::value<std::string>(), "query")
-        ("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
+        ("queries-file", po::value<std::vector<std::string>>()->multitoken(),
+            "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)")
+        ("multiquery,n", "If specified, multiple queries separated by semicolons can be listed after --query. For convenience, it is also possible to omit --query and pass the queries directly after --multiquery.")
+        ("multiline,m", "If specified, allow multiline queries (do not send the query on Enter)")
+        ("database,d", po::value<std::string>(), "database")
        ("query_kind", po::value<std::string>()->default_value("initial_query"), "One of initial_query/secondary_query/no_query")
        ("query_id", po::value<std::string>(), "query_id")
+
+        ("history_file", po::value<std::string>(), "path to history file")
+
+        ("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
        ("progress", po::value<ProgressOption>()->implicit_value(ProgressOption::TTY, "tty")->default_value(ProgressOption::DEFAULT, "default"), "Print progress of queries execution - to TTY: tty|on|1|true|yes; to STDERR non-interactive mode: err; OFF: off|0|false|no; DEFAULT - interactive to TTY, non-interactive is off")

        ("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.")
@ -2612,9 +2624,6 @@ void ClientBase::init(int argc, char ** argv)
        ("log-level", po::value<std::string>(), "log level")
        ("server_logs_file", po::value<std::string>(), "put server logs into specified file")

-        ("multiline,m", "multiline")
-        ("multiquery,n", "multiquery")
-
        ("suggestion_limit", po::value<int>()->default_value(10000),
            "Suggestion limit for how many databases, tables and columns to fetch.")

--- a/src/Client/ClientBase.h
+++ b/src/Client/ClientBase.h
@ -129,6 +129,7 @@ protected:

    void setInsertionTable(const ASTInsertQuery & insert_query);

+    void addMultiquery(std::string_view query, Arguments & common_arguments) const;

 private:
    void receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, bool partial_result_on_first_cancel);
--- a/src/Client/ConnectionEstablisher.cpp
+++ b/src/Client/ConnectionEstablisher.cpp
@ -4,6 +4,8 @@

 namespace ProfileEvents
 {
+    extern const Event DistributedConnectionTries;
+    extern const Event DistributedConnectionUsable;
    extern const Event DistributedConnectionMissingTable;
    extern const Event DistributedConnectionStaleReplica;
 }
@ -35,6 +37,7 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
    SCOPE_EXIT(is_finished = true);
    try
    {
+        ProfileEvents::increment(ProfileEvents::DistributedConnectionTries);
        result.entry = pool->get(*timeouts, settings, /* force_connected = */ false);
        AsyncCallbackSetter async_setter(&*result.entry, std::move(async_callback));

@ -45,6 +48,7 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
        if (!table_to_check || server_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
        {
            result.entry->forceConnected(*timeouts);
+            ProfileEvents::increment(ProfileEvents::DistributedConnectionUsable);
            result.is_usable = true;
            result.is_up_to_date = true;
            return;
@ -65,6 +69,7 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
            return;
        }

+        ProfileEvents::increment(ProfileEvents::DistributedConnectionUsable);
        result.is_usable = true;

        UInt64 max_allowed_delay = settings ? UInt64(settings->max_replica_delay_for_distributed_queries) : 0;
--- a/src/Client/ConnectionPool.h
+++ b/src/Client/ConnectionPool.h
@ -135,7 +135,6 @@ private:
    Protocol::Compression compression; /// Whether to compress data when interacting with the server.
    Protocol::Secure secure;           /// Whether to encrypt data when interacting with the server.
    Int64 priority;                    /// priority from <remote_servers>
-
 };

 /**
@ -192,6 +191,7 @@ inline bool operator==(const ConnectionPoolFactory::Key & lhs, const ConnectionP
 {
    return lhs.max_connections == rhs.max_connections && lhs.host == rhs.host && lhs.port == rhs.port
        && lhs.default_database == rhs.default_database && lhs.user == rhs.user && lhs.password == rhs.password
+        && lhs.quota_key == rhs.quota_key
        && lhs.cluster == rhs.cluster && lhs.cluster_secret == rhs.cluster_secret && lhs.client_name == rhs.client_name
        && lhs.compression == rhs.compression && lhs.secure == rhs.secure && lhs.priority == rhs.priority;
 }
--- a/src/Client/ConnectionPoolWithFailover.cpp
+++ b/src/Client/ConnectionPoolWithFailover.cpp
@ -73,9 +73,9 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts

 Int64 ConnectionPoolWithFailover::getPriority() const
 {
-    return (*std::max_element(nested_pools.begin(), nested_pools.end(), [](const auto &a, const auto &b)
+    return (*std::max_element(nested_pools.begin(), nested_pools.end(), [](const auto & a, const auto & b)
    {
-        return a->getPriority() - b->getPriority();
+        return a->getPriority() < b->getPriority();
    }))->getPriority();
 }

--- a/src/Common/AsyncTaskExecutor.h
+++ b/src/Common/AsyncTaskExecutor.h
@ -113,8 +113,8 @@ private:
    void createFiber();
    void destroyFiber();

-    Fiber fiber;
    FiberStack fiber_stack;
+    Fiber fiber;
    std::mutex fiber_lock;
    std::exception_ptr exception;

--- a/src/Common/DateLUTImpl.h
+++ b/src/Common/DateLUTImpl.h
@ -10,6 +10,8 @@
 #include <type_traits>


+#define DATE_SECONDS_PER_DAY 86400 /// Number of seconds in a day, 60 * 60 * 24
+
 #define DATE_LUT_MIN_YEAR 1900 /// 1900 since majority of financial organizations consider 1900 as an initial year.
 #define DATE_LUT_MAX_YEAR 2299 /// Last supported year (complete)
 #define DATE_LUT_YEARS (1 + DATE_LUT_MAX_YEAR - DATE_LUT_MIN_YEAR) /// Number of years in lookup table
--- a/src/Common/HashTable/ClearableHashSet.h
+++ b/src/Common/HashTable/ClearableHashSet.h
@ -10,6 +10,10 @@
  * Instead of this class, you could just use the pair (version, key) in the HashSet as the key
  * but then the table would accumulate all the keys that it ever stored, and it was unreasonably growing.
  * This class goes a step further and considers the keys with the old version empty in the hash table.
+  *
+  * Zero values note:
+  * A cell in ClearableHashSet can store a zero value as a normal value.
+  * If its version is equal to the version of the set itself, then the cell is not considered empty even if the key's value is the zero value of the corresponding type.
  */


@ -48,30 +52,6 @@ struct ClearableHashTableCell : public BaseCell
    ClearableHashTableCell(const Key & key_, const State & state) : BaseCell(key_, state), version(state.version) {}
 };

-using StringRefBaseCell = HashSetCellWithSavedHash<StringRef, DefaultHash<StringRef>, ClearableHashSetState>;
-
-/// specialization for StringRef to allow zero size key (empty string)
-template <>
-struct ClearableHashTableCell<StringRef, StringRefBaseCell> : public StringRefBaseCell
-{
-    using State = ClearableHashSetState;
-    using value_type = typename StringRefBaseCell::value_type;
-
-    UInt32 version;
-
-    bool isZero(const State & state) const { return version != state.version; }
-    static bool isZero(const StringRef & key_, const State & state_) { return StringRefBaseCell::isZero(key_, state_); }
-
-    /// Set the key value to zero.
-    void setZero() { version = 0; }
-
-    /// Do I need to store the zero key separately (that is, can a zero key be inserted into the hash table).
-    static constexpr bool need_zero_value_storage = true;
-
-    ClearableHashTableCell() { } /// NOLINT
-    ClearableHashTableCell(const StringRef & key_, const State & state) : StringRefBaseCell(key_, state), version(state.version) { }
-};
-
 template <
    typename Key,
    typename Hash = DefaultHash<Key>,
@ -90,13 +70,6 @@ public:
    {
        ++this->version;
        this->m_size = 0;
-
-        if constexpr (Cell::need_zero_value_storage)
-        {
-            /// clear ZeroValueStorage
-            if (this->hasZero())
-                this->clearHasZero();
-        }
    }
 };

@ -119,13 +92,6 @@ public:
    {
        ++this->version;
        this->m_size = 0;
-
-        if constexpr (Cell::need_zero_value_storage)
-        {
-            /// clear ZeroValueStorage
-            if (this->hasZero())
-                this->clearHasZero();
-        }
    }
 };

--- a/src/Common/HashTable/FixedHashTable.h
+++ b/src/Common/HashTable/FixedHashTable.h
@ -358,7 +358,7 @@ public:
        std::pair<LookupResult, bool> res;
        emplace(Cell::getKey(x), res.first, res.second);
        if (res.second)
-            insertSetMapped(res.first->getMapped(), x);
+            res.first->setMapped(x);

        return res;
    }
--- a/src/Common/HashTable/HashMap.h
+++ b/src/Common/HashTable/HashMap.h
@ -9,6 +9,8 @@
 /** NOTE HashMap could only be used for memmoveable (position independent) types.
  * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++.
  * Also, key in hash table must be of type, that zero bytes is compared equals to zero key.
+  *
+  * Please keep in sync with PackedHashMap.h
  */

 namespace DB
@ -53,13 +55,13 @@ PairNoInit<std::decay_t<First>, std::decay_t<Second>> makePairNoInit(First && fi
 }


-template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
+template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState, typename Pair = PairNoInit<Key, TMapped>>
 struct HashMapCell
 {
    using Mapped = TMapped;
    using State = TState;

-    using value_type = PairNoInit<Key, Mapped>;
+    using value_type = Pair;
    using mapped_type = Mapped;
    using key_type = Key;

@ -151,14 +153,14 @@ struct HashMapCell
 namespace std
 {

-    template <typename Key, typename TMapped, typename Hash, typename TState>
-    struct tuple_size<HashMapCell<Key, TMapped, Hash, TState>> : std::integral_constant<size_t, 2> { };
+    template <typename Key, typename TMapped, typename Hash, typename TState, typename Pair>
+    struct tuple_size<HashMapCell<Key, TMapped, Hash, TState, Pair>> : std::integral_constant<size_t, 2> { };

-    template <typename Key, typename TMapped, typename Hash, typename TState>
-    struct tuple_element<0, HashMapCell<Key, TMapped, Hash, TState>> { using type = Key; };
+    template <typename Key, typename TMapped, typename Hash, typename TState, typename Pair>
+    struct tuple_element<0, HashMapCell<Key, TMapped, Hash, TState, Pair>> { using type = Key; };

-    template <typename Key, typename TMapped, typename Hash, typename TState>
-    struct tuple_element<1, HashMapCell<Key, TMapped, Hash, TState>> { using type = TMapped; };
+    template <typename Key, typename TMapped, typename Hash, typename TState, typename Pair>
+    struct tuple_element<1, HashMapCell<Key, TMapped, Hash, TState, Pair>> { using type = TMapped; };
 }

 template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
--- a/src/Common/HashTable/HashSet.h
+++ b/src/Common/HashTable/HashSet.h
@ -41,6 +41,8 @@ public:
    using Base = HashTable<Key, TCell, Hash, Grower, Allocator>;
    using typename Base::LookupResult;

+    using Base::Base;
+
    void merge(const Self & rhs)
    {
        if (!this->hasZero() && rhs.hasZero())
--- a/src/Common/HashTable/HashTable.h
+++ b/src/Common/HashTable/HashTable.h
@ -117,7 +117,7 @@ inline bool bitEquals(T && a, T && b)
  * 3) Hash tables that store the key and do not have a "mapped" value, e.g. the normal HashTable.
  *    GetKey returns the key, and GetMapped returns a zero void pointer. This simplifies generic
  *    code that works with mapped values: it can overload on the return type of GetMapped(), and
-  *    doesn't need other parameters. One example is insertSetMapped() function.
+  *    doesn't need other parameters. One example is Cell::setMapped() function.
  *
  * 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap. Both GetKey and
  *    GetMapped are supported.
@ -216,17 +216,6 @@ struct HashTableCell

 };

-/**
-  * A helper function for HashTable::insert() to set the "mapped" value.
-  * Overloaded on the mapped type, does nothing if it's VoidMapped.
-  */
-template <typename ValueType>
-void insertSetMapped(VoidMapped /* dest */, const ValueType & /* src */) {}
-
-template <typename MappedType, typename ValueType>
-void insertSetMapped(MappedType & dest, const ValueType & src) { dest = src.second; }
-
-
 /** Determines the size of the hash table, and when and how much it should be resized.
  * Has very small state (one UInt8) and useful for Set-s allocated in automatic memory (see uniqExact as an example).
  */
@ -241,6 +230,8 @@ struct HashTableGrower
    /// If collision resolution chains are contiguous, we can implement erase operation by moving the elements.
    static constexpr auto performs_linear_probing_with_single_step = true;

+    static constexpr size_t max_size_degree = 23;
+
    /// The size of the hash table in the cells.
    size_t bufSize() const               { return 1ULL << size_degree; }

@ -259,17 +250,18 @@ struct HashTableGrower
    /// Increase the size of the hash table.
    void increaseSize()
    {
-        size_degree += size_degree >= 23 ? 1 : 2;
+        size_degree += size_degree >= max_size_degree ? 1 : 2;
    }

    /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
    void set(size_t num_elems)
    {
-        size_degree = num_elems <= 1
-             ? initial_size_degree
-             : ((initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
-                 ? initial_size_degree
-                 : (static_cast<size_t>(log2(num_elems - 1)) + 2));
+        if (num_elems <= 1)
+            size_degree = initial_size_degree;
+        else if (initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
+            size_degree = initial_size_degree;
+        else
+            size_degree = static_cast<size_t>(log2(num_elems - 1)) + 2;
    }

    void setBufSize(size_t buf_size_)
@ -281,6 +273,7 @@ struct HashTableGrower
 /** Determines the size of the hash table, and when and how much it should be resized.
  * This structure is aligned to cache line boundary and also occupies it all.
  * Precalculates some values to speed up lookups and insertion into the HashTable (and thus has bigger memory footprint than HashTableGrower).
+  * This grower assumes a load factor of 0.5.
  */
 template <size_t initial_size_degree = 8>
 class alignas(64) HashTableGrowerWithPrecalculation
@ -290,6 +283,7 @@ class alignas(64) HashTableGrowerWithPrecalculation
    UInt8 size_degree = initial_size_degree;
    size_t precalculated_mask = (1ULL << initial_size_degree) - 1;
    size_t precalculated_max_fill = 1ULL << (initial_size_degree - 1);
+    static constexpr size_t max_size_degree = 23;

 public:
    UInt8 sizeDegree() const { return size_degree; }
@ -319,16 +313,17 @@ public:
    bool overflow(size_t elems) const { return elems > precalculated_max_fill; }

    /// Increase the size of the hash table.
-    void increaseSize() { increaseSizeDegree(size_degree >= 23 ? 1 : 2); }
+    void increaseSize() { increaseSizeDegree(size_degree >= max_size_degree ? 1 : 2); }

    /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
    void set(size_t num_elems)
    {
-        size_degree = num_elems <= 1
-             ? initial_size_degree
-             : ((initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
-                 ? initial_size_degree
-                 : (static_cast<size_t>(log2(num_elems - 1)) + 2));
+        if (num_elems <= 1)
+            size_degree = initial_size_degree;
+        else if (initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
+            size_degree = initial_size_degree;
+        else
+            size_degree = static_cast<size_t>(log2(num_elems - 1)) + 2;
        increaseSizeDegree(0);
    }

@ -753,6 +748,7 @@ protected:

 public:
    using key_type = Key;
+    using grower_type = Grower;
    using mapped_type = typename Cell::mapped_type;
    using value_type = typename Cell::value_type;
    using cell_type = Cell;
@ -770,6 +766,14 @@ public:
        alloc(grower);
    }

+    /// Creates a table whose grower is a copy of `grower_`; the buffer is then
+    /// allocated through alloc() using that grower.
+    explicit HashTable(const Grower & grower_)
+        : grower(grower_)
+    {
+        /// Cells that use separate zero-value storage: set that storage to zero before allocating.
+        if (Cell::need_zero_value_storage)
+            this->zeroValue()->setZero();
+        alloc(grower);
+    }
+
    HashTable(size_t reserve_for_num_elements) /// NOLINT
    {
        if (Cell::need_zero_value_storage)
@ -1037,7 +1041,7 @@ public:
        }

        if (res.second)
-            insertSetMapped(res.first->getMapped(), x);
+            res.first->setMapped(x);

        return res;
    }
--- a/src/Common/HashTable/HashTableKeyHolder.h
+++ b/src/Common/HashTable/HashTableKeyHolder.h
@ -88,8 +88,12 @@ inline StringRef & ALWAYS_INLINE keyHolderGetKey(DB::ArenaKeyHolder & holder)

 inline void ALWAYS_INLINE keyHolderPersistKey(DB::ArenaKeyHolder & holder)
 {
-    // Hash table shouldn't ask us to persist a zero key
-    assert(holder.key.size > 0);
+    // Normally, our hash table shouldn't ask to persist a zero key,
+    // but it can happen in the case of a clearable hash table (ClearableHashSet, for example).
+    // The clearable hash table doesn't use zero storage and
+    // distinguishes empty keys by using cell version, not the value itself.
+    // So, when an empty StringRef is inserted in ClearableHashSet we'll get here key of zero size.
+    // assert(holder.key.size > 0);
    holder.key.data = holder.pool.insert(holder.key.data, holder.key.size);
 }

--- a/src/Common/HashTable/PackedHashMap.h
+++ b/src/Common/HashTable/PackedHashMap.h
@ -0,0 +1,107 @@
+#pragma once
+
+/// Packed version of HashMap; please keep in sync with HashMap.h
+
+#include <Common/HashTable/HashMap.h>
+
+/// A pair that does not initialize the elements, if not needed.
+///
+/// NOTE: makePairNoInit() is omitted for PackedPairNoInit since it is not
+/// required for PackedHashMap (see mergeBlockWithPipe() for details)
+template <typename First, typename Second>
+struct __attribute__((packed)) PackedPairNoInit
+{
+    First first;
+    Second second;
+
+    /// User-provided empty constructor: neither member is explicitly initialized here.
+    PackedPairNoInit() {} /// NOLINT
+
+    /// Initializes only `first` (forwarded from `first_`); `second` is not in the initializer list.
+    template <typename FirstValue>
+    PackedPairNoInit(FirstValue && first_, NoInitTag)
+        : first(std::forward<FirstValue>(first_))
+    {
+    }
+
+    /// Initializes both members, forwarding each argument to the corresponding member.
+    template <typename FirstValue, typename SecondValue>
+    PackedPairNoInit(FirstValue && first_, SecondValue && second_)
+        : first(std::forward<FirstValue>(first_))
+        , second(std::forward<SecondValue>(second_))
+    {
+    }
+};
+
+/// The difference with ZeroTraits is that PackedZeroTraits accepts PackedPairNoInit instead of Key.
+namespace PackedZeroTraits
+{
+    /// Returns true when the pair's `first` equals a value-initialized `First`.
+    /// The pair is taken by value, so no reference to the packed object is formed.
+    template <typename First, typename Second, template <typename, typename> class PackedPairNoInit>
+    bool check(const PackedPairNoInit<First, Second> p) { return p.first == First{}; }
+
+    /// Resets the pair's `first` to a value-initialized `First`; `second` is left untouched.
+    template <typename First, typename Second, template <typename, typename> class PackedPairNoInit>
+    void set(PackedPairNoInit<First, Second> & p) { p.first = First{}; }
+}
+
+/// setZero() should be overwritten to pass the pair instead of key, to avoid
+/// "reference binding to misaligned address" errors from UBsan.
+template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
+struct PackedHashMapCell : public HashMapCell<Key, TMapped, Hash, TState, PackedPairNoInit<Key, TMapped>>
+{
+    using Base = HashMapCell<Key, TMapped, Hash, TState, PackedPairNoInit<Key, TMapped>>;
+    using State = typename Base::State;
+    using value_type = typename Base::value_type;
+    using key_type = typename Base::key_type;
+    using Mapped = typename Base::Mapped;
+
+    /// Inherit all constructors of HashMapCell.
+    using Base::Base;
+
+    /// Zeroes the cell by handing the whole pair (not a reference to the key) to PackedZeroTraits.
+    void setZero() { PackedZeroTraits::set(this->value); }
+
+    /// Key accessors return by value instead of by reference.
+    /// NOTE(review): presumably for the same misaligned-reference reason as setZero() - confirm.
+    Key getKey() const { return this->value.first; }
+    static Key getKey(const value_type & value_) { return value_.first; }
+
+    /// The non-const overload exposes the mapped value by reference, the const one returns a copy.
+    Mapped & getMapped() { return this->value.second; }
+    Mapped getMapped() const { return this->value.second; }
+    value_type getValue() const { return this->value; }
+
+    /// All overloads compare only the stored key with `key_`; the hash and state arguments are ignored.
+    bool keyEquals(const Key key_) const { return bitEqualsByValue(this->value.first, key_); }
+    bool keyEquals(const Key key_, size_t /*hash_*/) const { return bitEqualsByValue(this->value.first, key_); }
+    bool keyEquals(const Key key_, size_t /*hash_*/, const State & /*state*/) const { return bitEqualsByValue(this->value.first, key_); }
+
+    /// A cell is zero when its key passes ZeroTraits::check(); the state argument is unused.
+    bool isZero(const State & state) const { return isZero(this->value.first, state); }
+    static bool isZero(const Key key, const State & /*state*/) { return ZeroTraits::check(key); }
+
+    /// Plain operator== on keys passed by value.
+    static inline bool bitEqualsByValue(key_type a, key_type b) { return a == b; }
+
+    /// Index-based access: 0 yields a copy of the key, 1 a copy of the mapped value.
+    /// NOTE(review): looks like the tuple protocol hook for structured bindings - confirm against callers.
+    template <size_t I>
+    auto get() const
+    {
+        if constexpr (I == 0) return this->value.first;
+        else if constexpr (I == 1) return this->value.second;
+    }
+};
+
+/// Tuple-protocol traits for PackedHashMapCell: two elements, element 0 is Key and element 1 is TMapped.
+namespace std
+{
+    template <typename Key, typename TMapped, typename Hash, typename TState>
+    struct tuple_size<PackedHashMapCell<Key, TMapped, Hash, TState>> : std::integral_constant<size_t, 2> { };
+
+    template <typename Key, typename TMapped, typename Hash, typename TState>
+    struct tuple_element<0, PackedHashMapCell<Key, TMapped, Hash, TState>> { using type = Key; };
+
+    template <typename Key, typename TMapped, typename Hash, typename TState>
+    struct tuple_element<1, PackedHashMapCell<Key, TMapped, Hash, TState>> { using type = TMapped; };
+}
+
+/// Packed HashMap - HashMap with structure without padding
+///
+/// Sometimes padding in structure can be crucial, consider the following
+/// example <UInt64, UInt16> as <Key, Value> in this case the padding overhead
+/// is 0.375, and this can be major in case of lots of keys.
+///
+/// Note, there is no need to provide PackedHashSet, since it cannot have padding.
+template <
+    typename Key,
+    typename Mapped,
+    typename Hash = DefaultHash<Key>,
+    typename Grower = HashTableGrower<>,
+    typename Allocator = HashTableAllocator>
+using PackedHashMap = HashMapTable<Key, PackedHashMapCell<Key, Mapped, Hash, HashTableNoState>, Hash, Grower, Allocator>;
--- a/src/Common/HashTable/TwoLevelHashTable.h
+++ b/src/Common/HashTable/TwoLevelHashTable.h
@ -224,7 +224,7 @@ public:
        emplace(Cell::getKey(x), res.first, res.second, hash_value);

        if (res.second)
-            insertSetMapped(res.first->getMapped(), x);
+            res.first->setMapped(x);

        return res;
    }
--- a/src/Common/OpenTelemetryTraceContext.cpp
+++ b/src/Common/OpenTelemetryTraceContext.cpp
@ -5,7 +5,8 @@
 #include <Common/Exception.h>
 #include <base/hex.h>
 #include <Core/Settings.h>
-#include <IO/Operators.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>

 #include <Common/AsyncTaskExecutor.h>

@ -249,26 +250,26 @@ String TracingContext::composeTraceparentHeader() const

 void TracingContext::deserialize(ReadBuffer & buf)
 {
-    buf >> this->trace_id
-        >> "\n"
-        >> this->span_id
-        >> "\n"
-        >> this->tracestate
-        >> "\n"
-        >> this->trace_flags
-        >> "\n";
+    readUUIDText(trace_id, buf);
+    assertChar('\n', buf);
+    readIntText(span_id, buf);
+    assertChar('\n', buf);
+    readEscapedString(tracestate, buf);
+    assertChar('\n', buf);
+    readIntText(trace_flags, buf);
+    assertChar('\n', buf);
 }

 void TracingContext::serialize(WriteBuffer & buf) const
 {
-    buf << this->trace_id
-        << "\n"
-        << this->span_id
-        << "\n"
-        << this->tracestate
-        << "\n"
-        << this->trace_flags
-        << "\n";
+    writeUUIDText(trace_id, buf);
+    writeChar('\n', buf);
+    writeIntText(span_id, buf);
+    writeChar('\n', buf);
+    writeEscapedString(tracestate, buf);
+    writeChar('\n', buf);
+    writeIntText(trace_flags, buf);
+    writeChar('\n', buf);
 }

 const TracingContextOnThread & CurrentContext()
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@ -63,12 +63,13 @@ const char * analyzeImpl(
    bool is_first_call = begin == regexp.data();
    int depth = 0;
    is_trivial = true;
+    bool is_prefix = true;
    required_substring.clear();
    bool has_alternative_on_depth_0 = false;
    bool has_case_insensitive_flag = false;

-    /// Substring with a position.
-    using Substring = std::pair<std::string, size_t>;
+    /// Substring with is_prefix.
+    using Substring = std::pair<std::string, bool>;
    using Substrings = std::vector<Substring>;

    Substrings trivial_substrings(1);
@ -98,6 +99,9 @@ const char * analyzeImpl(

    auto finish_non_trivial_char = [&](bool create_new_substr = true)
    {
+        is_trivial = false;
+        if (create_new_substr)
+            is_prefix = false;
        if (depth != 0)
            return;

@ -106,6 +110,7 @@ const char * analyzeImpl(
            if (alter.suffix)
            {
                alter.literal += last_substring->first;
+                alter.suffix = false;
            }
        }

@ -126,16 +131,24 @@ const char * analyzeImpl(
            if (alter.prefix)
            {
                alter.literal = last_substring->first + alter.literal;
+                alter.prefix = is_prefix;
            }
        }

        if (group_required_string.prefix)
+        {
            last_substring->first += group_required_string.literal;
+            last_substring->second = is_prefix;
+        }
        else
        {
            finish_non_trivial_char();
            last_substring->first = group_required_string.literal;
+            last_substring->second = false;
        }
+
+        is_prefix = is_prefix && group_required_string.prefix && group_required_string.suffix;
+
        /// if we can still append, no need to finish it. e.g. abc(de)fg should capture abcdefg
        if (!last_substring->first.empty() && !group_required_string.suffix)
        {
@ -185,7 +198,6 @@ const char * analyzeImpl(
                        goto ordinary;
                    default:
                        /// all other escape sequences are not supported
-                        is_trivial = false;
                        finish_non_trivial_char();
                        break;
                }
@ -196,6 +208,7 @@ const char * analyzeImpl(

            case '|':
                is_trivial = false;
+                is_prefix = false;
                ++pos;
                if (depth == 0)
                {
@ -205,6 +218,7 @@ const char * analyzeImpl(
                break;

            case '(':
+                /// bracket does not break is_prefix. for example abc(d) has a prefix 'abcd'
                is_trivial = false;
                if (!in_square_braces)
                {
@ -258,7 +272,6 @@ const char * analyzeImpl(
            case '[':
                in_square_braces = true;
                ++depth;
-                is_trivial = false;
                finish_non_trivial_char();
                ++pos;
                break;
@ -270,7 +283,6 @@ const char * analyzeImpl(
                --depth;
                if (depth == 0)
                    in_square_braces = false;
-                is_trivial = false;
                finish_non_trivial_char();
                ++pos;
                break;
@ -284,7 +296,6 @@ const char * analyzeImpl(
                break;

            case '^': case '$': case '.': case '+':
-                is_trivial = false;
                finish_non_trivial_char();
                ++pos;
                break;
@ -296,7 +307,6 @@ const char * analyzeImpl(
            case '?':
                [[fallthrough]];
            case '*':
-                is_trivial = false;
                if (depth == 0 && !last_substring->first.empty() && !in_square_braces)
                {
                    last_substring->first.resize(last_substring->first.size() - 1);
@ -318,8 +328,9 @@ const char * analyzeImpl(
            default:
                if (depth == 0 && !in_curly_braces && !in_square_braces)
                {
+                    /// Remember whether the substring being started is still a prefix of the regexp.
                    if (last_substring->first.empty())
-                        last_substring->second = pos - begin;
+                        last_substring->second = is_prefix;
                    last_substring->first.push_back(*pos);
                }
                ++pos;
@ -328,10 +339,9 @@ const char * analyzeImpl(
    }
 finish:

-    finish_non_trivial_char(false);
-
    if (!is_trivial)
    {
+        finish_non_trivial_char(false);
        /// we calculate required substring even though has_alternative_on_depth_0.
        /// we will clear the required substring after putting it to alternatives.
        if (!has_case_insensitive_flag)
@ -357,7 +367,7 @@ finish:
            if (max_length >= MIN_LENGTH_FOR_STRSTR || (!is_first_call && max_length > 0))
            {
                required_substring.literal = candidate_it->first;
-                required_substring.prefix = candidate_it->second == 0;
+                required_substring.prefix = candidate_it->second;
                required_substring.suffix = candidate_it + 1 == trivial_substrings.end();
            }
        }
@ -365,7 +375,8 @@ finish:
    else if (!trivial_substrings.empty())
    {
        required_substring.literal = trivial_substrings.front().first;
-        required_substring.prefix = trivial_substrings.front().second == 0;
+        /// trivial string means the whole regex is a simple string literal, so the prefix and suffix should be true.
+        required_substring.prefix = true;
        required_substring.suffix = true;
    }

--- a/src/Common/PoolBase.h
+++ b/src/Common/PoolBase.h
@ -7,7 +7,13 @@

 #include <Common/logger_useful.h>
 #include <Common/Exception.h>
+#include <Common/ProfileEvents.h>
+#include <Common/Stopwatch.h>

+namespace ProfileEvents
+{
+    extern const Event ConnectionPoolIsFullMicroseconds;
+}

 namespace DB
 {
@ -144,17 +150,19 @@ public:
                return Entry(*items.back());
            }

+            Stopwatch blocked;
            if (timeout < 0)
            {
-                LOG_INFO(log, "No free connections in pool. Waiting undefinitelly.");
+                LOG_INFO(log, "No free connections in pool. Waiting indefinitely.");
                available.wait(lock);
            }
            else
            {
-                auto timeout_ms = std::chrono::microseconds(timeout);
+                auto timeout_ms = std::chrono::milliseconds(timeout);
                LOG_INFO(log, "No free connections in pool. Waiting {} ms.", timeout_ms.count());
                available.wait_for(lock, timeout_ms);
            }
+            ProfileEvents::increment(ProfileEvents::ConnectionPoolIsFullMicroseconds, blocked.elapsedMicroseconds());
        }
    }

--- a/src/Common/PoolWithFailoverBase.h
+++ b/src/Common/PoolWithFailoverBase.h
@ -101,7 +101,7 @@ public:
    struct ShuffledPool
    {
        NestedPool * pool{};
-        const PoolState * state{};
+        const PoolState * state{}; // WARNING: valid only during initial ordering, dangling
        size_t index = 0;
        size_t error_count = 0;
        size_t slowdown_count = 0;
@ -115,7 +115,6 @@ public:
    /// this functor. The pools with lower result value will be tried first.
    using GetPriorityFunc = std::function<size_t(size_t index)>;

-
    /// Returns at least min_entries and at most max_entries connections (at most one connection per nested pool).
    /// The method will throw if it is unable to get min_entries alive connections or
    /// if fallback_to_stale_replicas is false and it is unable to get min_entries connections to up-to-date replicas.
@ -175,10 +174,11 @@ PoolWithFailoverBase<TNestedPool>::getShuffledPools(
    }

    /// Sort the pools into order in which they will be tried (based on respective PoolStates).
+    /// Note that `error_count` and `slowdown_count` are used for ordering, but set to zero in the resulting ShuffledPool
    std::vector<ShuffledPool> shuffled_pools;
    shuffled_pools.reserve(nested_pools.size());
    for (size_t i = 0; i < nested_pools.size(); ++i)
-        shuffled_pools.push_back(ShuffledPool{nested_pools[i].get(), &pool_states[i], i, 0});
+        shuffled_pools.push_back(ShuffledPool{nested_pools[i].get(), &pool_states[i], i, /* error_count = */ 0, /* slowdown_count = */ 0});
    ::sort(
        shuffled_pools.begin(), shuffled_pools.end(),
        [](const ShuffledPool & lhs, const ShuffledPool & rhs)
@ -227,6 +227,10 @@ PoolWithFailoverBase<TNestedPool>::getMany(
 {
    std::vector<ShuffledPool> shuffled_pools = getShuffledPools(max_ignored_errors, get_priority);

+    /// Limit `max_tries` value by `max_error_cap` to avoid unlimited number of retries
+    if (max_tries > max_error_cap)
+        max_tries = max_error_cap;
+
    /// We will try to get a connection from each pool until a connection is produced or max_tries is reached.
    std::vector<TryResult> try_results(shuffled_pools.size());
    size_t entries_count = 0;
@ -371,7 +375,7 @@ PoolWithFailoverBase<TNestedPool>::updatePoolStates(size_t max_ignored_errors)

    /// distributed_replica_max_ignored_errors
    for (auto & state : result)
-        state.error_count = std::max<UInt64>(0, state.error_count - max_ignored_errors);
+        state.error_count = state.error_count > max_ignored_errors ? state.error_count - max_ignored_errors : 0;

    return result;
 }
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -131,6 +131,8 @@
    M(ZooKeeperBytesSent, "Number of bytes send over network while communicating with ZooKeeper.") \
    M(ZooKeeperBytesReceived, "Number of bytes received over network while communicating with ZooKeeper.") \
    \
+    M(DistributedConnectionTries, "Total count of distributed connection attempts.") \
+    M(DistributedConnectionUsable, "Total count of successful distributed connections to a usable server (with required table, but maybe stale).") \
    M(DistributedConnectionFailTry, "Total count when distributed connection fails with retry.") \
    M(DistributedConnectionMissingTable, "Number of times we rejected a replica from a distributed query, because it did not contain a table needed for the query.") \
    M(DistributedConnectionStaleReplica, "Number of times we rejected a replica from a distributed query, because some table needed for a query had replication lag higher than the configured threshold.") \
@ -501,6 +503,8 @@ The server successfully detected this situation and will download merged part fr
    M(MergeTreeReadTaskRequestsSentElapsedMicroseconds, "Time spent in callbacks requested from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the remote server side.") \
    M(MergeTreeAllRangesAnnouncementsSentElapsedMicroseconds, "Time spent in sending the announcement from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \
    \
+    M(ConnectionPoolIsFullMicroseconds, "Total time spent waiting for a slot in connection pool.") \
+    \
    M(LogTest, "Number of log messages with level Test") \
    M(LogTrace, "Number of log messages with level Trace") \
    M(LogDebug, "Number of log messages with level Debug") \
--- a/src/Common/assertProcessUserMatchesDataOwner.cpp
+++ b/src/Common/assertProcessUserMatchesDataOwner.cpp
@ -0,0 +1,66 @@
+#include <Common/assertProcessUserMatchesDataOwner.h>
+#include <Common/logger_useful.h>
+#include <Common/Exception.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <pwd.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int FAILED_TO_GETPWUID;
+    extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA;
+}
+
+namespace
+{
+    /// Returns the login name for `user_id`, or the numeric id rendered as a string
+    /// when getpwuid_r() succeeds but finds no matching entry.
+    /// A non-zero getpwuid_r() result is reported through throwFromErrno (FAILED_TO_GETPWUID).
+    std::string getUserName(uid_t user_id)
+    {
+        /// sysconf() may report no definite limit (a non-positive value); use a fixed fallback size then.
+        auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX);
+        if (buffer_size <= 0)
+            buffer_size = 1024;
+
+        /// The scratch buffer must really own `buffer_size` bytes: reserve() only changes the capacity,
+        /// and letting getpwuid_r() write past size() of a std::string is undefined behaviour.
+        std::string buffer(static_cast<size_t>(buffer_size), '\0');
+
+        struct passwd passwd_entry;
+        struct passwd * result = nullptr;
+        const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer.size(), &result);
+
+        if (error)
+            throwFromErrno("Failed to find user name for " + std::to_string(user_id), ErrorCodes::FAILED_TO_GETPWUID, error);
+        else if (result)
+            return result->pw_name;
+        /// No error and no entry: fall back to the numeric id.
+        return std::to_string(user_id);
+    }
+}
+
+/// Compares the effective user id of the process with the owner of `path`.
+///  - If stat() fails or the ids match, nothing happens.
+///  - If they differ and the effective uid is 0, throws MISMATCHING_USERS_FOR_PROCESS_AND_DATA.
+///  - If they differ otherwise, `on_warning` is called with the explanatory message.
+void assertProcessUserMatchesDataOwner(const std::string & path, std::function<void(const std::string &)> on_warning)
+{
+    /// Check that the process user id matches the owner of the data.
+    const auto effective_user_id = geteuid();
+    struct stat statbuf;
+    if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid)
+    {
+        const auto effective_user = getUserName(effective_user_id);
+        const auto data_owner = getUserName(statbuf.st_uid);
+        std::string message = fmt::format(
+            "Effective user of the process ({}) does not match the owner of the data ({}).",
+            effective_user, data_owner);
+
+        /// uid 0 is treated as a hard error and the message gets a hint to run as the data owner.
+        if (effective_user_id == 0)
+        {
+            message += fmt::format(" Run under 'sudo -u {}'.", data_owner);
+            throw Exception(ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA, "{}", message);
+        }
+        else
+        {
+            /// Any other user only gets a warning; how it is reported is up to the caller.
+            on_warning(message);
+        }
+    }
+}
+
+}
--- a/src/Common/assertProcessUserMatchesDataOwner.h
+++ b/src/Common/assertProcessUserMatchesDataOwner.h
@ -0,0 +1,10 @@
+#pragma once
+#include <functional>
+#include <string>
+
+namespace DB
+{
+
+/// Checks that the effective user of the process is the owner of `path`.
+/// On a mismatch it throws when the process runs with effective uid 0;
+/// for any other user it passes a description of the mismatch to `on_warning`.
+void assertProcessUserMatchesDataOwner(
+    const std::string & path, std::function<void(const std::string &)> on_warning);
+
+}
--- a/src/Common/makeSocketAddress.cpp
+++ b/src/Common/makeSocketAddress.cpp
@ -0,0 +1,36 @@
+#include <Common/makeSocketAddress.h>
+#include <Common/logger_useful.h>
+#include <Poco/Net/NetException.h>
+
+namespace DB
+{
+
+/// Builds a Poco::Net::SocketAddress from `host` and `port`.
+/// If construction throws a DNSException whose code is EAI_FAMILY (or EAI_ADDRFAMILY where that
+/// macro is defined), an explanatory error is written to `log`. The exception is rethrown in every case.
+Poco::Net::SocketAddress makeSocketAddress(const std::string & host, uint16_t port, Poco::Logger * log)
+{
+    Poco::Net::SocketAddress socket_address;
+    try
+    {
+        socket_address = Poco::Net::SocketAddress(host, port);
+    }
+    catch (const Poco::Net::DNSException & e)
+    {
+        const auto code = e.code();
+        /// EAI_ADDRFAMILY is only compared when the platform headers define it.
+        if (code == EAI_FAMILY
+#if defined(EAI_ADDRFAMILY)
+                    || code == EAI_ADDRFAMILY
+#endif
+        )
+        {
+            LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. "
+                "If it is an IPv6 address and your host has disabled IPv6, then consider to "
+                "specify IPv4 address to listen in <listen_host> element of configuration "
+                "file. Example: <listen_host>0.0.0.0</listen_host>",
+                host, e.code(), e.message());
+        }
+
+        /// The log message is only a hint; the original exception always propagates.
+        throw;
+    }
+    return socket_address;
+}
+
+}
--- a/src/Common/makeSocketAddress.h
+++ b/src/Common/makeSocketAddress.h
@ -0,0 +1,11 @@
+#pragma once
+#include <Poco/Net/SocketAddress.h>
+
+namespace Poco { class Logger; }
+
+namespace DB
+{
+
+Poco::Net::SocketAddress makeSocketAddress(const std::string & host, uint16_t port, Poco::Logger * log);
+
+}
--- a/src/Common/parseRemoteDescription.cpp
+++ b/src/Common/parseRemoteDescription.cpp
@ -64,7 +64,8 @@ static bool parseNumber(const String & description, size_t l, size_t r, size_t &
 * abc{1..9}de{f,g,h}   - is a direct product, 27 shards.
 * abc{1..9}de{0|1}     - is a direct product, 9 shards, in each 2 replicas.
 */
-std::vector<String> parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses)
+std::vector<String>
+parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name)
 {
    std::vector<String> res;
    std::vector<String> cur;
@ -97,28 +98,41 @@ std::vector<String> parseRemoteDescription(const String & description, size_t l,
                if (cnt == 0) break;
            }
            if (cnt != 0)
-                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'remote': incorrect brace sequence in first argument");
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}': incorrect brace sequence in first argument", func_name);
            /// The presence of a dot - numeric interval
            if (last_dot != -1)
            {
                size_t left, right;
                if (description[last_dot - 1] != '.')
-                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'remote': incorrect argument in braces (only one dot): {}",
-                                    description.substr(i, m - i + 1));
+                    throw Exception(
+                        ErrorCodes::BAD_ARGUMENTS,
+                        "Table function '{}': incorrect argument in braces (only one dot): {}",
+                        func_name,
+                        description.substr(i, m - i + 1));
                if (!parseNumber(description, i + 1, last_dot - 1, left))
-                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'remote': "
-                                    "incorrect argument in braces (Incorrect left number): {}",
-                                    description.substr(i, m - i + 1));
+                    throw Exception(
+                        ErrorCodes::BAD_ARGUMENTS,
+                        "Table function '{}': "
+                        "incorrect argument in braces (Incorrect left number): {}",
+                        func_name,
+                        description.substr(i, m - i + 1));
                if (!parseNumber(description, last_dot + 1, m, right))
-                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'remote': "
-                                    "incorrect argument in braces (Incorrect right number): {}",
-                                    description.substr(i, m - i + 1));
+                    throw Exception(
+                        ErrorCodes::BAD_ARGUMENTS,
+                        "Table function '{}': "
+                        "incorrect argument in braces (Incorrect right number): {}",
+                        func_name,
+                        description.substr(i, m - i + 1));
                if (left > right)
-                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'remote': "
-                                    "incorrect argument in braces (left number is greater then right): {}",
-                                    description.substr(i, m - i + 1));
+                    throw Exception(
+                        ErrorCodes::BAD_ARGUMENTS,
+                        "Table function '{}': "
+                        "incorrect argument in braces (left number is greater then right): {}",
+                        func_name,
+                        description.substr(i, m - i + 1));
                if (right - left + 1 >  max_addresses)
-                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'remote': first argument generates too many result addresses");
+                    throw Exception(
+                        ErrorCodes::BAD_ARGUMENTS, "Table function '{}': first argument generates too many result addresses", func_name);
                bool add_leading_zeroes = false;
                size_t len = last_dot - 1 - (i + 1);
                /// If the left and right borders have equal numbers, then you must add leading zeros.
@ -161,7 +175,7 @@ std::vector<String> parseRemoteDescription(const String & description, size_t l,

    res.insert(res.end(), cur.begin(), cur.end());
    if (res.size() > max_addresses)
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'remote': first argument generates too many result addresses");
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}': first argument generates too many result addresses", func_name);

    return res;
 }
--- a/src/Common/parseRemoteDescription.h
+++ b/src/Common/parseRemoteDescription.h
@ -15,7 +15,8 @@ namespace DB
 * abc{1..9}de{f,g,h}   - is a direct product, 27 shards.
 * abc{1..9}de{0|1}     - is a direct product, 9 shards, in each 2 replicas.
 */
-std::vector<String> parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses);
+std::vector<String> parseRemoteDescription(
+    const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name = "remote");

 /// Parse remote description for external database (MySQL or PostgreSQL).
 std::vector<std::pair<String, uint16_t>> parseRemoteDescriptionForExternalDatabase(const String & description, size_t max_addresses, UInt16 default_port);
--- a/src/Common/tests/gtest_optimize_re.cpp
+++ b/src/Common/tests/gtest_optimize_re.cpp
@ -4,37 +4,40 @@

 TEST(OptimizeRE, analyze)
 {
-    auto test_f = [](const std::string & regexp, const std::string & answer, std::vector<std::string> expect_alternatives = {}, bool trival_expected = false)
+    auto test_f = [](const std::string & regexp, const std::string & required, std::vector<std::string> expect_alternatives = {}, bool trival_expected = false, bool prefix_expected = false)
    {
-        std::string required;
+        std::string answer;
        bool is_trivial;
        bool is_prefix;
        std::vector<std::string> alternatives;
-        OptimizedRegularExpression::analyze(regexp, required, is_trivial, is_prefix, alternatives);
+        OptimizedRegularExpression::analyze(regexp, answer, is_trivial, is_prefix, alternatives);
        std::cerr << regexp << std::endl;
        EXPECT_EQ(required, answer);
        EXPECT_EQ(alternatives, expect_alternatives);
        EXPECT_EQ(is_trivial, trival_expected);
+        EXPECT_EQ(is_prefix, prefix_expected);
    };
-    test_f("abc", "abc", {}, true);
+    test_f("abc", "abc", {}, true, true);
    test_f("c([^k]*)de", "");
-    test_f("abc(de)fg", "abcdefg");
-    test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"});
-    test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"});
+    test_f("abc(de)fg", "abcdefg", {}, false, true);
+    test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"}, false, true);
+    test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"}, false, true);
    test_f("abc|fgk|xyz", "", {"abc","fgk", "xyz"});
-    test_f("(abc)", "abc");
+    test_f("(abc)", "abc", {}, false, true);
    test_f("(abc|fgk)", "", {"abc","fgk"});
    test_f("(abc|fgk)(e|f|zkh|)", "", {"abc","fgk"});
    test_f("abc(abc|fg)xyzz", "xyzz", {"abcabcxyzz","abcfgxyzz"});
+    test_f("((abc|fg)kkk*)xyzz", "xyzz", {"abckk", "fgkk"});
+    test_f("abc(*(abc|fg)*)xyzz", "xyzz");
    test_f("abc[k]xyzz", "xyzz");
    test_f("(abc[k]xyzz)", "xyzz");
-    test_f("abc((de)fg(hi))jk", "abcdefghijk");
-    test_f("abc((?:de)fg(?:hi))jk", "abcdefghijk");
-    test_f("abc((de)fghi+zzz)jk", "abcdefghi");
-    test_f("abc((de)fg(hi))?jk", "abc");
-    test_f("abc((de)fghi?zzz)jk", "abcdefgh");
+    test_f("abc((de)fg(hi))jk", "abcdefghijk", {}, false, true);
+    test_f("abc((?:de)fg(?:hi))jk", "abcdefghijk", {}, false, true);
+    test_f("abc((de)fghi+zzz)jk", "abcdefghi", {}, false, true);
+    test_f("abc((de)fg(hi))?jk", "abc", {}, false, true);
+    test_f("abc((de)fghi?zzz)jk", "abcdefgh", {}, false, true);
    test_f("abc(*cd)jk", "cdjk");
-    test_f(R"(abc(de|xyz|(\{xx\}))fg)", "abc", {"abcdefg", "abcxyzfg", "abc{xx}fg"});
+    test_f(R"(abc(de|xyz|(\{xx\}))fg)", "abc", {"abcdefg", "abcxyzfg", "abc{xx}fg"}, false, true);
    test_f("abc(abc|fg)?xyzz", "xyzz");
    test_f("abc(abc|fg){0,1}xyzz", "xyzz");
    test_f("abc(abc|fg)xyzz|bcdd?k|bc(f|g|h?)z", "", {"abcabcxyzz", "abcfgxyzz", "bcd", "bc"});
@ -43,4 +46,5 @@ TEST(OptimizeRE, analyze)
    test_f(R"([Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Daumoa(?:-feedfetcher|)|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|))", "", {"pider-", "bingbot", "Yeti-", "Yeti", "Catchpoint bot", "Catchpoint", "harlotte", "Daumoa-feedfetcher", "Daumoa", "-Googlebot", "Googlebot"});
    test_f("abc|(:?xx|yy|zz|x?)def", "", {"abc", "def"});
    test_f("abc|(:?xx|yy|zz|x?){1,2}def", "", {"abc", "def"});
+    test_f(R"(\\A(?:(?:[-0-9_a-z]+(?:\\.[-0-9_a-z]+)*)/k8s1)\\z)", "/k8s1");
 }
--- a/src/Coordination/Changelog.cpp
+++ b/src/Coordination/Changelog.cpp
@ -279,7 +279,17 @@ private:
        flush();

        if (log_file_settings.max_size != 0)
-            ftruncate(file_buffer->getFD(), initial_file_size + file_buffer->count());
+        {
+            int res = -1;
+            do
+            {
+                res = ftruncate(file_buffer->getFD(), initial_file_size + file_buffer->count());
+            }
+            while (res < 0 && errno == EINTR);
+
+            if (res != 0)
+                LOG_WARNING(log, "Could not ftruncate file. Error: {}, errno: {}", errnoToString(), errno);
+        }

        if (log_file_settings.compress_logs)
            compressed_buffer.reset();
--- a/src/Coordination/KeeperSnapshotManagerS3.cpp
+++ b/src/Coordination/KeeperSnapshotManagerS3.cpp
@ -149,6 +149,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa
                s3_client->client,
                s3_client->uri.bucket,
                key,
+                DBMS_DEFAULT_BUFFER_SIZE,
                request_settings_1
            };
        };
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -914,6 +914,7 @@ class IColumn;
    M(Bool, output_format_parquet_fixed_string_as_fixed_byte_array, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary for FixedString columns.", 0) \
    M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \
    M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \
+    M(Bool, output_format_parquet_compliant_nested_types, true, "In parquet file schema, use name 'element' instead of 'item' for list elements. This is a historical artifact of Arrow library implementation. Generally increases compatibility, except perhaps with some old versions of Arrow.", 0) \
    M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \
    M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \
    M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@ -80,8 +80,9 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
 {
-    {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reade to reorder rows for better parallelism."},
-              {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}}},
+    {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},
+              {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."},
+              {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}},
    {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"},
              {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"},
              {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"},
--- a/src/DataTypes/Serializations/SerializationNullable.cpp
+++ b/src/DataTypes/Serializations/SerializationNullable.cpp
@ -355,6 +355,9 @@ ReturnType SerializationNullable::deserializeTextEscapedAndRawImpl(IColumn & col
        /// It can happen only if there is a string instead of a number
        /// or if someone uses tab or LF in TSV null_representation.
        /// In the first case we cannot continue reading anyway. The second case seems to be unlikely.
+        /// We also should delete incorrectly deserialized value from nested column.
+        nested_column.popBack(1);
+
        if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos)
            throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation "
                                       "containing '\\t' or '\\n' may not work correctly for large input.");
@ -447,6 +450,8 @@ ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, Re

        /// We have some unread data in PeekableReadBuffer own memory.
        /// It can happen only if there is an unquoted string instead of a number.
+        /// We also should delete incorrectly deserialized value from nested column.
+        nested_column.popBack(1);
        throw DB::ParsingException(
            ErrorCodes::CANNOT_READ_ALL_DATA,
            "Error while parsing Nullable: got an unquoted string {} instead of a number",
@ -579,6 +584,9 @@ ReturnType SerializationNullable::deserializeTextCSVImpl(IColumn & column, ReadB
        /// It can happen only if there is an unquoted string instead of a number
        /// or if someone uses csv delimiter, LF or CR in CSV null representation.
        /// In the first case we cannot continue reading anyway. The second case seems to be unlikely.
+        /// We also should delete incorrectly deserialized value from nested column.
+        nested_column.popBack(1);
+
        if (null_representation.find(settings.csv.delimiter) != std::string::npos || null_representation.find('\r') != std::string::npos
            || null_representation.find('\n') != std::string::npos)
            throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "CSV custom null representation containing "
--- a/src/Databases/DatabaseReplicatedSettings.h
+++ b/src/Databases/DatabaseReplicatedSettings.h
@ -8,8 +8,8 @@ namespace DB
 class ASTStorage;

 #define LIST_OF_DATABASE_REPLICATED_SETTINGS(M, ALIAS) \
-    M(Float,  max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
-    M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
+    M(Float,  max_broken_tables_ratio, 1, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
+    M(UInt64, max_replication_lag_to_enqueue, 50, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
    M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
    M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \
    M(Bool, check_consistency, true, "Check consistency of local metadata and metadata in Keeper, do replica recovery on inconsistency", 0) \
--- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp
+++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp
@ -176,7 +176,7 @@ StoragePtr DatabasePostgreSQL::tryGetTable(const String & table_name, ContextPtr
 }


-StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr, bool table_checked) const
+StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr context_, bool table_checked) const
 {
    if (!cache_tables || !cached_tables.contains(table_name))
    {
@ -191,7 +191,8 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr,

        auto storage = std::make_shared<StoragePostgreSQL>(
                StorageID(database_name, table_name), pool, table_name,
-                ColumnsDescription{columns_info->columns}, ConstraintsDescription{}, String{}, configuration.schema, configuration.on_conflict);
+                ColumnsDescription{columns_info->columns}, ConstraintsDescription{}, String{},
+                context_, configuration.schema, configuration.on_conflict);

        if (cache_tables)
        {
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@ -9,6 +9,7 @@
 #include <Common/logger_useful.h>
 #include <Common/ConcurrentBoundedQueue.h>
 #include <Common/CurrentMetrics.h>
+#include <Common/MemoryTrackerBlockerInThread.h>

 #include <Core/Defines.h>

@ -21,6 +22,7 @@
 #include <Dictionaries/DictionarySource.h>
 #include <Dictionaries/DictionaryFactory.h>
 #include <Dictionaries/HierarchyDictionariesUtils.h>
+#include <Dictionaries/HashedDictionaryCollectionTraits.h>

 namespace CurrentMetrics
 {
@ -28,24 +30,11 @@ namespace CurrentMetrics
    extern const Metric HashedDictionaryThreadsActive;
 }

-namespace
-{
-
-/// NOTE: Trailing return type is explicitly specified for SFINAE.
-
-/// google::sparse_hash_map
-template <typename T> auto getKeyFromCell(const T & value) -> decltype(value->first) { return value->first; } // NOLINT
-template <typename T> auto getValueFromCell(const T & value) -> decltype(value->second) { return value->second; } // NOLINT
-
-/// HashMap
-template <typename T> auto getKeyFromCell(const T & value) -> decltype(value->getKey()) { return value->getKey(); } // NOLINT
-template <typename T> auto getValueFromCell(const T & value) -> decltype(value->getMapped()) { return value->getMapped(); } // NOLINT
-
-}
-
 namespace DB
 {

+using namespace HashedDictionaryImpl;
+
 namespace ErrorCodes
 {
    extern const int BAD_ARGUMENTS;
@ -80,6 +69,9 @@ public:
            shards_queues[shard].emplace(backlog);
            pool.scheduleOrThrowOnError([this, shard, thread_group = CurrentThread::getGroup()]
            {
+                /// Do not account memory that was occupied by the dictionaries for the query/user context.
+                MemoryTrackerBlockerInThread memory_blocker;
+
                if (thread_group)
                    CurrentThread::attachToGroupIfDetached(thread_group);
                setThreadName("HashedDictLoad");
@ -238,14 +230,14 @@ HashedDictionary<dictionary_key_type, sparse, sharded>::~HashedDictionary()

        pool.trySchedule([&container, thread_group = CurrentThread::getGroup()]
        {
+            /// Do not account memory that was occupied by the dictionaries for the query/user context.
+            MemoryTrackerBlockerInThread memory_blocker;
+
            if (thread_group)
                CurrentThread::attachToGroupIfDetached(thread_group);
            setThreadName("HashedDictDtor");

-            if constexpr (sparse)
-                container.clear();
-            else
-                container.clearAndShrink();
+            clearContainer(container);
        });

        ++hash_tables_count;
@ -647,6 +639,8 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()
    const auto size = dict_struct.attributes.size();
    attributes.reserve(size);

+    HashTableGrowerWithPrecalculationAndMaxLoadFactor grower(configuration.max_load_factor);
+
    for (const auto & dictionary_attribute : dict_struct.attributes)
    {
        auto type_call = [&, this](const auto & dictionary_attribute_type)
@ -656,8 +650,28 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()
            using ValueType = DictionaryValueType<AttributeType>;

            auto is_nullable_sets = dictionary_attribute.is_nullable ? std::make_optional<NullableSets>(configuration.shards) : std::optional<NullableSets>{};
-            Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), CollectionsHolder<ValueType>(configuration.shards)};
-            attributes.emplace_back(std::move(attribute));
+            if constexpr (IsBuiltinHashTable<typename CollectionsHolder<ValueType>::value_type>)
+            {
+                CollectionsHolder<ValueType> collections;
+                collections.reserve(configuration.shards);
+                for (size_t i = 0; i < configuration.shards; ++i)
+                    collections.emplace_back(grower);
+
+                Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), std::move(collections)};
+                attributes.emplace_back(std::move(attribute));
+            }
+            else
+            {
+                Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), CollectionsHolder<ValueType>(configuration.shards)};
+                for (auto & container : std::get<CollectionsHolder<ValueType>>(attribute.containers))
+                    container.max_load_factor(configuration.max_load_factor);
+                attributes.emplace_back(std::move(attribute));
+            }
+
+            if constexpr (IsBuiltinHashTable<typename CollectionsHolder<ValueType>::value_type>)
+                LOG_TRACE(log, "Using builtin hash table for {} attribute", dictionary_attribute.name);
+            else
+                LOG_TRACE(log, "Using sparsehash for {} attribute", dictionary_attribute.name);
        };

        callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
@ -665,7 +679,9 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()

    if (unlikely(attributes.size()) == 0)
    {
-        no_attributes_containers.resize(configuration.shards);
+        no_attributes_containers.reserve(configuration.shards);
+        for (size_t i = 0; i < configuration.shards; ++i)
+            no_attributes_containers.emplace_back(grower);
    }

    string_arenas.resize(configuration.shards);
@ -834,12 +850,7 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::resize(size_t added
    if (unlikely(attributes_size == 0))
    {
        size_t reserve_size = added_rows + no_attributes_containers.front().size();
-
-        if constexpr (sparse)
-            no_attributes_containers.front().resize(reserve_size);
-        else
-            no_attributes_containers.front().reserve(reserve_size);
-
+        resizeContainer(no_attributes_containers.front(), reserve_size);
        return;
    }

@ -849,11 +860,7 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::resize(size_t added
        {
            auto & container = containers.front();
            size_t reserve_size = added_rows + container.size();
-
-            if constexpr (sparse)
-                container.resize(reserve_size);
-            else
-                container.reserve(reserve_size);
+            resizeContainer(container, reserve_size);
        });
    }
 }
@ -973,25 +980,9 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
        {
            for (const auto & container : containers)
            {
-                using ContainerType = std::decay_t<decltype(container)>;
-                using AttributeValueType = typename ContainerType::mapped_type;
-
                bytes_allocated += sizeof(container);
-
-                if constexpr (sparse || std::is_same_v<AttributeValueType, Field>)
-                {
-                    /// bucket_count() - Returns table size, that includes empty and deleted
-                    /// size()         - Returns table size, without empty and deleted
-                    /// and since this is sparsehash, empty cells should not be significant,
-                    /// and since items cannot be removed from the dictionary, deleted is also not important.
-                    bytes_allocated += container.size() * (sizeof(KeyType) + sizeof(AttributeValueType));
-                    bucket_count += container.bucket_count();
-                }
-                else
-                {
-                    bytes_allocated += container.getBufferSizeInBytes();
-                    bucket_count += container.getBufferSizeInCells();
-                }
+                bytes_allocated += getBufferSizeInBytes(container);
+                bucket_count += getBufferSizeInCells(container);
            }
        });

@ -1010,17 +1001,8 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
        for (const auto & container : no_attributes_containers)
        {
            bytes_allocated += sizeof(container);
-
-            if constexpr (sparse)
-            {
-                bytes_allocated += container.size() * (sizeof(KeyType));
-                bucket_count += container.bucket_count();
-            }
-            else
-            {
-                bytes_allocated += container.getBufferSizeInBytes();
-                bucket_count += container.getBufferSizeInCells();
-            }
+            bytes_allocated += getBufferSizeInBytes(container);
+            bucket_count += getBufferSizeInCells(container);
        }
    }

@ -1078,12 +1060,7 @@ Pipe HashedDictionary<dictionary_key_type, sparse, sharded>::read(const Names &
            keys.reserve(keys.size() + container.size());

            for (const auto & key : container)
-            {
-                if constexpr (sparse)
-                    keys.emplace_back(key);
-                else
-                    keys.emplace_back(key.getKey());
-            }
+                keys.emplace_back(getSetKeyFromCell(key));
        }
    }

@ -1192,9 +1169,14 @@ void registerDictionaryHashed(DictionaryFactory & factory)
        if (shard_load_queue_backlog <= 0)
            throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARD_LOAD_QUEUE_BACKLOG parameter should be greater then zero", full_name);

+        float max_load_factor = static_cast<float>(config.getDouble(config_prefix + dictionary_layout_prefix + ".max_load_factor", 0.5));
+        if (max_load_factor < 0.5f || max_load_factor > 0.99f)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: max_load_factor parameter should be within [0.5, 0.99], got {}", full_name, max_load_factor);
+
        HashedDictionaryConfiguration configuration{
            static_cast<UInt64>(shards),
            static_cast<UInt64>(shard_load_queue_backlog),
+            max_load_factor,
            require_nonempty,
            dict_lifetime,
        };
--- a/src/Dictionaries/HashedDictionary.h
+++ b/src/Dictionaries/HashedDictionary.h
@ -4,17 +4,14 @@
 #include <memory>
 #include <variant>
 #include <optional>
-#include <sparsehash/sparse_hash_map>
-#include <sparsehash/sparse_hash_set>

-#include <Common/HashTable/HashMap.h>
-#include <Common/HashTable/HashSet.h>
 #include <Core/Block.h>

 #include <Dictionaries/DictionaryStructure.h>
 #include <Dictionaries/IDictionary.h>
 #include <Dictionaries/IDictionarySource.h>
 #include <Dictionaries/DictionaryHelpers.h>
+#include <Dictionaries/HashedDictionaryCollectionType.h>

 /** This dictionary stores all content in a hash table in memory
  * (a separate Key -> Value map for each attribute)
@ -28,6 +25,7 @@ struct HashedDictionaryConfiguration
 {
    const UInt64 shards;
    const UInt64 shard_load_queue_backlog;
+    const float max_load_factor;
    const bool require_nonempty;
    const DictionaryLifetime lifetime;
 };
@ -136,42 +134,7 @@ public:

 private:
    template <typename Value>
-    using CollectionTypeNonSparse = std::conditional_t<
-        dictionary_key_type == DictionaryKeyType::Simple,
-        HashMap<UInt64, Value, DefaultHash<UInt64>>,
-        HashMapWithSavedHash<StringRef, Value, DefaultHash<StringRef>>>;
-
-    using NoAttributesCollectionTypeNonSparse = std::conditional_t<
-        dictionary_key_type == DictionaryKeyType::Simple,
-        HashSet<UInt64, DefaultHash<UInt64>>,
-        HashSetWithSavedHash<StringRef, DefaultHash<StringRef>>>;
-
-    /// Here we use sparse_hash_map with DefaultHash<> for the following reasons:
-    ///
-    /// - DefaultHash<> is used for HashMap
-    /// - DefaultHash<> (from HashTable/Hash.h> works better then std::hash<>
-    ///   in case of sequential set of keys, but with random access to this set, i.e.
-    ///
-    ///       SELECT number FROM numbers(3000000) ORDER BY rand()
-    ///
-    ///   And even though std::hash<> works better in some other cases,
-    ///   DefaultHash<> is preferred since the difference for this particular
-    ///   case is significant, i.e. it can be 10x+.
-    template <typename Value>
-    using CollectionTypeSparse = std::conditional_t<
-        dictionary_key_type == DictionaryKeyType::Simple,
-        google::sparse_hash_map<UInt64, Value, DefaultHash<KeyType>>,
-        google::sparse_hash_map<StringRef, Value, DefaultHash<KeyType>>>;
-
-    using NoAttributesCollectionTypeSparse = google::sparse_hash_set<KeyType, DefaultHash<KeyType>>;
-
-    template <typename Value>
-    using CollectionType = std::conditional_t<sparse, CollectionTypeSparse<Value>, CollectionTypeNonSparse<Value>>;
-
-    template <typename Value>
-    using CollectionsHolder = std::vector<CollectionType<Value>>;
-
-    using NoAttributesCollectionType = std::conditional_t<sparse, NoAttributesCollectionTypeSparse, NoAttributesCollectionTypeNonSparse>;
+    using CollectionsHolder = std::vector<typename HashedDictionaryImpl::HashedDictionaryMapType<dictionary_key_type, sparse, KeyType, Value>::Type>;

    using NullableSet = HashSet<KeyType, DefaultHash<KeyType>>;
    using NullableSets = std::vector<NullableSet>;
@ -269,7 +232,7 @@ private:

    BlockPtr update_field_loaded_block;
    std::vector<std::unique_ptr<Arena>> string_arenas;
-    std::vector<NoAttributesCollectionType> no_attributes_containers;
+    std::vector<typename HashedDictionaryImpl::HashedDictionarySetType<dictionary_key_type, sparse, KeyType>::Type> no_attributes_containers;
    DictionaryHierarchicalParentToChildIndexPtr hierarchical_index;
 };

--- a/src/Dictionaries/HashedDictionaryCollectionTraits.h
+++ b/src/Dictionaries/HashedDictionaryCollectionTraits.h
@ -0,0 +1,107 @@
+#pragma once
+
+#include <type_traits>
+#include <sparsehash/sparse_hash_map>
+#include <Common/HashTable/Hash.h>
+#include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/HashSet.h>
+#include <Common/HashTable/PackedHashMap.h>
+
+namespace DB
+{
+
+namespace HashedDictionaryImpl
+{
+
+/// sparse_hash_map/sparse_hash_set
+template <typename C>
+concept IsGoogleSparseHashTable = std::is_same_v<C, google::sparse_hash_map<
+    typename C::key_type,
+    typename C::mapped_type,
+    /// HashFcn is not exported by sparse_hash_map as a public type
+    DefaultHash<typename C::key_type>>>;
+
+template <typename V>
+concept IsStdMapCell = requires (V v)
+{
+    v->first;
+    v->second;
+};
+
+/// HashMap/HashMapWithSavedHash/HashSet/HashSetWithSavedHash/PackedHashMap and their Cells
+template <typename C>
+concept IsBuiltinHashTable = (
+    std::is_same_v<C, HashMapWithSavedHash<
+        typename C::key_type,
+        typename C::mapped_type,
+        DefaultHash<typename C::key_type>,
+        typename C::grower_type>> ||
+    std::is_same_v<C, HashMap<
+        typename C::key_type,
+        typename C::mapped_type,
+        DefaultHash<typename C::key_type>,
+        typename C::grower_type>> ||
+    std::is_same_v<C, PackedHashMap<
+        typename C::key_type,
+        typename C::mapped_type,
+        DefaultHash<typename C::key_type>,
+        typename C::grower_type>> ||
+    std::is_same_v<C, HashSetWithSavedHash<
+        typename C::key_type,
+        DefaultHash<typename C::key_type>,
+        typename C::grower_type>> ||
+    std::is_same_v<C, HashSet<
+        typename C::key_type,
+        DefaultHash<typename C::key_type>,
+        typename C::grower_type>>
+);
+
+template <typename V>
+concept IsBuiltinSetCell = requires (V v)
+{
+    v.getKey();
+};
+
+template <typename V>
+concept IsBuiltinMapCell = requires (V v)
+{
+    v->getKey();
+    v->getMapped();
+};
+
+// NOLINTBEGIN(*)
+
+/// google::sparse_hash_map
+template <typename T> auto getSetKeyFromCell(const T & value) { return value; }
+template <typename T> auto getKeyFromCell(const T & value) requires (IsStdMapCell<T>) { return value->first; }
+template <typename T> auto getValueFromCell(const T & value) requires (IsStdMapCell<T>) { return value->second; }
+
+/// size() - returns table size, without empty and deleted
+/// and since this is sparsehash, empty cells should not be significant,
+/// and since items cannot be removed from the dictionary, deleted is also not important.
+///
+/// NOTE: for google::sparse_hash_set value_type is Key, for sparse_hash_map
+/// value_type is std::pair<Key, Value>, and now we correctly take into
+/// account padding in structures, if any.
+template <typename C> auto getBufferSizeInBytes(const C & c) requires (IsGoogleSparseHashTable<C>) { return c.size() * sizeof(typename C::value_type); }
+/// bucket_count() - Returns table size, that includes empty and deleted
+template <typename C> auto getBufferSizeInCells(const C & c) requires (IsGoogleSparseHashTable<C>) { return c.bucket_count(); }
+
+template <typename C> auto resizeContainer(C & c, size_t size) requires (IsGoogleSparseHashTable<C>) { return c.resize(size); }
+template <typename C> auto clearContainer(C & c) requires (IsGoogleSparseHashTable<C>) { return c.clear(); }
+
+/// HashMap
+template <typename T> auto getSetKeyFromCell(const T & value) requires (IsBuiltinSetCell<T>) { return value.getKey(); }
+template <typename T> auto getKeyFromCell(const T & value) requires (IsBuiltinMapCell<T>) { return value->getKey(); }
+template <typename T> auto getValueFromCell(const T & value) requires (IsBuiltinMapCell<T>) { return value->getMapped(); }
+
+template <typename C> auto getBufferSizeInBytes(const C & c) requires (IsBuiltinHashTable<C>) { return c.getBufferSizeInBytes(); }
+template <typename C> auto getBufferSizeInCells(const C & c) requires (IsBuiltinHashTable<C>) { return c.getBufferSizeInCells(); }
+template <typename C> auto resizeContainer(C & c, size_t size) requires (IsBuiltinHashTable<C>) { return c.reserve(size); }
+template <typename C> void clearContainer(C & c) requires (IsBuiltinHashTable<C>) { return c.clearAndShrink(); }
+
+// NOLINTEND(*)
+
+}
+
+}
--- a/src/Dictionaries/HashedDictionaryCollectionType.h
+++ b/src/Dictionaries/HashedDictionaryCollectionType.h
@ -0,0 +1,262 @@
+#pragma once
+
+#include <Dictionaries/IDictionary.h>
+#include <Common/HashTable/PackedHashMap.h>
+#include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/HashSet.h>
+#include <Core/Types_fwd.h>
+#include <sparsehash/sparse_hash_map>
+#include <sparsehash/sparse_hash_set>
+#include <type_traits>
+
+namespace DB
+{
+
+namespace HashedDictionaryImpl
+{
+
+/// Return true if the type is POD [1] for the purpose of layout (this is not
+/// the same as what the STL type traits report).
+///
+///   [1]: https://stackoverflow.com/questions/4178175/what-are-aggregates-and-pods-and-how-why-are-they-special/4178176#4178176
+///
+/// The behaviour was changed in clang-16, see the following for more details:
+/// - https://github.com/llvm/llvm-project/commit/a8b0c6fa28acced71db33e80bd0b51d00422035b
+/// - https://github.com/llvm/llvm-project/commit/277123376ce08c98b07c154bf83e4092a5d4d3c6
+/// - https://github.com/llvm/llvm-project/issues/62422
+/// - https://github.com/llvm/llvm-project/issues/62353
+/// - https://github.com/llvm/llvm-project/issues/62358
+template <typename V>
+constexpr bool isPodLayout()
+{
+    /// Every type listed here is reported as NOT having POD layout;
+    /// any other type is reported as POD.
+    constexpr bool is_listed_as_non_pod =
+        std::is_same_v<V, UUID>
+        || std::is_same_v<V, DateTime64>
+        || std::is_same_v<V, Decimal32>
+        || std::is_same_v<V, Decimal64>
+        || std::is_same_v<V, Decimal128>
+        || std::is_same_v<V, Decimal256>
+        || std::is_same_v<V, StringRef>
+        || std::is_same_v<V, IPv6>
+        || std::is_same_v<V, IPv4>;
+
+    return !is_listed_as_non_pod;
+}
+
+/// HashMap with packed structure is better than google::sparse_hash_map if the
+/// <K, V> pair is small, for the sizeof(std::pair<K, V>) == 16, RSS for hash
+/// table with 1e9 elements will be:
+///
+/// - google::sparse_hash_map             : 26GiB
+/// - HashMap                             : 35GiB
+/// - PackedHashMap                       : 22GiB
+/// - google::sparse_hash_map<packed_pair>: 17GiB
+///
+/// Also note here sizeof(std::pair<>) was used since google::sparse_hash_map
+/// uses it to store <K, V>, yes we can modify google::sparse_hash_map to work
+/// with packed analog of std::pair, but the allocator overhead is still
+/// significant, because of tons of reallocations (and those cannot be solved
+/// with reserve() due to some internals of google::sparse_hash_map) and poor
+/// jemalloc support of such pattern, which results in 33% fragmentation (in
+/// comparison with glibc).
+///
+/// Plus since google::sparse_hash_map cannot use packed structure, it will
+/// have the same memory footprint for everything from UInt8 to UInt64 values
+/// and so on.
+///
+/// Returns true when google::sparse_hash_map should be used, otherwise
+/// PackedHashMap should be used instead.
+template <typename K, typename V>
+constexpr bool useSparseHashForHashedDictionary()
+{
+    constexpr bool key_is_pod = isPodLayout<K>();
+    constexpr bool value_is_pod = isPodLayout<V>();
+
+    /// A non-POD key or value always selects the sparse hash; otherwise the
+    /// decision is made on the size of the value alone.
+    ///
+    /// NOTE: PackedPairNoInit<K, V> must not be used here, since that would
+    /// instantiate the type, and it could be ill-formed.
+    return !key_is_pod || !value_is_pod || sizeof(V) > 8;
+}
+
+/// Grower with custom fill limit/load factor (instead of default 50%).
+///
+/// Based on HashTableGrowerWithPrecalculation
+template <size_t initial_size_degree = 8>
+class alignas(64) HashTableGrowerWithPrecalculationAndMaxLoadFactor
+{
+    /// log2 of the current number of cells in the table.
+    UInt8 size_degree = initial_size_degree;
+    /// bufSize() - 1; used by place() and next() to wrap positions into the table.
+    size_t precalculated_mask = (1ULL << initial_size_degree) - 1;
+    /// Number of elements above which overflow() reports true
+    /// (half of the table for the default load factor).
+    size_t precalculated_max_fill = 1ULL << (initial_size_degree - 1);
+    float max_load_factor = 0.5;
+    /// HashTableGrowerWithPrecalculation has 23, but to decrease memory usage
+    /// at least slightly 19 is used here. Also note, that for dictionaries it
+    /// is not that important since they are not that frequently loaded.
+    static constexpr size_t max_size_degree_quadratic = 19;
+
+public:
+    static constexpr auto initial_count = 1ULL << initial_size_degree;
+
+    /// If collision resolution chains are contiguous, we can implement erase operation by moving the elements.
+    static constexpr auto performs_linear_probing_with_single_step = true;
+
+    HashTableGrowerWithPrecalculationAndMaxLoadFactor() = default;
+
+    /// Creates a grower with a custom load factor; the precalculated fill
+    /// limit is recomputed right away so that it reflects max_load_factor_.
+    explicit HashTableGrowerWithPrecalculationAndMaxLoadFactor(float max_load_factor_)
+        : max_load_factor(max_load_factor_)
+    {
+        increaseSizeDegree(0);
+    }
+
+    UInt8 sizeDegree() const { return size_degree; }
+
+    /// Adds delta to the size degree and refreshes the precalculated mask and
+    /// fill limit. Calling it with delta == 0 only refreshes the cached values.
+    void increaseSizeDegree(UInt8 delta)
+    {
+        size_degree += delta;
+        precalculated_mask = (1ULL << size_degree) - 1;
+        precalculated_max_fill = static_cast<size_t>((1ULL << size_degree) * max_load_factor);
+    }
+
+    /// The size of the hash table in the cells.
+    size_t bufSize() const { return 1ULL << size_degree; }
+
+    /// From the hash value, get the cell number in the hash table.
+    size_t place(size_t x) const { return x & precalculated_mask; }
+
+    /// The next cell in the collision resolution chain.
+    size_t next(size_t pos) const { return (pos + 1) & precalculated_mask; }
+
+    /// Whether the hash table is sufficiently full. You need to increase the size of the hash table, or remove something unnecessary from it.
+    bool overflow(size_t elems) const { return elems > precalculated_max_fill; }
+
+    /// Increase the size of the hash table: the degree grows by 2 until it
+    /// reaches max_size_degree_quadratic, and by 1 afterwards.
+    void increaseSize() { increaseSizeDegree(size_degree >= max_size_degree_quadratic ? 1 : 2); }
+
+    /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
+    void set(size_t num_elems)
+    {
+        if (num_elems <= 1)
+        {
+            size_degree = initial_size_degree;
+        }
+        else
+        {
+            /// Computed once; both branches below need it.
+            const size_t floor_log2 = static_cast<size_t>(log2(num_elems - 1));
+
+            if (initial_size_degree > floor_log2 + 2)
+            {
+                size_degree = initial_size_degree;
+            }
+            else
+            {
+                /// Slightly more optimal than HashTableGrowerWithPrecalculation
+                /// and takes into account max_load_factor.
+                size_degree = floor_log2 + 1;
+                if ((1ULL << size_degree) * max_load_factor < num_elems)
+                    ++size_degree;
+            }
+        }
+        increaseSizeDegree(0);
+    }
+
+    /// Set the size degree to the smallest one whose table holds at least buf_size_ cells.
+    void setBufSize(size_t buf_size_)
+    {
+        /// For buf_size_ == 1 the generic formula evaluates log2(0) (negative
+        /// infinity), and for buf_size_ == 0 the subtraction wraps around; in
+        /// both cases the result cannot be used as a size degree, so the
+        /// smallest degree is used instead.
+        if (buf_size_ <= 1)
+            size_degree = 0;
+        else
+            size_degree = static_cast<size_t>(log2(buf_size_ - 1) + 1);
+        increaseSizeDegree(0);
+    }
+};
+static_assert(sizeof(HashTableGrowerWithPrecalculationAndMaxLoadFactor<>) == 64);
+
+/// Below are various specialisations of the hash table that will be used for
+/// HASHED/SPARSE_HASHED dictionaries; which one is used depends on the
+/// layout of the dictionary and the types of key/value (for more info see
+/// comments in this file):
+/// - HashMap
+/// - HashSet
+/// - HashMapWithSavedHash
+/// - HashSetWithSavedHash
+/// - PackedHashMap
+/// - google::sparse_hash_map
+
+///
+/// Map (dictionary with attributes)
+///
+
+/// Type of the hash table for the dictionary.
+template <DictionaryKeyType dictionary_key_type, bool sparse, typename Key, typename Value>
+struct HashedDictionaryMapType;
+
+/// HASHED layout: builtin hash maps with the load-factor-aware grower.
+/// For DictionaryKeyType::Simple the table is a HashMap keyed by UInt64,
+/// otherwise it is a HashMapWithSavedHash keyed by StringRef.
+template <DictionaryKeyType dictionary_key_type, typename Key, typename Value>
+struct HashedDictionaryMapType<dictionary_key_type, /* sparse= */ false, Key, Value>
+{
+private:
+    using Grower = HashTableGrowerWithPrecalculationAndMaxLoadFactor<>;
+    using SimpleKeyMap = HashMap<UInt64, Value, DefaultHash<UInt64>, Grower>;
+    using OtherKeyMap = HashMapWithSavedHash<StringRef, Value, DefaultHash<StringRef>, Grower>;
+
+public:
+    using Type = std::conditional_t<dictionary_key_type == DictionaryKeyType::Simple, SimpleKeyMap, OtherKeyMap>;
+};
+
+/// Implementations for SPARSE_HASHED layout.
+template <DictionaryKeyType dictionary_key_type, typename Key, typename Value, bool use_sparse_hash>
+struct HashedDictionarySparseMapType;
+
+/// SPARSE_HASHED backed by google::sparse_hash_map.
+template <DictionaryKeyType dictionary_key_type, typename Key, typename Value>
+struct HashedDictionarySparseMapType<dictionary_key_type, Key, Value, /* use_sparse_hash= */ true>
+{
+private:
+    /// sparse_hash_map is used with DefaultHash<> for the following reasons:
+    ///
+    /// - DefaultHash<> is used for HashMap
+    /// - DefaultHash<> (from HashTable/Hash.h) works better than std::hash<>
+    ///   for a sequential set of keys that is accessed in random order, i.e.
+    ///
+    ///       SELECT number FROM numbers(3000000) ORDER BY rand()
+    ///
+    ///   Even though std::hash<> works better in some other cases,
+    ///   DefaultHash<> is preferred since the difference for this particular
+    ///   case is significant, i.e. it can be 10x+.
+    ///
+    /// NOTE(review): the hash is parameterized by Key, while the other
+    /// specialisations in this file spell DefaultHash<UInt64> / DefaultHash<StringRef>;
+    /// presumably Key always matches the stored key type - confirm.
+    using Hash = DefaultHash<Key>;
+
+public:
+    using Type = std::conditional_t<
+        dictionary_key_type == DictionaryKeyType::Simple,
+        google::sparse_hash_map<UInt64, Value, Hash>,
+        google::sparse_hash_map<StringRef, Value, Hash>>;
+};
+
+/// Implementation based on PackedHashMap for SPARSE_HASHED.
+/// For DictionaryKeyType::Simple the table is keyed by UInt64, otherwise by StringRef;
+/// both variants use the load-factor-aware grower.
+template <DictionaryKeyType dictionary_key_type, typename Key, typename Value>
+struct HashedDictionarySparseMapType<dictionary_key_type, Key, Value, /* use_sparse_hash= */ false>
+{
+private:
+    using Grower = HashTableGrowerWithPrecalculationAndMaxLoadFactor<>;
+    using SimpleKeyMap = PackedHashMap<UInt64, Value, DefaultHash<UInt64>, Grower>;
+    using OtherKeyMap = PackedHashMap<StringRef, Value, DefaultHash<StringRef>, Grower>;
+
+public:
+    using Type = std::conditional_t<dictionary_key_type == DictionaryKeyType::Simple, SimpleKeyMap, OtherKeyMap>;
+};
+
+/// SPARSE_HASHED layout: the actual table type is inherited from
+/// HashedDictionarySparseMapType, selected at compile time by
+/// useSparseHashForHashedDictionary<Key, Value>().
+template <DictionaryKeyType dictionary_key_type, typename Key, typename Value>
+struct HashedDictionaryMapType<dictionary_key_type, /* sparse= */ true, Key, Value>
+    : HashedDictionarySparseMapType<
+          dictionary_key_type,
+          Key,
+          Value,
+          /* use_sparse_hash= */ useSparseHashForHashedDictionary<Key, Value>()>
+{
+};
+
+///
+/// Set (dictionary without attributes)
+///
+
+/// Type of the hash table for the dictionary.
+template <DictionaryKeyType dictionary_key_type, bool sparse, typename Key>
+struct HashedDictionarySetType;
+
+/// HASHED layout: builtin hash sets with the load-factor-aware grower.
+/// For DictionaryKeyType::Simple the table is a HashSet of UInt64,
+/// otherwise it is a HashSetWithSavedHash of StringRef.
+template <DictionaryKeyType dictionary_key_type, typename Key>
+struct HashedDictionarySetType<dictionary_key_type, /* sparse= */ false, Key>
+{
+private:
+    using Grower = HashTableGrowerWithPrecalculationAndMaxLoadFactor<>;
+    using SimpleKeySet = HashSet<UInt64, DefaultHash<UInt64>, Grower>;
+    using OtherKeySet = HashSetWithSavedHash<StringRef, DefaultHash<StringRef>, Grower>;
+
+public:
+    using Type = std::conditional_t<dictionary_key_type == DictionaryKeyType::Simple, SimpleKeySet, OtherKeySet>;
+};
+
+/// Implementation for SPARSE_HASHED.
+///
+/// NOTE: There is no implementation based on google::sparse_hash_set since
+/// PackedHashMap is more optimal anyway (see comments for
+/// useSparseHashForHashedDictionary()).
+template <DictionaryKeyType dictionary_key_type, typename Key>
+struct HashedDictionarySetType<dictionary_key_type, /* sparse= */ true, Key>
+{
+private:
+    /// Both variants are plain HashSet (no saved hash) with the load-factor-aware grower.
+    using Grower = HashTableGrowerWithPrecalculationAndMaxLoadFactor<>;
+    using SimpleKeySet = HashSet<UInt64, DefaultHash<UInt64>, Grower>;
+    using OtherKeySet = HashSet<StringRef, DefaultHash<StringRef>, Grower>;
+
+public:
+    using Type = std::conditional_t<dictionary_key_type == DictionaryKeyType::Simple, SimpleKeySet, OtherKeySet>;
+};
+
+}
+
+}
--- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
+++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp
@ -156,11 +156,11 @@ void buildLayoutConfiguration(

        const auto value_field = value_literal->value;

-        if (value_field.getType() != Field::Types::UInt64 && value_field.getType() != Field::Types::String)
+        if (value_field.getType() != Field::Types::UInt64 && value_field.getType() != Field::Types::Float64 && value_field.getType() != Field::Types::String)
        {
            throw DB::Exception(
                ErrorCodes::BAD_ARGUMENTS,
-                "Dictionary layout parameter value must be an UInt64 or String, got '{}' instead",
+                "Dictionary layout parameter value must be an UInt64, Float64 or String, got '{}' instead",
                value_field.getTypeName());
        }

--- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp
+++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp
@ -26,7 +26,7 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage(
    size_t max_single_part_upload_size_,
    size_t buf_size_,
    const WriteSettings & write_settings_)
-    : BufferWithOwnMemory<WriteBuffer>(buf_size_, nullptr, 0)
+    : WriteBufferFromFileBase(buf_size_, nullptr, 0)
    , log(&Poco::Logger::get("WriteBufferFromAzureBlobStorage"))
    , max_single_part_upload_size(max_single_part_upload_size_)
    , blob_path(blob_path_)
--- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h
+++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h
@ -6,7 +6,7 @@

 #include <memory>

-#include <IO/BufferWithOwnMemory.h>
+#include <IO/WriteBufferFromFileBase.h>
 #include <IO/WriteBuffer.h>
 #include <IO/WriteSettings.h>
 #include <azure/storage/blobs.hpp>
@ -21,7 +21,7 @@ class Logger;
 namespace DB
 {

-class WriteBufferFromAzureBlobStorage : public BufferWithOwnMemory<WriteBuffer>
+class WriteBufferFromAzureBlobStorage : public WriteBufferFromFileBase
 {
 public:
    using AzureClientPtr = std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient>;
@ -37,6 +37,9 @@ public:

    void nextImpl() override;

+    /// Returns the stored blob_path.
+    std::string getFileName() const override
+    {
+        return blob_path;
+    }
+    /// sync() simply forwards to next().
+    void sync() override
+    {
+        next();
+    }
+
 private:
    void finalizeImpl() override;
    void execWithRetry(std::function<void()> func, size_t num_tries, size_t cost = 0);
--- a/src/Disks/IO/WriteBufferWithFinalizeCallback.cpp
+++ b/src/Disks/IO/WriteBufferWithFinalizeCallback.cpp
@ -1,11 +1,11 @@
-#include "WriteIndirectBufferFromRemoteFS.h"
+#include "WriteBufferWithFinalizeCallback.h"

 namespace DB
 {

-WriteIndirectBufferFromRemoteFS::WriteIndirectBufferFromRemoteFS(
+WriteBufferWithFinalizeCallback::WriteBufferWithFinalizeCallback(
    std::unique_ptr<WriteBuffer> impl_,
-    CreateMetadataCallback && create_callback_,
+    FinalizeCallback && create_callback_,
    const String & remote_path_)
    : WriteBufferFromFileDecorator(std::move(impl_))
    , create_metadata_callback(std::move(create_callback_))
@ -14,7 +14,7 @@ WriteIndirectBufferFromRemoteFS::WriteIndirectBufferFromRemoteFS(
 }


-WriteIndirectBufferFromRemoteFS::~WriteIndirectBufferFromRemoteFS()
+WriteBufferWithFinalizeCallback::~WriteBufferWithFinalizeCallback()
 {
    try
    {
@ -26,7 +26,7 @@ WriteIndirectBufferFromRemoteFS::~WriteIndirectBufferFromRemoteFS()
    }
 }

-void WriteIndirectBufferFromRemoteFS::finalizeImpl()
+void WriteBufferWithFinalizeCallback::finalizeImpl()
 {
    WriteBufferFromFileDecorator::finalizeImpl();
    if (create_metadata_callback)
--- a/src/Disks/IO/WriteBufferWithFinalizeCallback.h
+++ b/src/Disks/IO/WriteBufferWithFinalizeCallback.h
@ -8,25 +8,25 @@
 namespace DB
 {

-using CreateMetadataCallback = std::function<void(size_t bytes_count)>;
+using FinalizeCallback = std::function<void(size_t bytes_count)>;

 /// Stores data in S3/HDFS and adds the object path and object size to metadata file on local FS.
-class WriteIndirectBufferFromRemoteFS final : public WriteBufferFromFileDecorator
+class WriteBufferWithFinalizeCallback final : public WriteBufferFromFileDecorator
 {
 public:
-    WriteIndirectBufferFromRemoteFS(
+    WriteBufferWithFinalizeCallback(
        std::unique_ptr<WriteBuffer> impl_,
-        CreateMetadataCallback && create_callback_,
+        FinalizeCallback && create_callback_,
        const String & remote_path_);

-    ~WriteIndirectBufferFromRemoteFS() override;
+    ~WriteBufferWithFinalizeCallback() override;

    String getFileName() const override { return remote_path; }

 private:
    void finalizeImpl() override;

-    CreateMetadataCallback create_metadata_callback;
+    FinalizeCallback create_metadata_callback;
    String remote_path;
 };

--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp
@ -129,7 +129,6 @@ std::unique_ptr<WriteBufferFromFileBase> AzureObjectStorage::writeObject( /// NO
    const StoredObject & object,
    WriteMode mode,
    std::optional<ObjectAttributes>,
-    FinalizeCallback && finalize_callback,
    size_t buf_size,
    const WriteSettings & write_settings)
 {
@ -138,14 +137,12 @@ std::unique_ptr<WriteBufferFromFileBase> AzureObjectStorage::writeObject( /// NO

    LOG_TEST(log, "Writing file: {}", object.remote_path);

-    auto buffer = std::make_unique<WriteBufferFromAzureBlobStorage>(
+    return std::make_unique<WriteBufferFromAzureBlobStorage>(
        client.get(),
        object.remote_path,
        settings.get()->max_single_part_upload_size,
        buf_size,
        patchSettings(write_settings));
-
-    return std::make_unique<WriteIndirectBufferFromRemoteFS>(std::move(buffer), std::move(finalize_callback), object.remote_path);
 }

 void AzureObjectStorage::findAllFiles(const std::string & path, RelativePathsWithSize & children, int max_keys) const
--- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
+++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h
@ -7,7 +7,6 @@
 #include <Disks/IO/ReadBufferFromRemoteFSGather.h>
 #include <Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h>
 #include <Disks/IO/ReadIndirectBufferFromRemoteFS.h>
-#include <Disks/IO/WriteIndirectBufferFromRemoteFS.h>
 #include <Disks/ObjectStorages/IObjectStorage.h>
 #include <Common/MultiVersion.h>

@ -83,7 +82,6 @@ public:
        const StoredObject & object,
        WriteMode mode,
        std::optional<ObjectAttributes> attributes = {},
-        FinalizeCallback && finalize_callback = {},
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
        const WriteSettings & write_settings = {}) override;

--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
@ -97,13 +97,12 @@ std::unique_ptr<WriteBufferFromFileBase> CachedObjectStorage::writeObject( /// N
    const StoredObject & object,
    WriteMode mode, // Cached doesn't support append, only rewrite
    std::optional<ObjectAttributes> attributes,
-    FinalizeCallback && finalize_callback,
    size_t buf_size,
    const WriteSettings & write_settings)
 {
    /// Add cache relating settings to WriteSettings.
    auto modified_write_settings = IObjectStorage::patchSettings(write_settings);
-    auto implementation_buffer = object_storage->writeObject(object, mode, attributes, std::move(finalize_callback), buf_size, modified_write_settings);
+    auto implementation_buffer = object_storage->writeObject(object, mode, attributes, buf_size, modified_write_settings);

    bool cache_on_write = modified_write_settings.enable_filesystem_cache_on_write_operations
        && FileCacheFactory::instance().getByName(cache_config_name).settings.cache_on_write_operations
--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h
@ -43,7 +43,6 @@ public:
        const StoredObject & object,
        WriteMode mode,
        std::optional<ObjectAttributes> attributes = {},
-        FinalizeCallback && finalize_callback = {},
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
        const WriteSettings & write_settings = {}) override;

--- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
@ -1,5 +1,6 @@
 #include <Disks/ObjectStorages/DiskObjectStorageTransaction.h>
 #include <Disks/ObjectStorages/DiskObjectStorage.h>
+#include <Disks/IO/WriteBufferWithFinalizeCallback.h>
 #include <Common/checkStackSize.h>
 #include <ranges>
 #include <Common/logger_useful.h>
@ -658,14 +659,16 @@ std::unique_ptr<WriteBufferFromFileBase> DiskObjectStorageTransaction::writeFile

    operations_to_execute.emplace_back(std::move(write_operation));

-    /// We always use mode Rewrite because we simulate append using metadata and different files
-    return object_storage.writeObject(
+    auto impl = object_storage.writeObject(
        object,
+        /// We always use mode Rewrite because we simulate append using metadata and different files
        WriteMode::Rewrite,
        object_attributes,
-        std::move(create_metadata_callback),
        buf_size,
        settings);
+
+    return std::make_unique<WriteBufferWithFinalizeCallback>(
+        std::move(impl), std::move(create_metadata_callback), object.remote_path);
 }


--- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp
@ -9,7 +9,6 @@
 #include <Storages/HDFS/ReadBufferFromHDFS.h>
 #include <Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h>
 #include <Disks/IO/ReadIndirectBufferFromRemoteFS.h>
-#include <Disks/IO/WriteIndirectBufferFromRemoteFS.h>
 #include <Disks/IO/ReadBufferFromRemoteFSGather.h>
 #include <Common/getRandomASCIIString.h>

@ -83,7 +82,6 @@ std::unique_ptr<WriteBufferFromFileBase> HDFSObjectStorage::writeObject( /// NOL
    const StoredObject & object,
    WriteMode mode,
    std::optional<ObjectAttributes> attributes,
-    FinalizeCallback && finalize_callback,
    size_t buf_size,
    const WriteSettings & write_settings)
 {
@ -93,11 +91,9 @@ std::unique_ptr<WriteBufferFromFileBase> HDFSObjectStorage::writeObject( /// NOL
            "HDFS API doesn't support custom attributes/metadata for stored objects");

    /// Single O_WRONLY in libhdfs adds O_TRUNC
-    auto hdfs_buffer = std::make_unique<WriteBufferFromHDFS>(
+    return std::make_unique<WriteBufferFromHDFS>(
        object.remote_path, config, settings->replication, patchSettings(write_settings), buf_size,
        mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND);
-
-    return std::make_unique<WriteIndirectBufferFromRemoteFS>(std::move(hdfs_buffer), std::move(finalize_callback), object.remote_path);
 }


--- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h
+++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h
@ -81,7 +81,6 @@ public:
        const StoredObject & object,
        WriteMode mode,
        std::optional<ObjectAttributes> attributes = {},
-        FinalizeCallback && finalize_callback = {},
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
        const WriteSettings & write_settings = {}) override;

--- a/src/Disks/ObjectStorages/IObjectStorage.h
+++ b/src/Disks/ObjectStorages/IObjectStorage.h
@ -48,8 +48,6 @@ struct ObjectMetadata
    std::optional<ObjectAttributes> attributes;
 };

-using FinalizeCallback = std::function<void(size_t bytes_count)>;
-
 /// Base class for all object storages which implement some subset of ordinary filesystem operations.
 ///
 /// Examples of object storages are S3, Azure Blob Storage, HDFS.
@ -119,7 +117,6 @@ public:
        const StoredObject & object,
        WriteMode mode,
        std::optional<ObjectAttributes> attributes = {},
-        FinalizeCallback && finalize_callback = {},
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
        const WriteSettings & write_settings = {}) = 0;

--- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp
@ -7,7 +7,6 @@
 #include <Disks/IO/ReadIndirectBufferFromRemoteFS.h>
 #include <Disks/IO/ReadBufferFromRemoteFSGather.h>
 #include <Disks/IO/createReadBufferFromFileBase.h>
-#include <Disks/IO/WriteIndirectBufferFromRemoteFS.h>
 #include <IO/SeekAvoidingReadBuffer.h>
 #include <IO/WriteBufferFromFile.h>
 #include <IO/copyData.h>
@ -124,7 +123,6 @@ std::unique_ptr<WriteBufferFromFileBase> LocalObjectStorage::writeObject( /// NO
    const StoredObject & object,
    WriteMode mode,
    std::optional<ObjectAttributes> /* attributes */,
-    FinalizeCallback && finalize_callback,
    size_t buf_size,
    const WriteSettings & /* write_settings */)
 {
@ -132,9 +130,7 @@ std::unique_ptr<WriteBufferFromFileBase> LocalObjectStorage::writeObject( /// NO
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "LocalObjectStorage doesn't support append to files");

    LOG_TEST(log, "Write object: {}", object.remote_path);
-    auto impl = std::make_unique<WriteBufferFromFile>(object.remote_path, buf_size);
-    return std::make_unique<WriteIndirectBufferFromRemoteFS>(
-        std::move(impl), std::move(finalize_callback), object.remote_path);
+    return std::make_unique<WriteBufferFromFile>(object.remote_path, buf_size);
 }

 void LocalObjectStorage::removeObject(const StoredObject & object)
--- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h
+++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h
@ -41,7 +41,6 @@ public:
        const StoredObject & object,
        WriteMode mode,
        std::optional<ObjectAttributes> attributes = {},
-        FinalizeCallback && finalize_callback = {},
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
        const WriteSettings & write_settings = {}) override;

--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
@ -8,7 +8,6 @@
 #include <Disks/ObjectStorages/DiskObjectStorageCommon.h>
 #include <Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h>
 #include <Disks/IO/ReadIndirectBufferFromRemoteFS.h>
-#include <Disks/IO/WriteIndirectBufferFromRemoteFS.h>
 #include <Disks/IO/ThreadPoolRemoteFSReader.h>
 #include <IO/WriteBufferFromS3.h>
 #include <IO/ReadBufferFromS3.h>
@ -160,8 +159,7 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
    const StoredObject & object,
    WriteMode mode, // S3 doesn't support append, only rewrite
    std::optional<ObjectAttributes> attributes,
-    FinalizeCallback && finalize_callback,
-    size_t buf_size [[maybe_unused]],
+    size_t buf_size,
    const WriteSettings & write_settings)
 {
    WriteSettings disk_write_settings = IObjectStorage::patchSettings(write_settings);
@ -174,17 +172,15 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
    if (write_settings.s3_allow_parallel_part_upload)
        scheduler = threadPoolCallbackRunner<void>(getThreadPoolWriter(), "VFSWrite");

-    auto s3_buffer = std::make_unique<WriteBufferFromS3>(
+    return std::make_unique<WriteBufferFromS3>(
        client.get(),
        bucket,
        object.remote_path,
+        buf_size,
        settings_ptr->request_settings,
        attributes,
        std::move(scheduler),
        disk_write_settings);
-
-    return std::make_unique<WriteIndirectBufferFromRemoteFS>(
-        std::move(s3_buffer), std::move(finalize_callback), object.remote_path);
 }

 void S3ObjectStorage::findAllFiles(const std::string & path, RelativePathsWithSize & children, int max_keys) const
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
@ -97,7 +97,6 @@ public:
        const StoredObject & object,
        WriteMode mode,
        std::optional<ObjectAttributes> attributes = {},
-        FinalizeCallback && finalize_callback = {},
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
        const WriteSettings & write_settings = {}) override;

--- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp
@ -9,7 +9,6 @@
 #include <IO/WriteHelpers.h>

 #include <Disks/IO/ReadIndirectBufferFromRemoteFS.h>
-#include <Disks/IO/WriteIndirectBufferFromRemoteFS.h>
 #include <Disks/IO/ReadBufferFromRemoteFSGather.h>
 #include <Disks/IO/ReadBufferFromWebServer.h>
 #include <Disks/IO/ThreadPoolRemoteFSReader.h>
@ -211,7 +210,6 @@ std::unique_ptr<WriteBufferFromFileBase> WebObjectStorage::writeObject( /// NOLI
    const StoredObject & /* object */,
    WriteMode /* mode */,
    std::optional<ObjectAttributes> /* attributes */,
-    FinalizeCallback && /* finalize_callback */,
    size_t /* buf_size */,
    const WriteSettings & /* write_settings */)
 {
--- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h
+++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h
@ -51,7 +51,6 @@ public:
        const StoredObject & object,
        WriteMode mode,
        std::optional<ObjectAttributes> attributes = {},
-        FinalizeCallback && finalize_callback = {},
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
        const WriteSettings & write_settings = {}) override;

--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@ -122,6 +122,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.parquet.output_fixed_string_as_fixed_byte_array = settings.output_format_parquet_fixed_string_as_fixed_byte_array;
    format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size;
    format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method;
+    format_settings.parquet.output_compliant_nested_types = settings.output_format_parquet_compliant_nested_types;
    format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
    format_settings.pretty.color = settings.output_format_pretty_color;
    format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@ -220,6 +220,7 @@ struct FormatSettings
        UInt64 max_block_size = 8192;
        ParquetVersion output_version;
        ParquetCompression output_compression_method = ParquetCompression::SNAPPY;
+        bool output_compliant_nested_types = true;
    } parquet;

    struct Pretty
--- a/src/Functions/FunctionsComparison.h
+++ b/src/Functions/FunctionsComparison.h
@ -591,7 +591,7 @@ template <> struct CompileOp<NotEqualsOp>
 {
    static llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * x, llvm::Value * y, bool /*is_signed*/)
    {
-        return x->getType()->isIntegerTy() ? b.CreateICmpNE(x, y) : b.CreateFCmpONE(x, y);
+        return x->getType()->isIntegerTy() ? b.CreateICmpNE(x, y) : b.CreateFCmpUNE(x, y);
    }
 };

--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@ -145,13 +145,6 @@ struct ConvertImpl
        using ColVecFrom = typename FromDataType::ColumnType;
        using ColVecTo = typename ToDataType::ColumnType;

-        if (std::is_same_v<Name, NameToUnixTimestamp>)
-        {
-            if (isDateOrDate32(named_from.type))
-                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal type {} of first argument of function {}",
-                    named_from.type->getName(), Name::name);
-        }
-
        if constexpr ((IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>)
            && !(std::is_same_v<DataTypeDateTime64, FromDataType> || std::is_same_v<DataTypeDateTime64, ToDataType>))
        {
@ -306,6 +299,8 @@ struct ConvertImpl
                        {
                            if constexpr (std::is_same_v<ToDataType, DataTypeIPv4> && std::is_same_v<FromDataType, DataTypeUInt64>)
                                vec_to[i] = static_cast<ToFieldType>(static_cast<IPv4::UnderlyingType>(vec_from[i]));
+                            else if constexpr (std::is_same_v<Name, NameToUnixTimestamp> && (std::is_same_v<FromDataType, DataTypeDate> || std::is_same_v<FromDataType, DataTypeDate32>))
+                                vec_to[i] = static_cast<ToFieldType>(vec_from[i] * DATE_SECONDS_PER_DAY);
                            else
                                vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
                        }
--- a/src/Functions/keyvaluepair/impl/NeedleFactory.h
+++ b/src/Functions/keyvaluepair/impl/NeedleFactory.h
@ -38,7 +38,7 @@ public:
        return SearchSymbols {std::string{needles.data(), needles.size()}};
    }

-    SearchSymbols getReadNeedles(const Configuration & extractor_configuration)
+    SearchSymbols getReadKeyNeedles(const Configuration & extractor_configuration)
    {
        const auto & [key_value_delimiter, quoting_character, pair_delimiters]
            = extractor_configuration;
@ -57,6 +57,26 @@ public:

        return SearchSymbols {std::string{needles.data(), needles.size()}};
    }
+
+    /// Needles for reading an unquoted value: the quoting character, every pair delimiter and,
+    /// when escaping is enabled, the backslash. The key-value delimiter is bound below but is not a needle.
+    SearchSymbols getReadValueNeedles(const Configuration & extractor_configuration)
+    {
+        const auto & [key_value_delimiter, quoting_character, pair_delimiters]
+            = extractor_configuration;
+
+        /// Collect the symbols straight into the string handed to SearchSymbols.
+        std::string symbols;
+        symbols.push_back(quoting_character);
+        symbols.append(pair_delimiters.begin(), pair_delimiters.end());
+
+        if constexpr (WITH_ESCAPING)
+            symbols.push_back('\\');
+
+        return SearchSymbols {symbols};
+    }
+
    SearchSymbols getReadQuotedNeedles(const Configuration & extractor_configuration)
    {
        const auto quoting_character = extractor_configuration.quoting_character;
--- a/src/Functions/keyvaluepair/impl/StateHandlerImpl.h
+++ b/src/Functions/keyvaluepair/impl/StateHandlerImpl.h
@ -41,7 +41,8 @@ public:
        NeedleFactory<WITH_ESCAPING> needle_factory;

        wait_needles = needle_factory.getWaitNeedles(configuration);
-        read_needles = needle_factory.getReadNeedles(configuration);
+        read_key_needles = needle_factory.getReadKeyNeedles(configuration);
+        read_value_needles = needle_factory.getReadValueNeedles(configuration);
        read_quoted_needles = needle_factory.getReadQuotedNeedles(configuration);
    }

@ -77,7 +78,7 @@ public:

        size_t pos = 0;

-        while (const auto * p = find_first_symbols_or_null({file.begin() + pos, file.end()}, read_needles))
+        while (const auto * p = find_first_symbols_or_null({file.begin() + pos, file.end()}, read_key_needles))
        {
            auto character_position = p - file.begin();
            size_t next_pos = character_position + 1u;
@ -191,10 +192,6 @@ public:
            {
                return {pos + 1u, State::READING_QUOTED_VALUE};
            }
-            else if (isKeyValueDelimiter(current_character))
-            {
-                return {pos, State::WAITING_KEY};
-            }

            if constexpr (WITH_ESCAPING)
            {
@ -218,7 +215,7 @@ public:

        size_t pos = 0;

-        while (const auto * p = find_first_symbols_or_null({file.begin() + pos, file.end()}, read_needles))
+        while (const auto * p = find_first_symbols_or_null({file.begin() + pos, file.end()}, read_value_needles))
        {
            const size_t character_position = p - file.begin();
            size_t next_pos = character_position + 1u;
@ -237,10 +234,6 @@ public:
                    }
                }
            }
-            else if (isKeyValueDelimiter(*p))
-            {
-                return {next_pos, State::WAITING_KEY};
-            }
            else if (isPairDelimiter(*p))
            {
                value.append(file.begin() + pos, file.begin() + character_position);
@ -300,7 +293,8 @@ public:

 private:
    SearchSymbols wait_needles;
-    SearchSymbols read_needles;
+    SearchSymbols read_key_needles;
+    SearchSymbols read_value_needles;
    SearchSymbols read_quoted_needles;

    /*
--- a/src/IO/S3/copyS3File.cpp
+++ b/src/IO/S3/copyS3File.cpp
@ -15,6 +15,10 @@

 namespace ProfileEvents
 {
+    extern const Event WriteBufferFromS3Bytes;
+    extern const Event WriteBufferFromS3Microseconds;
+    extern const Event WriteBufferFromS3RequestsErrors;
+
    extern const Event S3CreateMultipartUpload;
    extern const Event S3CompleteMultipartUpload;
    extern const Event S3PutObject;
@ -135,7 +139,10 @@ namespace
                LOG_TRACE(log, "Multipart upload has created. Bucket: {}, Key: {}, Upload id: {}", dest_bucket, dest_key, multipart_upload_id);
            }
            else
+            {
+                ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
                throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
+            }
        }

        void completeMultipartUpload()
@ -184,7 +191,7 @@ namespace
                    LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Upload_id: {}, Parts: {}, will retry", dest_bucket, dest_key, multipart_upload_id, part_tags.size());
                    continue; /// will retry
                }
-
+                ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
                throw S3Exception(
                    outcome.GetError().GetErrorType(),
                    "Message: {}, Key: {}, Bucket: {}, Tags: {}",
@ -228,7 +235,12 @@ namespace
                    size_t next_position = std::min(position + normal_part_size, end_position);
                    size_t part_size = next_position - position; /// `part_size` is either `normal_part_size` or smaller if it's the final part.

+                    Stopwatch watch;
                    uploadPart(part_number, position, part_size);
+                    watch.stop();
+
+                    ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Bytes, part_size);
+                    ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());

                    position = next_position;
                }
@ -485,16 +497,21 @@ namespace
                if (for_disk_s3)
                    ProfileEvents::increment(ProfileEvents::DiskS3PutObject);

+                Stopwatch watch;
                auto outcome = client_ptr->PutObject(request);
+                watch.stop();

                if (outcome.IsSuccess())
                {
+                    Int64 object_size = request.GetContentLength();
+                    ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Bytes, object_size);
+                    ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
                    LOG_TRACE(
                        log,
                        "Single part upload has completed. Bucket: {}, Key: {}, Object size: {}",
                        dest_bucket,
                        dest_key,
-                        request.GetContentLength());
+                        object_size);
                    break;
                }

@ -523,7 +540,7 @@ namespace
                        request.GetContentLength());
                    continue; /// will retry
                }
-
+                ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
                throw S3Exception(
                    outcome.GetError().GetErrorType(),
                    "Message: {}, Key: {}, Bucket: {}, Object size: {}",
@ -567,6 +584,7 @@ namespace
            if (!outcome.IsSuccess())
            {
                abortMultipartUpload();
+                ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
                throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
            }

--- a/src/IO/S3/tests/gtest_aws_s3_client.cpp
+++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp
@ -92,6 +92,7 @@ void doWriteRequest(std::shared_ptr<const DB::S3::Client> client, const DB::S3::
        client,
        uri.bucket,
        uri.key,
+        DBMS_DEFAULT_BUFFER_SIZE,
        request_settings
    );

--- a/src/IO/WriteBufferFromS3.cpp
+++ b/src/IO/WriteBufferFromS3.cpp
@ -79,11 +79,13 @@ WriteBufferFromS3::WriteBufferFromS3(
    std::shared_ptr<const S3::Client> client_ptr_,
    const String & bucket_,
    const String & key_,
+    size_t buf_size_,
    const S3Settings::RequestSettings & request_settings_,
    std::optional<std::map<String, String>> object_metadata_,
    ThreadPoolCallbackRunner<void> schedule_,
    const WriteSettings & write_settings_)
-    : bucket(bucket_)
+    : WriteBufferFromFileBase(buf_size_, nullptr, 0)
+    , bucket(bucket_)
    , key(key_)
    , request_settings(request_settings_)
    , upload_settings(request_settings.getUploadSettings())
--- a/src/IO/WriteBufferFromS3.h
+++ b/src/IO/WriteBufferFromS3.h
@ -5,7 +5,7 @@
 #if USE_AWS_S3

 #include <base/types.h>
-#include <IO/BufferWithOwnMemory.h>
+#include <IO/WriteBufferFromFileBase.h>
 #include <IO/WriteBuffer.h>
 #include <IO/WriteSettings.h>
 #include <Storages/StorageS3Settings.h>
@ -24,13 +24,14 @@ namespace DB
 * Data is divided on chunks with size greater than 'minimum_upload_part_size'. Last chunk can be less than this threshold.
 * Each chunk is written as a part to S3.
 */
-class WriteBufferFromS3 final : public BufferWithOwnMemory<WriteBuffer>
+class WriteBufferFromS3 final : public WriteBufferFromFileBase
 {
 public:
    WriteBufferFromS3(
        std::shared_ptr<const S3::Client> client_ptr_,
        const String & bucket_,
        const String & key_,
+        size_t buf_size_,
        const S3Settings::RequestSettings & request_settings_,
        std::optional<std::map<String, String>> object_metadata_ = std::nullopt,
        ThreadPoolCallbackRunner<void> schedule_ = {},
@ -39,8 +40,9 @@ public:
    ~WriteBufferFromS3() override;
    void nextImpl() override;
    void preFinalize() override;
+    std::string getFileName() const override { return key; } /// The object key passed to the constructor serves as the file name.
+    void sync() override { next(); } /// Syncing only calls next(); nothing else is done here.

-public:
    class IBufferAllocationPolicy
    {
    public:
--- a/src/IO/tests/gtest_writebuffer_s3.cpp
+++ b/src/IO/tests/gtest_writebuffer_s3.cpp
@ -529,6 +529,7 @@ public:
                    client,
                    bucket,
                    file_name,
+                    DBMS_DEFAULT_BUFFER_SIZE,
                    request_settings,
                    std::nullopt,
                    getAsyncPolicy().getScheduler());
--- a/src/Interpreters/ActionsVisitor.cpp
+++ b/src/Interpreters/ActionsVisitor.cpp
@ -1216,11 +1216,22 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
            else if (data.is_create_parameterized_view && query_parameter)
            {
                const auto data_type = DataTypeFactory::instance().get(query_parameter->type);
-                ColumnWithTypeAndName column(data_type,query_parameter->getColumnName());
+                /// Use getUniqueName() to allow multiple use of query parameter in the query:
+                ///
+                ///     CREATE VIEW view AS
+                ///     SELECT *
+                ///     FROM system.one
+                ///     WHERE dummy = {k1:Int}+1 OR dummy = {k1:Int}+2
+                ///                    ^^                    ^^
+                ///
+                /// NOTE: the query in the VIEW will not be modified; this is needed
+                /// only during analysis for CREATE VIEW to avoid duplicated
+                /// column names.
+                ColumnWithTypeAndName column(data_type, data.getUniqueName("__" + query_parameter->getColumnName()));
                data.addColumn(column);

                argument_types.push_back(data_type);
-                argument_names.push_back(query_parameter->name);
+                argument_names.push_back(column.name);
            }
            else
            {
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@ -528,7 +528,7 @@ KeyMetadata::iterator FileCache::addFileSegment(
    }
 }

-bool FileCache::tryReserve(FileSegment & file_segment, size_t size)
+bool FileCache::tryReserve(FileSegment & file_segment, const size_t size)
 {
    assertInitialized();
    auto cache_lock = cache_guard.lock();
@ -563,40 +563,34 @@ bool FileCache::tryReserve(FileSegment & file_segment, size_t size)
            file_segment.key(), file_segment.offset());
    }

-    size_t queue_size = main_priority->getElementsCount(cache_lock);
-    chassert(queue_size <= main_priority->getElementsLimit());
-
    /// A file_segment_metadata acquires a LRUQueue iterator on first successful space reservation attempt.
    auto queue_iterator = file_segment.getQueueIterator();
-    if (queue_iterator)
-        chassert(file_segment.getReservedSize() > 0);
-    else
-        queue_size += 1;
+    chassert(!queue_iterator || file_segment.getReservedSize() > 0);

-    class EvictionCandidates final : public std::vector<FileSegmentMetadataPtr>
+    struct EvictionCandidates /// Removal candidates collected for one key, together with that key's metadata.
    {
-    public:
        explicit EvictionCandidates(KeyMetadataPtr key_metadata_) : key_metadata(key_metadata_) {}

-        KeyMetadata & getMetadata() { return *key_metadata; }
-
        void add(FileSegmentMetadataPtr candidate)
        {
            candidate->removal_candidate = true;
-            push_back(candidate);
+            candidates.push_back(candidate); /// Remembered so the flag can be reset in the destructor.
        }

        ~EvictionCandidates()
        {
-            for (const auto & candidate : *this)
+            /// If we failed to reserve space, the candidates still held here are not deleted;
+            /// we only drop the flag so that the segments can be used again.
+            for (const auto & candidate : candidates)
                candidate->removal_candidate = false;
        }

-    private:
        KeyMetadataPtr key_metadata;
+        std::vector<FileSegmentMetadataPtr> candidates; /// Segments currently flagged with removal_candidate by add().
    };

    std::unordered_map<Key, EvictionCandidates> to_delete;
+    size_t freeable_space = 0, freeable_count = 0;

    size_t removed_size = 0;
    auto iterate_func = [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata)
@ -608,17 +602,19 @@ bool FileCache::tryReserve(FileSegment & file_segment, size_t size)

        if (releasable)
        {
-            removed_size += segment_metadata->size();
-            --queue_size;
-
            auto segment = segment_metadata->file_segment;
            if (segment->state() == FileSegment::State::DOWNLOADED)
            {
                const auto & key = segment->key();
+
                auto it = to_delete.find(key);
                if (it == to_delete.end())
                    it = to_delete.emplace(key, locked_key.getKeyMetadata()).first;
                it->second.add(segment_metadata);
+
+                freeable_space += segment_metadata->size();
+                freeable_count += 1;
+
                return PriorityIterationResult::CONTINUE;
            }

@ -633,17 +629,20 @@ bool FileCache::tryReserve(FileSegment & file_segment, size_t size)
    {
        auto is_query_priority_overflow = [&]
        {
-            const size_t new_size = query_priority->getSize(cache_lock) + size - removed_size;
+            const size_t new_size = query_priority->getSize(cache_lock) + size - freeable_space;
            return new_size > query_priority->getSizeLimit();
        };

-        query_priority->iterate(
-            [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata)
-            { return is_query_priority_overflow() ? iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; },
-            cache_lock);
-
        if (is_query_priority_overflow())
-            return false;
+        {
+            query_priority->iterate(
+                [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata)
+                { return is_query_priority_overflow() ? iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; },
+                cache_lock);
+
+            if (is_query_priority_overflow())
+                return false;
+        }

        LOG_TEST(
            log, "Query limits satisfied (while reserving for {}:{})",
@ -653,10 +652,11 @@ bool FileCache::tryReserve(FileSegment & file_segment, size_t size)
    auto is_main_priority_overflow = [&]
    {
        /// max_size == 0 means unlimited cache size,
-        /// max_element_size means unlimited number of cache elements.
+        /// max_element_size == 0 means unlimited number of cache elements.
        const bool is_overflow = (main_priority->getSizeLimit() != 0
-                                  && main_priority->getSize(cache_lock) + size - removed_size > main_priority->getSizeLimit())
-            || (main_priority->getElementsLimit() != 0 && queue_size > main_priority->getElementsLimit());
+                                  && (main_priority->getSize(cache_lock) + size - freeable_space > main_priority->getSizeLimit()))
+            || (main_priority->getElementsLimit() != 0
+                && freeable_count == 0 && main_priority->getElementsCount(cache_lock) == main_priority->getElementsLimit());

        LOG_TEST(
            log, "Overflow: {}, size: {}, ready to remove: {}, current cache size: {}/{}, elements: {}/{}, while reserving for {}:{}",
@ -668,35 +668,41 @@ bool FileCache::tryReserve(FileSegment & file_segment, size_t size)
        return is_overflow;
    };

-    main_priority->iterate(
-        [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata)
-        { return is_main_priority_overflow() ? iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; },
-        cache_lock);
-
    if (is_main_priority_overflow())
-        return false;
+    {
+        main_priority->iterate(
+            [&](LockedKey & locked_key, FileSegmentMetadataPtr segment_metadata)
+            { return is_main_priority_overflow() ? iterate_func(locked_key, segment_metadata) : PriorityIterationResult::BREAK; },
+            cache_lock);
+
+        if (is_main_priority_overflow())
+            return false;
+    }

    if (!file_segment.getKeyMetadata()->createBaseDirectory())
        return false;

    for (auto & [current_key, deletion_info] : to_delete)
    {
-        auto locked_key = deletion_info.getMetadata().tryLock();
+        auto locked_key = deletion_info.key_metadata->tryLock();
        if (!locked_key)
            continue; /// key could become invalid after we released the key lock above, just skip it.

-        for (auto it = deletion_info.begin(); it != deletion_info.end();)
+        /// delete from vector in reverse order just for efficiency
+        auto & candidates = deletion_info.candidates;
+        while (!candidates.empty())
        {
-            chassert((*it)->releasable());
+            auto & candidate = candidates.back();
+            chassert(candidate->releasable());

-            auto segment = (*it)->file_segment;
+            const auto * segment = candidate->file_segment.get();
            locked_key->removeFileSegment(segment->offset(), segment->lock());
            segment->getQueueIterator()->remove(cache_lock);

            if (query_context)
                query_context->remove(current_key, segment->offset(), cache_lock);

-            it = deletion_info.erase(it);
+            candidates.pop_back();
        }
    }

--- a/src/Interpreters/Cache/Metadata.h
+++ b/src/Interpreters/Cache/Metadata.h
@ -24,7 +24,7 @@ struct FileSegmentMetadata : private boost::noncopyable

    bool valid() const { return !removal_candidate.load(); }

-    Priority::Iterator getQueueIterator() { return file_segment->getQueueIterator(); }
+    Priority::Iterator getQueueIterator() const { return file_segment->getQueueIterator(); }

    FileSegmentPtr file_segment;
    std::atomic<bool> removal_candidate{false};
--- a/src/Interpreters/Cluster.cpp
+++ b/src/Interpreters/Cluster.cpp
@ -487,8 +487,8 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config,
                    throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", replica_key);
            }

-            addShard(settings, std::move(replica_addresses), false, current_shard_num,
-                     std::move(insert_paths), /* treat_local_as_remote */ weight, internal_replication);
+            addShard(settings, std::move(replica_addresses), /* treat_local_as_remote = */ false, current_shard_num,
+                     std::move(insert_paths), weight, internal_replication);
        }
        else
            throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", key);
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@ -1623,6 +1623,20 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
    return res;
 }

+StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const TableFunctionPtr & table_function_ptr)
+{
+    /// Results are cached per table expression; the cache key is built from
+    /// both halves of the expression's tree hash, joined by '_'.
+    const auto tree_hash = table_expression->getTreeHash();
+    const String cache_key = toString(tree_hash.first) + '_' + toString(tree_hash.second);
+
+    StoragePtr & cached = table_function_results[cache_key];
+    if (cached)
+        return cached;
+
+    /// Not executed yet for this expression: run the given table function and store its storage.
+    cached = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName());
+    return cached;
+}
+

 void Context::addViewSource(const StoragePtr & storage)
 {
--- a/Show More
+++ b/Show More